From 5d1b93792d42ac8853cd75013cad2c0c50483ec0 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Wed, 27 Apr 2022 03:29:39 -0500 Subject: [PATCH 01/50] OMPRegionInfo --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 8 +- clang/lib/CodeGen/CGStmtOpenMP.cpp | 28 ++- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 166 ++++++++++++++---- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 138 ++++++++++----- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 8 +- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 23 ++- 6 files changed, 268 insertions(+), 103 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 5cc1fdb56aa54..98d8323173f71 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1194,6 +1194,7 @@ struct PushAndPopStackRAII { if (!OMPBuilder) return; +#if 0 // The following callback is the crucial part of clangs cleanup process. // // NOTE: @@ -1206,7 +1207,9 @@ struct PushAndPopStackRAII { // to push & pop an FinalizationInfo object. // The FiniCB will still be needed but at the point where the // OpenMPIRBuilder is asked to construct a parallel (or similar) construct. - auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) { + auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP , + omp::Directive LeaveReason, + OMPRegionInfo &Region) { assert(IP.getBlock()->end() == IP.getPoint() && "Clang CG should cause non-terminated block!"); CGBuilderTy::InsertPointGuard IPG(CGF.Builder); @@ -1220,10 +1223,13 @@ struct PushAndPopStackRAII { // OpenMPIRBuilder as it can do this setup internally. 
llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel}); OMPBuilder->pushFinalizationCB(std::move(FI)); +#endif } ~PushAndPopStackRAII() { +#if 0 if (OMPBuilder) OMPBuilder->popFinalizationCB(); +#endif } llvm::OpenMPIRBuilder *OMPBuilder; }; diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 4d8b0afed6e92..fc1b362da3e13 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -1734,7 +1734,9 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { // The cleanup callback that finalizes all variabels at the given location, // thus calls destructors etc. - auto FiniCB = [this](InsertPointTy IP) { + auto FiniCB = [this]( InsertPointTy IP, + llvm:: omp::Directive LeaveReason, + llvm::OpenMPIRBuilder:: OMPRegionInfo &Region) { OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); }; @@ -4004,7 +4006,9 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy; - auto FiniCB = [this](InsertPointTy IP) { + auto FiniCB = [this]( InsertPointTy IP, + llvm:: omp::Directive LeaveReason, + llvm::OpenMPIRBuilder:: OMPRegionInfo &Region) { OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); }; @@ -4073,7 +4077,9 @@ void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt(); - auto FiniCB = [this](InsertPointTy IP) { + auto FiniCB = [this]( InsertPointTy IP, + llvm:: omp::Directive LeaveReason, + llvm::OpenMPIRBuilder:: OMPRegionInfo &Region) { OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); }; @@ -4154,7 +4160,9 @@ void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt(); - auto FiniCB = 
[this](InsertPointTy IP) { + auto FiniCB = [this]( InsertPointTy IP, + llvm:: omp::Directive LeaveReason, + llvm::OpenMPIRBuilder:: OMPRegionInfo &Region) { OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); }; @@ -4200,7 +4208,9 @@ void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) { ? EmitScalarExpr(Filter, CGM.Int32Ty) : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0); - auto FiniCB = [this](InsertPointTy IP) { + auto FiniCB = [this]( InsertPointTy IP, + llvm:: omp::Directive LeaveReason, + llvm::OpenMPIRBuilder:: OMPRegionInfo &Region) { OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); }; @@ -4240,7 +4250,9 @@ void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { HintInst = Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false); - auto FiniCB = [this](InsertPointTy IP) { + auto FiniCB = [this]( InsertPointTy IP, + llvm:: omp::Directive LeaveReason, + llvm::OpenMPIRBuilder:: OMPRegionInfo &Region) { OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); }; @@ -5578,7 +5590,9 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { // Without clause, it behaves as if the threads clause is specified. const auto *C = S.getSingleClause(); - auto FiniCB = [this](InsertPointTy IP) { + auto FiniCB = [this]( InsertPointTy IP, + llvm:: omp::Directive LeaveReason, + llvm::OpenMPIRBuilder:: OMPRegionInfo &Region) { OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); }; diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 08ccaca7034c1..05aa66d49653d 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -95,6 +95,8 @@ class OpenMPIRBuilder { /// Type used throughout for insertion points. using InsertPointTy = IRBuilder<>::InsertPoint; + struct OMPRegionInfo; + /// Callback type for variable finalization (think destructors). 
/// /// \param CodeGenIP is the insertion point at which the finalization code @@ -103,21 +105,60 @@ class OpenMPIRBuilder { /// A finalize callback knows about all objects that need finalization, e.g. /// destruction, when the scope of the currently generated construct is left /// at the time, and location, the callback is invoked. - using FinalizeCallbackTy = std::function; + using LeaveRegionCallbackTy = std::function; + + + enum class RegionKind { + Toplevel, + CanonicalLoop, + Directive + }; + + struct OMPRegionInfo { + RegionKind Kind; + omp::Directive DK; + bool IsCancellable; + LeaveRegionCallbackTy FiniCB; + + OMPRegionInfo( + RegionKind Kind, + omp::Directive DK, + bool IsCancellable, + LeaveRegionCallbackTy FiniCB) : Kind(Kind), DK(DK), IsCancellable(IsCancellable), FiniCB(std::move(FiniCB)) { + assertOK(); + } - struct FinalizationInfo { - /// The finalization callback provided by the last in-flight invocation of - /// createXXXX for the directive of kind DK. - FinalizeCallbackTy FiniCB; + void exitingEdge(InsertPointTy ExitingIP, omp::Directive LeaveReason) { + assert(LeaveReason == omp::OMPD_unknown || LeaveReason == omp:: OMPD_cancellation_point); - /// The directive kind of the innermost directive that has an associated - /// region which might require finalization when it is left. - omp::Directive DK; + if (!FiniCB) return; + FiniCB(ExitingIP, LeaveReason, *this); + } - /// Flag to indicate if the directive is cancellable. - bool IsCancellable; + /// Consistency self-check. + void assertOK() const { + if (Kind == RegionKind::Directive) { + switch (DK) { + case omp::OMPD_parallel: + break; + default: + llvm_unreachable("Not a valid OpenMP region"); + } + } + } }; + +private: + /// The finalization stack made up of finalize callbacks currently in-flight, + /// wrapped into FinalizationInfo objects that reference also the finalization + /// target block and the kind of cancellable directive. 
+ SmallVector RegionStack; + +#if 0 /// Push a finalization callback on the finalization stack. /// /// NOTE: Temporary solution until Clang CG is gone. @@ -125,11 +166,30 @@ class OpenMPIRBuilder { FinalizationStack.push_back(FI); } + void pushFinalizationCB(FinalizeCallbackTy FiniCB) { + FinalizationInfo FI{{}, omp::Directive::OMPD_unknown, false}; + FinalizationStack.push_back(FI); + } +#endif + + +#if 0 + void pushCancellationCB(CancellationCallbackTy CancelCB) { + FinalizationInfo FI{ {}, None, false, CancelCB, nullptr }; + FinalizationStack.push_back(FI); + } + + /// Pop the last finalization callback from the finalization stack. /// /// NOTE: Temporary solution until Clang CG is gone. - void popFinalizationCB() { FinalizationStack.pop_back(); } + void popFinalizationCB() { + assert(FinalizationStack.back().UserManaged); + FinalizationStack.pop_back(); + } +#endif +public: /// Callback type for body (=inner region) code generation /// /// The callback takes code locations as arguments, each describing a @@ -239,22 +299,26 @@ class OpenMPIRBuilder { /// Generator for '#omp parallel' /// /// \param Loc The insert and source location description. - /// \param AllocaIP The insertion points to be used for alloca instructions. + /// \param OuterAllocaIP The insertion points to be used for alloca instructions. /// \param BodyGenCB Callback that will generate the region code. /// \param PrivCB Callback to copy a given variable (think copy constructor). /// \param FiniCB Callback to finalize variable copies. /// \param IfCondition The evaluated 'if' clause expression, if any. /// \param NumThreads The evaluated 'num_threads' clause expression, if any. /// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind). + /// /// \param IsCancellable Flag to indicate a cancellable parallel region. + /// MK: Remove? Any non-cancellable? Makes it a difference to the runtime? /// /// \returns The insertion position *after* the parallel. 
- IRBuilder<>::InsertPoint - createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, - BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, - FinalizeCallbackTy FiniCB, Value *IfCondition, - Value *NumThreads, omp::ProcBindKind ProcBind, - bool IsCancellable); + IRBuilder<>::InsertPoint createParallel(const LocationDescription &Loc, + InsertPointTy OuterAllocaIP, + BodyGenCallbackTy BodyGenCB, + PrivatizeCallbackTy PrivCB, + LeaveRegionCallbackTy FiniCB, + Value *IfCondition, + Value *NumThreads, + omp::ProcBindKind ProcBind, bool IsCancellable); /// Generator for the control flow structure of an OpenMP canonical loop. /// @@ -333,7 +397,8 @@ class OpenMPIRBuilder { Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, InsertPointTy ComputeIP = {}, - const Twine &Name = "loop"); + const Twine &Name = "loop", + omp::Directive DK = omp::OMPD_unknown); /// Collapse a loop nest into a single loop. /// @@ -784,9 +849,9 @@ class OpenMPIRBuilder { /// \param CancelFlag Flag indicating if the cancellation is performed. /// \param CanceledDirective The kind of directive that is cancled. /// \param ExitCB Extra code to be generated in the exit block. - void emitCancelationCheckImpl(Value *CancelFlag, + void emitCancelationCheckImpl(LocationDescription Loc, Value *CancelFlag, omp::Directive CanceledDirective, - FinalizeCallbackTy ExitCB = {}); + omp::Directive CanceledBy); /// Generate a barrier runtime call. /// @@ -806,19 +871,47 @@ class OpenMPIRBuilder { /// \param Loc The location at which the request originated and is fulfilled. void emitFlush(const LocationDescription &Loc); - /// The finalization stack made up of finalize callbacks currently in-flight, - /// wrapped into FinalizationInfo objects that reference also the finalization - /// target block and the kind of cancellable directive. 
- SmallVector FinalizationStack; + +public: +#if 0 + llvm::Optional getTopmostDirective() const { + if (FinalizationStack.empty()) + return None; + return FinalizationStack.back().DK; + } + + bool isTopmostUserManaged() const { + if (FinalizationStack.empty()) + return false; + return FinalizationStack.back().UserManaged; + } + + bool isTopmostBuilderManaged() const { + if (FinalizationStack.empty()) + return false; + return !FinalizationStack.back().UserManaged; + } +#endif + + + private: + OMPRegionInfo *getInnermostDirectionRegion(omp::Directive DK) { + for (auto& R : reverse(RegionStack)) { + if (R.Kind == RegionKind::Toplevel) + return &R; + if (R.Kind == RegionKind::Directive && R.DK == DK) + return &R; + } + llvm_unreachable("expected toplevel region"); + } /// Return true if the last entry in the finalization stack is of kind \p DK /// and cancellable. bool isLastFinalizationInfoCancellable(omp::Directive DK) { - return !FinalizationStack.empty() && - FinalizationStack.back().IsCancellable && - FinalizationStack.back().DK == DK; + return getInnermostDirectionRegion(DK)->IsCancellable; } + public: /// Generate a taskwait runtime call. /// /// \param Loc The location at which the request originated and is fulfilled. @@ -940,7 +1033,7 @@ class OpenMPIRBuilder { /// \returns The insertion position *after* the single call. InsertPointTy createSingle(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - FinalizeCallbackTy FiniCB, bool IsNowait, + LeaveRegionCallbackTy FiniCB, bool IsNowait, llvm::Value *DidIt); /// Generator for '#omp master' @@ -952,7 +1045,7 @@ class OpenMPIRBuilder { /// \returns The insertion position *after* the master. InsertPointTy createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - FinalizeCallbackTy FiniCB); + LeaveRegionCallbackTy FiniCB); /// Generator for '#omp masked' /// @@ -963,7 +1056,7 @@ class OpenMPIRBuilder { /// \returns The insertion position *after* the masked. 
InsertPointTy createMasked(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - FinalizeCallbackTy FiniCB, Value *Filter); + LeaveRegionCallbackTy FiniCB, Value *Filter); /// Generator for '#omp critical' /// @@ -976,7 +1069,7 @@ class OpenMPIRBuilder { /// \returns The insertion position *after* the critical. InsertPointTy createCritical(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - FinalizeCallbackTy FiniCB, + LeaveRegionCallbackTy FiniCB, StringRef CriticalName, Value *HintInst); /// Generator for '#omp ordered depend (source | sink)' @@ -1005,7 +1098,7 @@ class OpenMPIRBuilder { /// \returns The insertion position *after* the ordered. InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - FinalizeCallbackTy FiniCB, + LeaveRegionCallbackTy FiniCB, bool IsThreads); /// Generator for '#omp sections' @@ -1023,7 +1116,7 @@ class OpenMPIRBuilder { InsertPointTy AllocaIP, ArrayRef SectionCBs, PrivatizeCallbackTy PrivCB, - FinalizeCallbackTy FiniCB, bool IsCancellable, + LeaveRegionCallbackTy FiniCB, bool IsCancellable, bool IsNowait); /// Generator for '#omp section' @@ -1034,7 +1127,7 @@ class OpenMPIRBuilder { /// \returns The insertion position *after* the section. InsertPointTy createSection(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - FinalizeCallbackTy FiniCB); + LeaveRegionCallbackTy FiniCB); /// Generate conditional branch and relevant BasicBlocks through which private /// threads copy the 'copyin' variables from Master copy to threadprivate @@ -1238,7 +1331,7 @@ class OpenMPIRBuilder { InsertPointTy EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall, Instruction *ExitCall, BodyGenCallbackTy BodyGenCB, - FinalizeCallbackTy FiniCB, bool Conditional = false, + LeaveRegionCallbackTy FiniCB, bool Conditional = false, bool HasFinalize = true, bool IsCancellable = false); /// Get the platform-specific name separator. 
@@ -1481,6 +1574,7 @@ class OpenMPIRBuilder { /// \returns The CanonicalLoopInfo that represents the emitted loop. CanonicalLoopInfo *createLoopSkeleton(DebugLoc DL, Value *TripCount, Function *F, + // bool Finalize, BasicBlock *PreInsertBefore, BasicBlock *PostInsertBefore, const Twine &Name = {}); diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index c392397090d88..a3f42b3f02555 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" +#include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AssumptionCache.h" @@ -21,6 +22,7 @@ #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/RegionPrinter.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" @@ -29,6 +31,7 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/Value.h" +#include "llvm/IR/Verifier.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetMachine.h" @@ -696,7 +699,7 @@ OpenMPIRBuilder::emitBarrierImpl(const LocationDescription &Loc, Directive Kind, Args); if (UseCancelBarrier && CheckCancelFlag) - emitCancelationCheckImpl(Result, OMPD_parallel); + emitCancelationCheckImpl(Loc, Result, OMPD_parallel, OMPD_barrier); return Builder.saveIP(); } @@ -733,18 +736,9 @@ OpenMPIRBuilder::createCancel(const LocationDescription &Loc, Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind}; Value *Result = Builder.CreateCall( getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args); - auto ExitCB = [this, CanceledDirective, Loc](InsertPointTy IP) { - if 
(CanceledDirective == OMPD_parallel) { - IRBuilder<>::InsertPointGuard IPG(Builder); - Builder.restoreIP(IP); - createBarrier(LocationDescription(Builder.saveIP(), Loc.DL), - omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false, - /* CheckCancelFlag */ false); - } - }; // The actual cancel logic is shared with others, e.g., cancel_barriers. - emitCancelationCheckImpl(Result, CanceledDirective, ExitCB); + emitCancelationCheckImpl(Loc, Result, CanceledDirective, OMPD_cancel); // Update the insertion point and remove the terminator we introduced. Builder.SetInsertPoint(UI->getParent()); @@ -753,9 +747,9 @@ OpenMPIRBuilder::createCancel(const LocationDescription &Loc, return Builder.saveIP(); } -void OpenMPIRBuilder::emitCancelationCheckImpl(Value *CancelFlag, - omp::Directive CanceledDirective, - FinalizeCallbackTy ExitCB) { +void OpenMPIRBuilder::emitCancelationCheckImpl(LocationDescription Loc, Value *CancelFlag, + omp::Directive CanceledDirective, + omp::Directive CanceledBy) { assert(isLastFinalizationInfoCancellable(CanceledDirective) && "Unexpected cancellation!"); @@ -783,19 +777,22 @@ void OpenMPIRBuilder::emitCancelationCheckImpl(Value *CancelFlag, // From the cancellation block we finalize all variables and go to the // post finalization block that is known to the FiniCB callback. Builder.SetInsertPoint(CancellationBlock); - if (ExitCB) - ExitCB(Builder.saveIP()); - auto &FI = FinalizationStack.back(); - FI.FiniCB(Builder.saveIP()); + // if (ExitCB) + // ExitCB(Builder.saveIP()); + //auto &FI = FinalizationStack.back(); + //FI.FiniCB(Builder.saveIP()); // The continuation block is where code generation continues. 
Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin()); } -IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( - const LocationDescription &Loc, InsertPointTy OuterAllocaIP, - BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, - FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, +IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(const LocationDescription &Loc, + InsertPointTy OuterAllocaIP, + BodyGenCallbackTy BodyGenCB, + PrivatizeCallbackTy PrivCB, + LeaveRegionCallbackTy FiniCB, + Value *IfCondition, + Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable) { assert(!isConflictIP(Loc.IP, OuterAllocaIP) && "IPs must not be ambiguous"); @@ -871,6 +868,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( BasicBlock *PRegExitBB = PRegPreFiniBB->splitBasicBlock(ThenTI, "omp.par.exit"); +#if 0 auto FiniCBWrapper = [&](InsertPointTy IP) { // Hide "open-ended" blocks from the given FiniCB by setting the right jump // target to the region exit block. @@ -887,6 +885,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( }; FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable}); +#endif // Generate the privatization allocas in the block that will become the entry // of the outlined function. @@ -1017,6 +1016,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( I->eraseFromParent(); }; +#if 0 // Adjust the finalization stack, verify the adjustment, and call the // finalize function a last time to finalize values between the pre-fini // block and the exit block if we left the parallel "the normal way". 
@@ -1024,11 +1024,14 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( (void)FiniInfo; assert(FiniInfo.DK == OMPD_parallel && "Unexpected finalization stack state!"); +#endif Instruction *PRegPreFiniTI = PRegPreFiniBB->getTerminator(); InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator()); +#if 0 FiniCB(PreFiniIP); +#endif OI.OuterAllocaBB = OuterAllocaBlock; OI.EntryBB = PRegEntryBB; @@ -1215,15 +1218,18 @@ void OpenMPIRBuilder::createTaskyield(const LocationDescription &Loc) { emitTaskyieldImpl(Loc); } -OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections( - const LocationDescription &Loc, InsertPointTy AllocaIP, - ArrayRef SectionCBs, PrivatizeCallbackTy PrivCB, - FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait) { +OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections(const LocationDescription &Loc, + InsertPointTy AllocaIP, + ArrayRef SectionCBs, + PrivatizeCallbackTy PrivCB, + LeaveRegionCallbackTy FiniCB, bool IsCancellable, + bool IsNowait) { assert(!isConflictIP(AllocaIP, Loc.IP) && "Dedicated IP allocas required"); if (!updateToLocation(Loc)) return Loc.IP; +#if 0 auto FiniCBWrapper = [&](InsertPointTy IP) { if (IP.getBlock()->end() != IP.getPoint()) return FiniCB(IP); @@ -1244,7 +1250,9 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections( return FiniCB(IP); }; + // TODO: Use CanonicalLoopInfo finalization. 
FinalizationStack.push_back({FiniCBWrapper, OMPD_sections, IsCancellable}); +#endif // Each section is emitted as a switch case // Each finalization callback is handled from clang.EmitOMPSectionDirective() @@ -1294,6 +1302,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections( InsertPointTy AfterIP = applyStaticWorkshareLoop(Loc.DL, LoopInfo, AllocaIP, !IsNowait); +#if 0 // Apply the finalization callback in LoopAfterBB auto FiniInfo = FinalizationStack.pop_back_val(); assert(FiniInfo.DK == OMPD_sections && @@ -1305,6 +1314,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections( CB(Builder.saveIP()); AfterIP = {FiniBB, FiniBB->begin()}; } +#endif return AfterIP; } @@ -1312,13 +1322,21 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections( OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSection(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - FinalizeCallbackTy FiniCB) { + LeaveRegionCallbackTy FiniCB) { if (!updateToLocation(Loc)) return Loc.IP; - auto FiniCBWrapper = [&](InsertPointTy IP) { - if (IP.getBlock()->end() != IP.getPoint()) - return FiniCB(IP); +#if 0 + auto &SectionsFini = FinalizationStack.back(); + assert(SectionsFini.DK == OMPD_sections); + + auto FiniCBWrapper = [&](InsertPointTy IP, omp::Directive CancelledDirective, + omp::Directive CancelledBy) { + assert(CancelledDirective == OMPD_sections); + +#if 0 + if (IP.getBlock()->end() != IP.getPoint()) + return FiniCB(IP,LeavingRegion, CancelledBy); // This must be done otherwise any nested constructs using FinalizeOMPRegion // will fail because that function requires the Finalization Basic Block to // have a terminator, which is already removed by EmitOMPRegionBody. 
@@ -1333,13 +1351,23 @@ OpenMPIRBuilder::createSection(const LocationDescription &Loc, auto *ExitBB = CondBB->getTerminator()->getSuccessor(1); Instruction *I = Builder.CreateBr(ExitBB); IP = InsertPointTy(I->getParent(), I->getIterator()); - return FiniCB(IP); + +#endif + auto UserFini = splitBB(Builder, true, ".section_userfini"); + + if (FiniCB) + FiniCB(Builder.saveIP(), CancelledDirective, CancelledBy); + + if (SectionsFini.FiniCB) + SectionsFini.FiniCB({UserFini, UserFini->begin()}, CancelledDirective, + CancelledBy); }; +#endif Directive OMPD = Directive::OMPD_sections; // Since we are using Finalization Callback here, HasFinalize // and IsCancellable have to be true - return EmitOMPInlinedRegion(OMPD, nullptr, nullptr, BodyGenCB, FiniCBWrapper, + return EmitOMPInlinedRegion(OMPD, nullptr, nullptr, BodyGenCB, {}, /*Conditional*/ false, /*hasFinalize*/ true, /*IsCancellable*/ true); } @@ -1522,7 +1550,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductions( OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - FinalizeCallbackTy FiniCB) { + LeaveRegionCallbackTy FiniCB) { if (!updateToLocation(Loc)) return Loc.IP; @@ -1547,7 +1575,7 @@ OpenMPIRBuilder::createMaster(const LocationDescription &Loc, OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createMasked(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - FinalizeCallbackTy FiniCB, Value *Filter) { + LeaveRegionCallbackTy FiniCB, Value *Filter) { if (!updateToLocation(Loc)) return Loc.IP; @@ -1608,8 +1636,25 @@ CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton( Builder.CreateICmpULT(IndVarPHI, TripCount, "omp_" + Name + ".cmp"); Builder.CreateCondBr(Cmp, Body, Exit); - Builder.SetInsertPoint(Body); +#if 0 + bool Finalize = true; + if (Finalize) { + Builder.SetInsertPoint(Body); + Builder.CreateBr(Continue); + } +#endif + +#if 0 + Builder.SetInsertPoint(Continue); Builder.CreateBr(Latch); +#endif + +#if 0 + if 
(Finalize) { + Builder.SetInsertPoint(Cancel); + Builder.CreateUnreachable(); + } +#endif Builder.SetInsertPoint(Latch); Value *Next = Builder.CreateAdd(IndVarPHI, ConstantInt::get(IndVarTy, 1), @@ -1665,10 +1710,13 @@ OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc, return CL; } -CanonicalLoopInfo *OpenMPIRBuilder::createCanonicalLoop( - const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, - Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, - InsertPointTy ComputeIP, const Twine &Name) { +CanonicalLoopInfo *OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc, + LoopBodyGenCallbackTy BodyGenCB, + Value *Start, Value *Stop, Value *Step, + bool IsSigned, bool InclusiveStop, + InsertPointTy ComputeIP , + const Twine &Name , + omp::Directive DK ) { // Consider the following difficulties (assuming 8-bit signed integers): // * Adding \p Step to the loop counter which passes \p Stop may overflow: @@ -2932,7 +2980,7 @@ OpenMPIRBuilder::createCopyPrivate(const LocationDescription &Loc, OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSingle( const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - FinalizeCallbackTy FiniCB, bool IsNowait, llvm::Value *DidIt) { + LeaveRegionCallbackTy FiniCB, bool IsNowait, llvm::Value *DidIt) { if (!updateToLocation(Loc)) return Loc.IP; @@ -2974,7 +3022,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSingle( OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCritical( const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst) { + LeaveRegionCallbackTy FiniCB, StringRef CriticalName, Value *HintInst) { if (!updateToLocation(Loc)) return Loc.IP; @@ -3054,7 +3102,7 @@ OpenMPIRBuilder::createOrderedDepend(const LocationDescription &Loc, OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createOrderedThreadsSimd( const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - 
FinalizeCallbackTy FiniCB, bool IsThreads) { + LeaveRegionCallbackTy FiniCB, bool IsThreads) { if (!updateToLocation(Loc)) return Loc.IP; @@ -3083,11 +3131,13 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createOrderedThreadsSimd( OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion( Directive OMPD, Instruction *EntryCall, Instruction *ExitCall, - BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional, + BodyGenCallbackTy BodyGenCB, LeaveRegionCallbackTy FiniCB, bool Conditional, bool HasFinalize, bool IsCancellable) { +#if 0 if (HasFinalize) FinalizationStack.push_back({FiniCB, OMPD, IsCancellable}); +#endif // Create inlined region's entry and body blocks, in preparation // for conditional creation @@ -3168,6 +3218,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveExit( // If there is finalization to do, emit it before the exit call if (HasFinalize) { +#if 0 assert(!FinalizationStack.empty() && "Unexpected finalization stack state!"); @@ -3175,6 +3226,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveExit( assert(Fi.DK == OMPD && "Unexpected Directive for Finalization call!"); Fi.FiniCB(FinIP); +#endif BasicBlock *FiniBB = FinIP.getBlock(); Instruction *FiniBBTI = FiniBB->getTerminator(); diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 6819d79e01260..7418f9b8feef9 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -951,7 +951,7 @@ struct OpenMPOpt { return CodeGenIP; }; - auto FiniCB = [&](InsertPointTy CodeGenIP) {}; + /// Create a sequential execution region within a merged parallel region, /// encapsulated in a master construct with a barrier for synchronization. 
@@ -983,7 +983,7 @@ struct OpenMPOpt { assert(SeqEndBB != nullptr && "SeqEndBB should not be null"); SeqEndBB->getTerminator()->setSuccessor(0, CGEndBB); }; - auto FiniCB = [&](InsertPointTy CodeGenIP) {}; + // Find outputs from the sequential region to outside users and // broadcast their values to them. @@ -1026,7 +1026,7 @@ struct OpenMPOpt { OpenMPIRBuilder::LocationDescription Loc( InsertPointTy(ParentBB, ParentBB->end()), DL); InsertPointTy SeqAfterIP = - OMPInfoCache.OMPBuilder.createMaster(Loc, BodyGenCB, FiniCB); + OMPInfoCache.OMPBuilder.createMaster(Loc, BodyGenCB, {}); OMPInfoCache.OMPBuilder.createBarrier(SeqAfterIP, OMPD_parallel); @@ -1101,7 +1101,7 @@ struct OpenMPOpt { // Create the merged parallel region with default proc binding, to // avoid overriding binding settings, and without explicit cancellation. InsertPointTy AfterIP = OMPInfoCache.OMPBuilder.createParallel( - Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr, + Loc, AllocaIP, BodyGenCB, PrivCB, {}, nullptr, nullptr, OMP_PROC_BIND_default, /* IsCancellable */ false); BranchInst::Create(AfterBB, AfterIP.getBlock()); diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index d90342f3f0f08..4775f3447a71d 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -281,7 +281,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, // TODO: Perform finalization actions for variables. This has to be // called for variables which have destructors/finalizers. 
- auto finiCB = [&](InsertPointTy codeGenIP) {}; + llvm::Value *ifCond = nullptr; if (auto ifExprVar = opInst.if_expr_var()) @@ -299,7 +299,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, findAllocaInsertPoint(builder, moduleTranslation); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createParallel( - ompLoc, allocaIP, bodyGenCB, privCB, finiCB, ifCond, numThreads, pbKind, + ompLoc, allocaIP, bodyGenCB, privCB, {}, ifCond, numThreads, pbKind, isCancellable)); return bodyGenStatus; @@ -324,11 +324,11 @@ convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder, // TODO: Perform finalization actions for variables. This has to be // called for variables which have destructors/finalizers. - auto finiCB = [&](InsertPointTy codeGenIP) {}; + llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createMaster( - ompLoc, bodyGenCB, finiCB)); + ompLoc, bodyGenCB, {})); return success(); } @@ -352,7 +352,7 @@ convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder, // TODO: Perform finalization actions for variables. This has to be // called for variables which have destructors/finalizers. - auto finiCB = [&](InsertPointTy codeGenIP) {}; + llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext(); @@ -371,7 +371,7 @@ convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder, static_cast(criticalDeclareOp.hint_val())); } builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createCritical( - ompLoc, bodyGenCB, finiCB, criticalOp.name().getValueOr(""), hint)); + ompLoc, bodyGenCB, {}, criticalOp.name().getValueOr(""), hint)); return success(); } @@ -583,12 +583,12 @@ convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder, // TODO: Perform finalization actions for variables. 
This has to be // called for variables which have destructors/finalizers. - auto finiCB = [&](InsertPointTy codeGenIP) {}; + llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); builder.restoreIP( moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd( - ompLoc, bodyGenCB, finiCB, !orderedRegionOp.simd())); + ompLoc, bodyGenCB, {}, !orderedRegionOp.simd())); return bodyGenStatus; } @@ -648,13 +648,13 @@ convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, // TODO: Perform finalization actions for variables. This has to be // called for variables which have destructors/finalizers. - auto finiCB = [&](InsertPointTy codeGenIP) {}; + llvm::OpenMPIRBuilder::InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSections( - ompLoc, allocaIP, sectionCBs, privCB, finiCB, false, + ompLoc, allocaIP, sectionCBs, privCB, {}, false, sectionsOp.nowait())); return bodyGenStatus; } @@ -671,9 +671,8 @@ convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder, convertOmpOpRegions(singleOp.region(), "omp.single.region", builder, moduleTranslation, bodyGenStatus); }; - auto finiCB = [&](InsertPointTy codeGenIP) {}; builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSingle( - ompLoc, bodyCB, finiCB, singleOp.nowait(), /*DidIt=*/nullptr)); + ompLoc, bodyCB, {}, singleOp.nowait(), /*DidIt=*/nullptr)); return bodyGenStatus; } From bac85dfc8f6318f2e94e3d89946760b71e02197f Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Wed, 27 Apr 2022 16:12:44 -0500 Subject: [PATCH 02/50] Fix check-mlir --- clang/lib/CodeGen/CGStmtOpenMP.cpp | 14 ++-- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 78 +++++++++++++----- llvm/lib/Analysis/CFGPrinter.cpp | 4 +- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 81 +++++++++++++------ 4 files changed, 122 insertions(+), 55 deletions(-) diff --git 
a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index fc1b362da3e13..c0a158025aa5d 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -1736,7 +1736,7 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { // thus calls destructors etc. auto FiniCB = [this]( InsertPointTy IP, llvm:: omp::Directive LeaveReason, - llvm::OpenMPIRBuilder:: OMPRegionInfo &Region) { + llvm::OpenMPIRBuilder:: OMPRegionInfo *Region) { OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); }; @@ -4008,7 +4008,7 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { auto FiniCB = [this]( InsertPointTy IP, llvm:: omp::Directive LeaveReason, - llvm::OpenMPIRBuilder:: OMPRegionInfo &Region) { + llvm::OpenMPIRBuilder:: OMPRegionInfo *Region) { OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); }; @@ -4079,7 +4079,7 @@ void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt(); auto FiniCB = [this]( InsertPointTy IP, llvm:: omp::Directive LeaveReason, - llvm::OpenMPIRBuilder:: OMPRegionInfo &Region) { + llvm::OpenMPIRBuilder:: OMPRegionInfo*Region) { OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); }; @@ -4162,7 +4162,7 @@ void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { auto FiniCB = [this]( InsertPointTy IP, llvm:: omp::Directive LeaveReason, - llvm::OpenMPIRBuilder:: OMPRegionInfo &Region) { + llvm::OpenMPIRBuilder:: OMPRegionInfo *Region) { OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); }; @@ -4210,7 +4210,7 @@ void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) { auto FiniCB = [this]( InsertPointTy IP, llvm:: omp::Directive LeaveReason, - llvm::OpenMPIRBuilder:: OMPRegionInfo &Region) { + llvm::OpenMPIRBuilder:: OMPRegionInfo *Region) { OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); }; @@ -4252,7 +4252,7 @@ void 
CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { auto FiniCB = [this]( InsertPointTy IP, llvm:: omp::Directive LeaveReason, - llvm::OpenMPIRBuilder:: OMPRegionInfo &Region) { + llvm::OpenMPIRBuilder:: OMPRegionInfo *Region) { OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); }; @@ -5592,7 +5592,7 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { auto FiniCB = [this]( InsertPointTy IP, llvm:: omp::Directive LeaveReason, - llvm::OpenMPIRBuilder:: OMPRegionInfo &Region) { + llvm::OpenMPIRBuilder:: OMPRegionInfo *Region) { OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); }; diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 05aa66d49653d..b99ba4559b7ef 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -76,7 +76,7 @@ class OpenMPIRBuilder { public: /// Create a new OpenMPIRBuilder operating on the given module \p M. This will /// not have an effect on \p M (see initialize). - OpenMPIRBuilder(Module &M) : M(M), Builder(M.getContext()) {} + OpenMPIRBuilder(Module &M); ~OpenMPIRBuilder(); /// Initialize the internal state, this will put structures types and @@ -99,7 +99,7 @@ class OpenMPIRBuilder { /// Callback type for variable finalization (think destructors). /// - /// \param CodeGenIP is the insertion point at which the finalization code + /// \param ExitingIP is the insertion point at which the finalization code /// should be placed. /// /// A finalize callback knows about all objects that need finalization, e.g. @@ -108,19 +108,24 @@ class OpenMPIRBuilder { using LeaveRegionCallbackTy = std::function; + OMPRegionInfo *Region)>; enum class RegionKind { + /// Sentinel object so we don't always have to check whether the stack is empty. Toplevel, + + /// Actions on loop-associated directives are deferred until all applyXYZ actions have been applied to them. 
CanonicalLoop, + + /// Non-loop OpenMP regions. Directive }; struct OMPRegionInfo { RegionKind Kind; omp::Directive DK; - bool IsCancellable; + bool IsCancellable; // TODO: remove; determine ourselves whether there was a cancelling construct inside LeaveRegionCallbackTy FiniCB; OMPRegionInfo( @@ -130,25 +135,21 @@ class OpenMPIRBuilder { LeaveRegionCallbackTy FiniCB) : Kind(Kind), DK(DK), IsCancellable(IsCancellable), FiniCB(std::move(FiniCB)) { assertOK(); } +#ifndef NDEBUG + ~OMPRegionInfo() { + assertOK(); + } +#endif void exitingEdge(InsertPointTy ExitingIP, omp::Directive LeaveReason) { assert(LeaveReason == omp::OMPD_unknown || LeaveReason == omp:: OMPD_cancellation_point); if (!FiniCB) return; - FiniCB(ExitingIP, LeaveReason, *this); + FiniCB(ExitingIP, LeaveReason, this); } /// Consistency self-check. - void assertOK() const { - if (Kind == RegionKind::Directive) { - switch (DK) { - case omp::OMPD_parallel: - break; - default: - llvm_unreachable("Not a valid OpenMP region"); - } - } - } + void assertOK() const; }; @@ -156,7 +157,43 @@ class OpenMPIRBuilder { /// The finalization stack made up of finalize callbacks currently in-flight, /// wrapped into FinalizationInfo objects that reference also the finalization /// target block and the kind of cancellable directive. - SmallVector RegionStack; + SmallVector,8> RegionStack; + + OMPRegionInfo* pushRegion( omp::Directive DK, + bool IsCancellable, + LeaveRegionCallbackTy FiniCB = {}) { + RegionStack.emplace_back(new OMPRegionInfo(RegionKind::Directive, DK, IsCancellable,std::move( FiniCB))); + return RegionStack.back().get(); + } + + void emitRegionExit( InsertPointTy ExitingIP, OMPRegionInfo* RegionToLeave, omp::Directive LeaveReason) { +#ifndef NDEBUG + switch(LeaveReason) { + case omp::OMPD_unknown: + // Regular region exit + break; + case omp::OMPD_cancellation_point: + case omp::OMPD_barrier: + // Cancellation // TODO: Also need need to know whether #pragma omp cancel for/#pragma omp cancel parallel/?? 
+ break; + default: + llvm_unreachable("unrecognized reason to leave region"); + } +#endif + + for (auto &R : reverse(RegionStack) ) { + if ( R->FiniCB) + R->FiniCB(ExitingIP, LeaveReason,R.get()); + + if (R.get() == RegionToLeave) return; + } + llvm_unreachable("region to exit not on stack?"); + } + + void popRegion(omp::Directive DK) { + assert( RegionStack.back()->DK == DK && "unbalanced region push/pop" ); + RegionStack.pop_back(); + } #if 0 /// Push a finalization callback on the finalization stack. @@ -897,10 +934,10 @@ class OpenMPIRBuilder { private: OMPRegionInfo *getInnermostDirectionRegion(omp::Directive DK) { for (auto& R : reverse(RegionStack)) { - if (R.Kind == RegionKind::Toplevel) - return &R; - if (R.Kind == RegionKind::Directive && R.DK == DK) - return &R; + if (R->Kind == RegionKind::Toplevel) + return R.get(); + if (R->Kind == RegionKind::Directive && R->DK == DK) + return R.get(); } llvm_unreachable("expected toplevel region"); } @@ -908,6 +945,7 @@ class OpenMPIRBuilder { /// Return true if the last entry in the finalization stack is of kind \p DK /// and cancellable. bool isLastFinalizationInfoCancellable(omp::Directive DK) { + // FIXME: Don't all the regions in-between also need to be cancellable? return getInnermostDirectionRegion(DK)->IsCancellable; } diff --git a/llvm/lib/Analysis/CFGPrinter.cpp b/llvm/lib/Analysis/CFGPrinter.cpp index f8eba1a00f28d..3cfca42128a26 100644 --- a/llvm/lib/Analysis/CFGPrinter.cpp +++ b/llvm/lib/Analysis/CFGPrinter.cpp @@ -260,7 +260,7 @@ PreservedAnalyses CFGOnlyPrinterPass::run(Function &F, /// program, displaying the CFG of the current function. This depends on there /// being a 'dot' and 'gv' program in your path. 
/// -void Function::viewCFG() const { viewCFG(false, nullptr, nullptr); } + LLVM_DUMP_METHOD void Function::viewCFG() const { viewCFG(false, nullptr, nullptr); } void Function::viewCFG(bool ViewCFGOnly, const BlockFrequencyInfo *BFI, const BranchProbabilityInfo *BPI) const { @@ -275,7 +275,7 @@ void Function::viewCFG(bool ViewCFGOnly, const BlockFrequencyInfo *BFI, /// into the nodes, just the label. If you are only interested in the CFG /// this can make the graph smaller. /// -void Function::viewCFGOnly() const { viewCFGOnly(nullptr, nullptr); } +LLVM_DUMP_METHOD void Function::viewCFGOnly() const { viewCFGOnly(nullptr, nullptr); } void Function::viewCFGOnly(const BlockFrequencyInfo *BFI, const BranchProbabilityInfo *BPI) const { diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index a3f42b3f02555..d8c94b1a94c04 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -323,6 +323,11 @@ BasicBlock *llvm::splitBB(IRBuilder<> &Builder, bool CreateBranch, return New; } +static BasicBlock *splitBBWithSuffix(IRBuilderBase::InsertPoint IP, bool CreateBranch, llvm::Twine Suffix) { + BasicBlock *Old = IP.getBlock(); + return splitBB(IP, CreateBranch, Old->getName() + Suffix); +} + BasicBlock *llvm::splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, llvm::Twine Suffix) { BasicBlock *Old = Builder.GetInsertBlock(); @@ -430,6 +435,8 @@ Function *OpenMPIRBuilder::getOrCreateRuntimeFunctionPtr(RuntimeFunction FnID) { void OpenMPIRBuilder::initialize() { initializeTypes(M); } void OpenMPIRBuilder::finalize(Function *Fn) { + assert(RegionStack.size() == 1 && RegionStack.back()->Kind == RegionKind::Toplevel && "OMPRegion push/pop must be balanced"); + SmallPtrSet ParallelRegionBlockSet; SmallVector Blocks; SmallVector DeferredOutlines; @@ -518,6 +525,15 @@ void OpenMPIRBuilder::finalize(Function *Fn) { OutlineInfos = std::move(DeferredOutlines); } 
+OpenMPIRBuilder::OpenMPIRBuilder(Module &M) : M(M), Builder(M.getContext()) { + RegionStack.emplace_back(new OMPRegionInfo(RegionKind::Toplevel, omp::OMPD_unknown,/*IsCancellable*/ false, []( InsertPointTy ExitingIP, + omp::Directive LeaveReason, + OMPRegionInfo *Region) { + llvm_unreachable("top-level is not finialized"); + })); + assert(RegionStack.size()==1); +} + OpenMPIRBuilder::~OpenMPIRBuilder() { assert(OutlineInfos.empty() && "There must be no outstanding outlinings"); } @@ -647,6 +663,27 @@ Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) { "omp_global_thread_num"); } +void OpenMPIRBuilder::OMPRegionInfo:: assertOK() const { +#ifndef NDEBUG + switch (Kind) { + case RegionKind::Toplevel: + assert(DK == omp::OMPD_unknown && "toplevel region is not a specific kind"); + assert(!IsCancellable && "top-level is not cancellable"); + break; + case RegionKind::CanonicalLoop: + break; + case RegionKind::Directive: + switch (DK) { + case omp::OMPD_parallel: + break; + default: + llvm_unreachable("Not a valid OpenMP region"); + } + break; + } +#endif +} + OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createBarrier(const LocationDescription &Loc, Directive DK, bool ForceSimpleCall, bool CheckCancelFlag) { @@ -777,10 +814,12 @@ void OpenMPIRBuilder::emitCancelationCheckImpl(LocationDescription Loc, Value *C // From the cancellation block we finalize all variables and go to the // post finalization block that is known to the FiniCB callback. Builder.SetInsertPoint(CancellationBlock); - // if (ExitCB) - // ExitCB(Builder.saveIP()); - //auto &FI = FinalizationStack.back(); - //FI.FiniCB(Builder.saveIP()); +#if 0 + if (ExitCB) + ExitCB(Builder.saveIP()); + auto &FI = FinalizationStack.back(); + FI.FiniCB(Builder.saveIP()); +#endif // The continuation block is where code generation continues. 
Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin()); @@ -868,11 +907,12 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(const LocationDescripti BasicBlock *PRegExitBB = PRegPreFiniBB->splitBasicBlock(ThenTI, "omp.par.exit"); -#if 0 - auto FiniCBWrapper = [&](InsertPointTy IP) { +#if 1 + auto FiniCBWrapper = [&](InsertPointTy IP, omp::Directive LeaveReason, OMPRegionInfo *Region) { // Hide "open-ended" blocks from the given FiniCB by setting the right jump // target to the region exit block. if (IP.getBlock()->end() == IP.getPoint()) { + llvm_unreachable("don't do such thing!!!"); IRBuilder<>::InsertPointGuard IPG(Builder); Builder.restoreIP(IP); Instruction *I = Builder.CreateBr(PRegExitBB); @@ -881,10 +921,12 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(const LocationDescripti assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 && IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB && "Unexpected insertion point for finalization call!"); - return FiniCB(IP); + if (FiniCB) + FiniCB(IP, LeaveReason, Region); }; - FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable}); + //FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable}); + OMPRegionInfo* ParallelRegion = pushRegion(OMPD_parallel,IsCancellable, FiniCBWrapper); #endif // Generate the privatization allocas in the block that will become the entry @@ -1026,12 +1068,16 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(const LocationDescripti "Unexpected finalization stack state!"); #endif + Instruction *PRegPreFiniTI = PRegPreFiniBB->getTerminator(); InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator()); #if 0 FiniCB(PreFiniIP); #endif + emitRegionExit( PreFiniIP, ParallelRegion, OMPD_unknown ); + + popRegion(omp::OMPD_parallel); OI.OuterAllocaBB = OuterAllocaBlock; OI.EntryBB = PRegEntryBB; @@ -1636,25 +1682,8 @@ CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton( 
Builder.CreateICmpULT(IndVarPHI, TripCount, "omp_" + Name + ".cmp"); Builder.CreateCondBr(Cmp, Body, Exit); -#if 0 - bool Finalize = true; - if (Finalize) { - Builder.SetInsertPoint(Body); - Builder.CreateBr(Continue); - } -#endif - -#if 0 - Builder.SetInsertPoint(Continue); + Builder.SetInsertPoint(Body); Builder.CreateBr(Latch); -#endif - -#if 0 - if (Finalize) { - Builder.SetInsertPoint(Cancel); - Builder.CreateUnreachable(); - } -#endif Builder.SetInsertPoint(Latch); Value *Next = Builder.CreateAdd(IndVarPHI, ConstantInt::get(IndVarTy, 1), From 51296c7f5554e3899baae015c2d0440f0663fa83 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Wed, 27 Apr 2022 20:48:18 -0500 Subject: [PATCH 03/50] sections --- clang/lib/CodeGen/CodeGenFunction.h | 20 +- llvm/include/llvm/Analysis/CFGPrinter.h | 87 ++++- .../llvm/Analysis/DOTGraphTraitsPass.h | 6 +- llvm/include/llvm/Analysis/RegionPrinter.h | 15 +- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 115 +------ llvm/include/llvm/IR/IRBuilder.h | 10 +- llvm/lib/Analysis/CFGPrinter.cpp | 72 ++++- llvm/lib/Analysis/RegionPrinter.cpp | 306 +++++++++++++++++- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 147 ++++++++- llvm/lib/Support/GraphWriter.cpp | 2 +- 10 files changed, 618 insertions(+), 162 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index b3694f4e2ae2e..82220c7217d8f 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -34,6 +34,7 @@ #include "clang/Basic/OpenMPKinds.h" #include "clang/Basic/TargetInfo.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/Analysis/CFGPrinter.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" @@ -1774,18 +1775,19 @@ class CodeGenFunction : public CodeGenTypeCache { /// Emit the Finalization for an OMP region /// \param CGF The Codegen function this belongs to /// \param IP Insertion point for generating the finalization code. 
- static void FinalizeOMPRegion(CodeGenFunction &CGF, InsertPointTy IP) { - CGBuilderTy::InsertPointGuard IPG(CGF.Builder); - assert(IP.getBlock()->end() != IP.getPoint() && - "OpenMP IR Builder should cause terminated block!"); + static void FinalizeOMPRegion(CodeGenFunction &CGF, InsertPointTy IP) { // TODO: move to .cpp file + CGBuilderTy::InsertPointGuard IPG(CGF.Builder); // MK: needed? + + CGF.Builder.restoreIP(IP); + auto DestBB = llvm:: splitBB( CGF.Builder, false, ".ompfinalize"); - llvm::BasicBlock *IPBB = IP.getBlock(); - llvm::BasicBlock *DestBB = IPBB->getUniqueSuccessor(); - assert(DestBB && "Finalization block should have one successor!"); + // llvm::BasicBlock *IPBB = IP.getBlock(); + // llvm::BasicBlock *DestBB = IPBB->getUniqueSuccessor(); + // assert(DestBB && "Finalization block should have one successor!"); // erase and replace with cleanup branch. - IPBB->getTerminator()->eraseFromParent(); - CGF.Builder.SetInsertPoint(IPBB); + // IPBB->getTerminator()->eraseFromParent(); // Don't do this! 
+ // CGF.Builder.SetInsertPoint(IPBB); CodeGenFunction::JumpDest Dest = CGF.getJumpDestInCurrentScope(DestBB); CGF.EmitBranchThroughCleanup(Dest); } diff --git a/llvm/include/llvm/Analysis/CFGPrinter.h b/llvm/include/llvm/Analysis/CFGPrinter.h index 768cda59c57de..e2c55d66faf7b 100644 --- a/llvm/include/llvm/Analysis/CFGPrinter.h +++ b/llvm/include/llvm/Analysis/CFGPrinter.h @@ -29,7 +29,9 @@ #include "llvm/Support/DOTGraphTraits.h" #include "llvm/Support/FormatVariadic.h" + namespace llvm { + template struct GraphTraits; class CFGViewerPass : public PassInfoMixin { public: @@ -52,7 +54,7 @@ class CFGOnlyPrinterPass : public PassInfoMixin { }; class DOTFuncInfo { -private: +public: const Function *F; const BlockFrequencyInfo *BFI; const BranchProbabilityInfo *BPI; @@ -60,13 +62,22 @@ class DOTFuncInfo { bool ShowHeat; bool EdgeWeights; bool RawWeights; + const BasicBlock *HighlightBB; + const Instruction *HighlightInst; public: - DOTFuncInfo(const Function *F) : DOTFuncInfo(F, nullptr, nullptr, 0) {} - + // DOTFuncInfo(const Function *F) : DOTFuncInfo(F, nullptr, nullptr, 0) {} + DOTFuncInfo(const Function *F, const BasicBlock *HighlightBB = nullptr, + const Instruction *HighlightInst = nullptr) + : DOTFuncInfo(F, nullptr, nullptr, 0, HighlightBB, HighlightInst) {} DOTFuncInfo(const Function *F, const BlockFrequencyInfo *BFI, const BranchProbabilityInfo *BPI, uint64_t MaxFreq) - : F(F), BFI(BFI), BPI(BPI), MaxFreq(MaxFreq) { + : DOTFuncInfo(F, BFI, BPI, 0, nullptr, nullptr) {} + DOTFuncInfo(const Function *F, const BlockFrequencyInfo *BFI, + const BranchProbabilityInfo *BPI, uint64_t MaxFreq, + const BasicBlock *HighlightBB, const Instruction *HighlightInst) + : F(F), BFI(BFI), BPI(BPI), MaxFreq(MaxFreq), HighlightBB(HighlightBB), + HighlightInst(HighlightInst) { ShowHeat = false; EdgeWeights = !!BPI; // Print EdgeWeights when BPI is available. RawWeights = !!BFI; // Print RawWeights when BFI is available. 
@@ -148,13 +159,44 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { } static std::string getCompleteNodeLabel( - const BasicBlock *Node, DOTFuncInfo *, + const BasicBlock *Node, DOTFuncInfo *CFGInfo, llvm::function_ref - HandleBasicBlock = [](raw_string_ostream &OS, - const BasicBlock &Node) -> void { OS << Node; }, + HandleBasicBlock = {}, llvm::function_ref - HandleComment = eraseComment) { - enum { MaxColumns = 80 }; + HandleComment = eraseComment, + unsigned LongestCol = 0) { + + auto BasicBlockHandler = HandleBasicBlock; + if (!BasicBlockHandler) { + BasicBlockHandler = [CFGInfo, + LongestCol](raw_string_ostream &OS, + const BasicBlock &Node) -> void { + if (!CFGInfo || !CFGInfo->HighlightBB) { + OS << Node; + return; + } + + Node.printAsOperand(OS, false); + OS << ":\n"; + for (auto &&Inst : Node) { + if (&Inst == CFGInfo->HighlightInst) { + OS << '<'; + for (unsigned I = 2; I < LongestCol; ++I) + OS << '-'; + OS << ">\n"; + } + OS << Inst << "\n"; + } + if (CFGInfo->HighlightBB == &Node && !CFGInfo->HighlightInst) { + OS << '<'; + for (unsigned I = 2; I < LongestCol; ++I) + OS << '-'; + OS << '>'; + } + }; + } + + //enum { MaxColumns = 80 }; std::string Str; raw_string_ostream OS(Str); @@ -163,7 +205,8 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { OS << ":"; } - HandleBasicBlock(OS, *Node); + BasicBlockHandler(OS, *Node); + unsigned LongCol = 1; std::string OutStr = OS.str(); if (OutStr[0] == '\n') OutStr.erase(OutStr.begin()); @@ -180,6 +223,7 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { } else if (OutStr[i] == ';') { // Delete comments! unsigned Idx = OutStr.find('\n', i + 1); // Find end of line HandleComment(OutStr, i, Idx); +#if 0 } else if (ColNum == MaxColumns) { // Wrap lines. // Wrap very long names even though we can't find a space. if (!LastSpace) @@ -188,11 +232,19 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { ColNum = i - LastSpace; LastSpace = 0; i += 3; // The loop will advance 'i' again. 
+#endif } else ++ColNum; + LongCol = std::max(LongCol, ColNum); if (OutStr[i] == ' ') LastSpace = i; } + + if (!HandleBasicBlock && CFGInfo && CFGInfo->HighlightBB && !LongestCol) { + return getCompleteNodeLabel(Node, CFGInfo, HandleBasicBlock, + HandleComment, LongCol); + } + return OutStr; } @@ -281,6 +333,9 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { } std::string getNodeAttributes(const BasicBlock *Node, DOTFuncInfo *CFGInfo) { + if (Node == CFGInfo->HighlightBB) { + return "style=filled,fillcolor=olivedrab1"; + } if (!CFGInfo->showHeatColors()) return ""; @@ -298,6 +353,18 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { bool isNodeHidden(const BasicBlock *Node, const DOTFuncInfo *CFGInfo); void computeDeoptOrUnreachablePaths(const Function *F); }; + + +void viewCFG(const Function *F) ; +void viewCFG(const Function &F) ; +void viewCFG(const BasicBlock *BB) ; +void viewCFG(const BasicBlock &BB) ; +void viewCFG(const Instruction *I) ; +void viewCFG(const Instruction &I) ; + +// RegionPrinter.cpp +void viewRegion(const Function *F) ; +void viewRegion(const Function &F) ; } // End llvm namespace namespace llvm { diff --git a/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h b/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h index 470e008df06ec..9b265486b2788 100644 --- a/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h +++ b/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h @@ -27,13 +27,14 @@ struct DefaultAnalysisGraphTraits { static GraphT getGraph(AnalysisT *A) { return A; } }; + template < typename AnalysisT, bool IsSimple, typename GraphT = AnalysisT *, typename AnalysisGraphTraitsT = DefaultAnalysisGraphTraits > class DOTGraphTraitsViewer : public FunctionPass { public: - DOTGraphTraitsViewer(StringRef GraphName, char &ID) - : FunctionPass(ID), Name(GraphName) {} + DOTGraphTraitsViewer(StringRef GraphName, char &ID) + : FunctionPass(ID), Name(GraphName) {} /// Return true if this function should be processed. 
/// @@ -69,6 +70,7 @@ class DOTGraphTraitsViewer : public FunctionPass { std::string Name; }; + template < typename AnalysisT, bool IsSimple, typename GraphT = AnalysisT *, typename AnalysisGraphTraitsT = DefaultAnalysisGraphTraits > diff --git a/llvm/include/llvm/Analysis/RegionPrinter.h b/llvm/include/llvm/Analysis/RegionPrinter.h index 154ac35c486ad..d096e27687a0b 100644 --- a/llvm/include/llvm/Analysis/RegionPrinter.h +++ b/llvm/include/llvm/Analysis/RegionPrinter.h @@ -18,8 +18,11 @@ namespace llvm { class FunctionPass; class Function; class RegionInfo; + class BasicBlock; + class Instruction; FunctionPass *createRegionViewerPass(); + FunctionPass *createRegionViewerPass(const BasicBlock *BB, const Instruction *Inst); FunctionPass *createRegionOnlyViewerPass(); FunctionPass *createRegionPrinterPass(); FunctionPass *createRegionOnlyPrinterPass(); @@ -32,7 +35,8 @@ namespace llvm { /// Includes the instructions in each BasicBlock. /// /// @param RI The analysis to display. - void viewRegion(llvm::RegionInfo *RI); + void viewRegion(RegionInfo *RI); + void viewRegion(RegionInfo &RI); /// Analyze the regions of a function and open its GraphViz /// visualization in a viewer. @@ -43,7 +47,14 @@ namespace llvm { /// manager currently holds. /// /// @param F Function to analyze. - void viewRegion(const llvm::Function *F); + void viewRegion(const Function *F); + void viewRegion(const Function &F); + + void viewRegion(const BasicBlock *I); + void viewRegion(const BasicBlock &I); + + void viewRegion(const Instruction *I); + void viewRegion(const Instruction &I); /// Open a viewer to display the GraphViz vizualization of the analysis /// result. 
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index b99ba4559b7ef..a6577b21e2074 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -141,12 +141,7 @@ class OpenMPIRBuilder { } #endif - void exitingEdge(InsertPointTy ExitingIP, omp::Directive LeaveReason) { - assert(LeaveReason == omp::OMPD_unknown || LeaveReason == omp:: OMPD_cancellation_point); - if (!FiniCB) return; - FiniCB(ExitingIP, LeaveReason, this); - } /// Consistency self-check. void assertOK() const; @@ -159,72 +154,28 @@ class OpenMPIRBuilder { /// target block and the kind of cancellable directive. SmallVector,8> RegionStack; - OMPRegionInfo* pushRegion( omp::Directive DK, - bool IsCancellable, - LeaveRegionCallbackTy FiniCB = {}) { - RegionStack.emplace_back(new OMPRegionInfo(RegionKind::Directive, DK, IsCancellable,std::move( FiniCB))); - return RegionStack.back().get(); - } - void emitRegionExit( InsertPointTy ExitingIP, OMPRegionInfo* RegionToLeave, omp::Directive LeaveReason) { -#ifndef NDEBUG - switch(LeaveReason) { - case omp::OMPD_unknown: - // Regular region exit - break; - case omp::OMPD_cancellation_point: - case omp::OMPD_barrier: - // Cancellation // TODO: Also need need to know whether #pragma omp cancel for/#pragma omp cancel parallel/?? 
- break; - default: - llvm_unreachable("unrecognized reason to leave region"); - } -#endif + OMPRegionInfo *getInnermostDirectionRegion(omp::Directive DK) ; - for (auto &R : reverse(RegionStack) ) { - if ( R->FiniCB) - R->FiniCB(ExitingIP, LeaveReason,R.get()); + OMPRegionInfo* pushRegion( omp::Directive DK, + bool IsCancellable, + LeaveRegionCallbackTy FiniCB = {}); - if (R.get() == RegionToLeave) return; - } - llvm_unreachable("region to exit not on stack?"); - } + void emitRegionExit( InsertPointTy ExitingIP, OMPRegionInfo* RegionToLeave, omp::Directive LeaveReason =omp:: OMPD_unknown); - void popRegion(omp::Directive DK) { - assert( RegionStack.back()->DK == DK && "unbalanced region push/pop" ); - RegionStack.pop_back(); - } + void popRegion(omp::Directive DK); -#if 0 - /// Push a finalization callback on the finalization stack. - /// - /// NOTE: Temporary solution until Clang CG is gone. - void pushFinalizationCB(const FinalizationInfo &FI) { - FinalizationStack.push_back(FI); - } - void pushFinalizationCB(FinalizeCallbackTy FiniCB) { - FinalizationInfo FI{{}, omp::Directive::OMPD_unknown, false}; - FinalizationStack.push_back(FI); - } -#endif -#if 0 - void pushCancellationCB(CancellationCallbackTy CancelCB) { - FinalizationInfo FI{ {}, None, false, CancelCB, nullptr }; - FinalizationStack.push_back(FI); + /// Return true if the last entry in the finalization stack is of kind \p DK + /// and cancellable. + bool isLastFinalizationInfoCancellable(omp::Directive DK) { + // FIXME: Don't all the regions in-between also need to be cancellable? + return getInnermostDirectionRegion(DK)->IsCancellable; } - /// Pop the last finalization callback from the finalization stack. - /// - /// NOTE: Temporary solution until Clang CG is gone. 
- void popFinalizationCB() { - assert(FinalizationStack.back().UserManaged); - FinalizationStack.pop_back(); - } -#endif public: /// Callback type for body (=inner region) code generation @@ -888,7 +839,7 @@ class OpenMPIRBuilder { /// \param ExitCB Extra code to be generated in the exit block. void emitCancelationCheckImpl(LocationDescription Loc, Value *CancelFlag, omp::Directive CanceledDirective, - omp::Directive CanceledBy); + omp::Directive CancelledBy); /// Generate a barrier runtime call. /// @@ -908,48 +859,6 @@ class OpenMPIRBuilder { /// \param Loc The location at which the request originated and is fulfilled. void emitFlush(const LocationDescription &Loc); - -public: -#if 0 - llvm::Optional getTopmostDirective() const { - if (FinalizationStack.empty()) - return None; - return FinalizationStack.back().DK; - } - - bool isTopmostUserManaged() const { - if (FinalizationStack.empty()) - return false; - return FinalizationStack.back().UserManaged; - } - - bool isTopmostBuilderManaged() const { - if (FinalizationStack.empty()) - return false; - return !FinalizationStack.back().UserManaged; - } -#endif - - - private: - OMPRegionInfo *getInnermostDirectionRegion(omp::Directive DK) { - for (auto& R : reverse(RegionStack)) { - if (R->Kind == RegionKind::Toplevel) - return R.get(); - if (R->Kind == RegionKind::Directive && R->DK == DK) - return R.get(); - } - llvm_unreachable("expected toplevel region"); - } - - /// Return true if the last entry in the finalization stack is of kind \p DK - /// and cancellable. - bool isLastFinalizationInfoCancellable(omp::Directive DK) { - // FIXME: Don't all the regions in-between also need to be cancellable? - return getInnermostDirectionRegion(DK)->IsCancellable; - } - - public: /// Generate a taskwait runtime call. /// /// \param Loc The location at which the request originated and is fulfilled. 
diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index 5ac7890905720..00544f64dae4d 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -241,7 +241,10 @@ class IRBuilderBase { /// Creates a new insertion point at the given location. InsertPoint(BasicBlock *InsertBlock, BasicBlock::iterator InsertPoint) - : Block(InsertBlock), Point(InsertPoint) {} + : Block(InsertBlock), Point(InsertPoint) { + assert(!isSet() || InsertBlock->end() == InsertPoint || + InsertPoint->getParent() == InsertBlock); + } /// Returns true if this insert point is set. bool isSet() const { return (Block != nullptr); } @@ -2551,6 +2554,11 @@ class IRBuilder : public IRBuilderBase { // Create wrappers for C Binding types (see CBindingWrapping.h). DEFINE_SIMPLE_CONVERSION_FUNCTIONS(IRBuilder<>, LLVMBuilderRef) + +void viewCFG(const llvm::IRBuilderBase *Builder) ; +void viewCFG(const llvm::IRBuilderBase &Builder) ; +void viewCFG(const llvm::IRBuilderBase::InsertPoint *IP) ; +void viewCFG(const llvm::IRBuilderBase::InsertPoint &IP) ; } // end namespace llvm #endif // LLVM_IR_IRBUILDER_H diff --git a/llvm/lib/Analysis/CFGPrinter.cpp b/llvm/lib/Analysis/CFGPrinter.cpp index 3cfca42128a26..55998c603cca0 100644 --- a/llvm/lib/Analysis/CFGPrinter.cpp +++ b/llvm/lib/Analysis/CFGPrinter.cpp @@ -19,6 +19,7 @@ #include "llvm/Analysis/CFGPrinter.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" @@ -128,7 +129,7 @@ PreservedAnalyses CFGViewerPass::run(Function &F, FunctionAnalysisManager &AM) { return PreservedAnalyses::all(); auto *BFI = &AM.getResult(F); auto *BPI = &AM.getResult(F); - viewCFG(F, BFI, BPI, getMaxFreq(F, BFI)); + ::viewCFG(F, BFI, BPI, getMaxFreq(F, BFI)); return PreservedAnalyses::all(); } @@ -144,7 +145,7 @@ struct CFGOnlyViewerLegacyPass : public FunctionPass { return false; auto *BPI = 
&getAnalysis().getBPI(); auto *BFI = &getAnalysis().getBFI(); - viewCFG(F, BFI, BPI, getMaxFreq(F, BFI), /*CFGOnly=*/true); + ::viewCFG(F, BFI, BPI, getMaxFreq(F, BFI), /*CFGOnly=*/true); return false; } @@ -169,7 +170,7 @@ PreservedAnalyses CFGOnlyViewerPass::run(Function &F, return PreservedAnalyses::all(); auto *BFI = &AM.getResult(F); auto *BPI = &AM.getResult(F); - viewCFG(F, BFI, BPI, getMaxFreq(F, BFI), /*CFGOnly=*/true); + ::viewCFG(F, BFI, BPI, getMaxFreq(F, BFI), /*CFGOnly=*/true); return PreservedAnalyses::all(); } @@ -260,7 +261,7 @@ PreservedAnalyses CFGOnlyPrinterPass::run(Function &F, /// program, displaying the CFG of the current function. This depends on there /// being a 'dot' and 'gv' program in your path. /// - LLVM_DUMP_METHOD void Function::viewCFG() const { viewCFG(false, nullptr, nullptr); } +void Function::viewCFG() const { viewCFG(false, nullptr, nullptr); } void Function::viewCFG(bool ViewCFGOnly, const BlockFrequencyInfo *BFI, const BranchProbabilityInfo *BPI) const { @@ -270,12 +271,13 @@ void Function::viewCFG(bool ViewCFGOnly, const BlockFrequencyInfo *BFI, ViewGraph(&CFGInfo, "cfg" + getName(), ViewCFGOnly); } + /// viewCFGOnly - This function is meant for use from the debugger. It works /// just like viewCFG, but it does not include the contents of basic blocks /// into the nodes, just the label. If you are only interested in the CFG /// this can make the graph smaller. 
/// -LLVM_DUMP_METHOD void Function::viewCFGOnly() const { viewCFGOnly(nullptr, nullptr); } +void Function::viewCFGOnly() const { viewCFGOnly(nullptr, nullptr); } void Function::viewCFGOnly(const BlockFrequencyInfo *BFI, const BranchProbabilityInfo *BPI) const { @@ -332,3 +334,63 @@ bool DOTGraphTraits::isNodeHidden(const BasicBlock *Node, } return false; } + + + + +void llvm::viewCFG(const Function* F) { + if (!F) return; +F->viewCFG(); +} +void llvm::viewCFG(const Function& F) { + return viewCFG(&F); +} + +void llvm::viewCFG(const BasicBlock* BB) { + if (!BB) return; + auto *F = BB->getParent(); + DOTFuncInfo CFGInfo(F, BB, nullptr); + ViewGraph(&CFGInfo, "cfg" + F->getName(), false); +} +void llvm::viewCFG(const BasicBlock& BB) { + return viewCFG(&BB); +} + +void llvm::viewCFG(const Instruction* I) { + if (!I) return; + auto *BB =I-> getParent(); + auto *F = BB->getParent(); + DOTFuncInfo CFGInfo(F, BB, I); + ViewGraph(&CFGInfo, "cfg" + F->getName(), false); +} +void llvm::viewCFG(const Instruction& I) { + return viewCFG(&I); +} + +void llvm::viewCFG(const llvm::IRBuilderBase* Builder) { + if (!Builder) return; + return viewCFG(Builder->saveIP()); +} +void llvm::viewCFG(const llvm::IRBuilderBase &Builder) { + return viewCFG(&Builder); +} + +void llvm::viewCFG(const llvm::IRBuilderBase::InsertPoint* IP) { + if (!IP) return; + if (!IP->isSet()) return; + + assert(IP->isSet()); + BasicBlock* Block = IP->getBlock(); + BasicBlock::iterator Point = IP->getPoint(); + Function* F = Block->getParent(); + + // if (!CFGFuncName.empty() && !getName().contains(CFGFuncName)) + // return; + Instruction *Inst = (Point == Block->end()) ? 
nullptr : &*Point; + DOTFuncInfo CFGInfo(F, Block, Inst); + ViewGraph(&CFGInfo, "cfg" + F->getName(), false); +} +void llvm::viewCFG(const llvm::IRBuilderBase ::InsertPoint &IP) { + return viewCFG(&IP); +} + diff --git a/llvm/lib/Analysis/RegionPrinter.cpp b/llvm/lib/Analysis/RegionPrinter.cpp index bb1676c7a9030..6f591362c48e0 100644 --- a/llvm/lib/Analysis/RegionPrinter.cpp +++ b/llvm/lib/Analysis/RegionPrinter.cpp @@ -30,7 +30,58 @@ onlySimpleRegions("only-simple-regions", cl::Hidden, cl::init(false)); + + +namespace { + struct HighlightingRegionInfo { + RegionInfo* RI; + const Function* F; + const BasicBlock* HighlightBB; + const Instruction* HighlightInst; + + HighlightingRegionInfo() = delete; + + HighlightingRegionInfo(RegionInfo *RI, const Function* F, const BasicBlock* HighlightBB = nullptr, const Instruction* HighlightInst = nullptr) + : RI(RI), F(F), HighlightBB(HighlightBB), HighlightInst(HighlightInst) {} + + public: + RegionInfo* getRegionInfo() const {return RI;} + const Function* getFunction() const { return F; } + }; + + +#if 0 + struct HighlightingRegionInfoPassGraphTraits { + static HighlightingRegionInfo* getGraph(RegionInfoPass* RIP) { + // ... 
+ return nullptr; + } + }; +#endif +} + namespace llvm { + template <> + struct GraphTraits : public GraphTraits> { + using nodes_iterator = + df_iterator, false, + GraphTraits>>; + + static NodeRef getEntryNode(HighlightingRegionInfo *G) { + return GraphTraits>::getEntryNode(G->RI->getTopLevelRegion()); + } + + static nodes_iterator nodes_begin(HighlightingRegionInfo* G) { + return nodes_iterator::begin(getEntryNode(G)); + } + + static nodes_iterator nodes_end(HighlightingRegionInfo *G) { + return nodes_iterator::end(getEntryNode(G)); + } + }; + + + template<> struct DOTGraphTraits : public DefaultDOTGraphTraits { @@ -134,16 +185,167 @@ struct DOTGraphTraits : public DOTGraphTraits { printRegionCluster(*G->getTopLevelRegion(), GW, 4); } }; + + } //end namespace llvm + + namespace { + struct RegionInfoPassGraphTraits { + static RegionInfo* getGraph(RegionInfoPass* RIP) { + return &RIP->getRegionInfo(); + } + }; -struct RegionInfoPassGraphTraits { - static RegionInfo *getGraph(RegionInfoPass *RIP) { - return &RIP->getRegionInfo(); - } -}; + +} + + +namespace llvm { + + +#if 0 + template <> + struct GraphTraits : public GraphTraits { + using Base = GraphTraits; + // using Base::NodeRef; + }; +#endif + + + template <> + struct DOTGraphTraits : public DOTGraphTraits { + using Base = DOTGraphTraits; + using Traits = GraphTraits; + + DOTGraphTraits(bool IsSimple = false) : Base(IsSimple) {} + + +#if 0 + static std::string getGraphName(const HighlightingRegionInfo *G) { + return Base::getGraphName(G->RI); + } + + std::string getNodeLabel(RegionNode *Node, HighlightingRegionInfo *G) { + return Base::getNodeLabel(Node, G->RI); + } + + std::string getEdgeAttributes( + RegionNode *SrcNode, + Traits::ChildIteratorType CI, + HighlightingRegionInfo *G) { + return Base::getEdgeAttributes(SrcNode, CI, G->RI); + } +#endif + + static std::string getGraphName(const HighlightingRegionInfo *) { return "Region Graph"; } + + std::string getNodeLabel(RegionNode *Node, 
HighlightingRegionInfo *G) { + //return Base::getNodeLabel(Node, reinterpret_cast(G->RI->getTopLevelRegion())); + + if (!Node->isSubRegion()) { + BasicBlock *BB = Node->getNodeAs(); + + DOTFuncInfo CFGInfo(G->F, G->HighlightBB, G->HighlightInst); + if (isSimple()) + return DOTGraphTraits + ::getSimpleNodeLabel(BB, &CFGInfo); + else + return DOTGraphTraits + ::getCompleteNodeLabel(BB, &CFGInfo); + } + + return "Not implemented"; + } + + + static std::string getNodeAttributes(RegionNode *R, HighlightingRegionInfo *G) { + auto HighlightBB = G->HighlightBB; + if (!R->isSubRegion() && R->getNodeAs() == HighlightBB) { + return "penwidth=5.0,style=filled"; + } + + return ""; + } + + + std::string getEdgeAttributes(RegionNode *srcNode, + Traits::ChildIteratorType CI, + HighlightingRegionInfo *G) { + RegionNode *destNode = *CI; + + if (srcNode->isSubRegion() || destNode->isSubRegion()) + return ""; + + // In case of a backedge, do not use it to define the layout of the nodes. + BasicBlock *srcBB = srcNode->getNodeAs(); + BasicBlock *destBB = destNode->getNodeAs(); + + Region *R = G->RI->getRegionFor(destBB); + + while (R && R->getParent()) + if (R->getParent()->getEntry() == destBB) + R = R->getParent(); + else + break; + + if (R && R->getEntry() == destBB && R->contains(srcBB)) + return "constraint=false"; + + return ""; + } + + + static void printRegionCluster(const Region &R, GraphWriter &GW, unsigned depth = 0, const BasicBlock *HighlightBB=nullptr,const Instruction *HighlightInst = nullptr ) { + raw_ostream &O = GW.getOStream(); + O.indent(2 * depth) << "subgraph cluster_" << static_cast(&R) + << " {\n"; + O.indent(2 * (depth + 1)) << "label = \"\";\n"; + + + + if (!onlySimpleRegions || R.isSimple()) { + O.indent(2 * (depth + 1)) << "style = filled;\n"; + O.indent(2 * (depth + 1)) << "color = " + << ((R.getDepth() * 2 % 12) + 1) << "\n"; + + } else { + O.indent(2 * (depth + 1)) << "style = solid;\n"; + O.indent(2 * (depth + 1)) << "color = " + << ((R.getDepth() * 2 % 
12) + 2) << "\n"; + } + + for (const auto &RI : R) + printRegionCluster(*RI, GW, depth + 1,HighlightBB, HighlightInst ); + + const RegionInfo &RI = *static_cast(R.getRegionInfo()); + + for (auto *BB : R.blocks()) + if (RI.getRegionFor(BB) == &R) + O.indent(2 * (depth + 1)) << "Node" + << static_cast(RI.getTopLevelRegion()->getBBNode(BB)) + << ";\n"; + + + O.indent(2 * depth) << "}\n"; + } + + + static void addCustomGraphFeatures( + const HighlightingRegionInfo *G, + GraphWriter &GW) { + raw_ostream &O = GW.getOStream(); + O << "\tcolorscheme = \"paired12\"\n"; + printRegionCluster(*G->RI->getTopLevelRegion(), GW, 4, G->HighlightBB, G->HighlightInst); + } + }; +} + + + +namespace { struct RegionPrinter : public DOTGraphTraitsPrinter { @@ -168,18 +370,51 @@ struct RegionOnlyPrinter }; char RegionOnlyPrinter::ID = 0; -struct RegionViewer - : public DOTGraphTraitsViewer { + + +struct RegionViewer: public FunctionPass { + using Base = FunctionPass; + static char ID; - RegionViewer() - : DOTGraphTraitsViewer("reg", ID) { - initializeRegionViewerPass(*PassRegistry::getPassRegistry()); + RegionViewer() : RegionViewer(nullptr,nullptr) {} + + RegionViewer(const BasicBlock *HighlightBB, const Instruction *HighlightInst) : FunctionPass(ID), HighlightBB(HighlightBB), HighlightInst(HighlightInst) { + initializeRegionViewerPass(*PassRegistry::getPassRegistry()); + } + + + virtual bool processFunction(Function &F, RegionInfo &Analysis) { + return true; + } + + bool runOnFunction(Function &F) override { + auto &Analysis = getAnalysis().getRegionInfo(); + + if (!processFunction(F, Analysis)) + return false; + + + + HighlightingRegionInfo Graph(&Analysis, &F, HighlightBB, HighlightInst); + ViewGraph(&Graph, "reg", false, Twine("Region Graph for '") + F.getName().str() + "' function"); + + return false; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + AU.addRequired(); } + + +private: + const BasicBlock*HighlightBB ; + const 
Instruction*HighlightInst ; }; char RegionViewer::ID = 0; + + struct RegionOnlyViewer : public DOTGraphTraitsViewer { @@ -219,12 +454,16 @@ FunctionPass* llvm::createRegionViewerPass() { return new RegionViewer(); } +FunctionPass *llvm::createRegionViewerPass(const BasicBlock *BB, const Instruction *Inst) { + return new RegionViewer(BB,Inst); +} + FunctionPass* llvm::createRegionOnlyViewerPass() { return new RegionOnlyViewer(); } #ifndef NDEBUG -static void viewRegionInfo(RegionInfo *RI, bool ShortNames) { +static void viewRegionInfo( RegionInfo *RI, bool ShortNames) { assert(RI && "Argument must be non-null"); llvm::Function *F = RI->getTopLevelRegion()->getEntry()->getParent(); @@ -249,15 +488,54 @@ static void invokeFunctionPass(const Function *F, FunctionPass *ViewerPass) { FPM.doFinalization(); } -void llvm::viewRegion(RegionInfo *RI) { viewRegionInfo(RI, false); } +void llvm::viewRegion( RegionInfo *RI) { if (!RI) return; viewRegionInfo(RI, false); } +void llvm::viewRegion( RegionInfo &RI) { return viewRegion(&RI); } void llvm::viewRegion(const Function *F) { + if (!F) return; invokeFunctionPass(F, createRegionViewerPass()); } +void llvm::viewRegion(const Function &F) { + viewRegion(&F); +} void llvm::viewRegionOnly(RegionInfo *RI) { viewRegionInfo(RI, true); } void llvm::viewRegionOnly(const Function *F) { + if (!F) return; invokeFunctionPass(F, createRegionOnlyViewerPass()); } + + + + + + + +void llvm::viewRegion(const llvm::BasicBlock *BB) { + if (!BB) return ; + + auto F =BB->getParent(); + + invokeFunctionPass(F, createRegionViewerPass( BB, nullptr )); +} +void llvm::viewRegion(const llvm::BasicBlock &BB) { + return viewRegion(&BB); +} + + +void llvm::viewRegion(const llvm::Instruction *Inst){ + if (!Inst) return; + + auto Block = Inst->getParent(); + auto F = Inst->getFunction(); + + invokeFunctionPass(F, createRegionViewerPass( Block, Inst )); +} + +void llvm:: viewRegion(const llvm::Instruction &I) { + return viewRegion(&I); +} + + #endif diff --git 
a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index d8c94b1a94c04..2952cc4af1761 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -435,8 +435,6 @@ Function *OpenMPIRBuilder::getOrCreateRuntimeFunctionPtr(RuntimeFunction FnID) { void OpenMPIRBuilder::initialize() { initializeTypes(M); } void OpenMPIRBuilder::finalize(Function *Fn) { - assert(RegionStack.size() == 1 && RegionStack.back()->Kind == RegionKind::Toplevel && "OMPRegion push/pop must be balanced"); - SmallPtrSet ParallelRegionBlockSet; SmallVector Blocks; SmallVector DeferredOutlines; @@ -535,6 +533,7 @@ OpenMPIRBuilder::OpenMPIRBuilder(Module &M) : M(M), Builder(M.getContext()) { } OpenMPIRBuilder::~OpenMPIRBuilder() { + assert(RegionStack.size() == 1 && RegionStack.back()->Kind == RegionKind::Toplevel && "OMPRegion push/pop must be balanced"); assert(OutlineInfos.empty() && "There must be no outstanding outlinings"); } @@ -663,21 +662,76 @@ Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) { "omp_global_thread_num"); } +OpenMPIRBuilder::OMPRegionInfo *OpenMPIRBuilder::getInnermostDirectionRegion(omp::Directive DK) { + for (auto& R : reverse(RegionStack)) { + if (R->Kind == RegionKind::Toplevel) + return R.get(); + if (R->Kind == RegionKind::Directive && R->DK == DK) + return R.get(); + } + llvm_unreachable("expected toplevel region"); +} + +OpenMPIRBuilder::OMPRegionInfo*OpenMPIRBuilder:: pushRegion( omp::Directive DK, + bool IsCancellable, + LeaveRegionCallbackTy FiniCB ) { + RegionStack.emplace_back(new OMPRegionInfo(RegionKind::Directive, DK, IsCancellable,std::move( FiniCB))); + return RegionStack.back().get(); +} + + +void OpenMPIRBuilder::emitRegionExit( InsertPointTy ExitingIP, OMPRegionInfo* RegionToLeave, omp::Directive LeaveReason) { +#ifndef NDEBUG + switch(LeaveReason) { + case OMPD_unknown: + // Regular region exit + break; + case OMPD_cancellation_point: + case OMPD_barrier: + case 
OMPD_cancel: + // Cancellation // TODO: Also need need to know whether #pragma omp cancel for/#pragma omp cancel parallel/?? + break; + default: + llvm_unreachable("unrecognized reason to leave region"); + } +#endif + + for (auto &R : reverse(RegionStack) ) { + if ( R->FiniCB) + R->FiniCB(ExitingIP, LeaveReason,R.get()); + + if (R.get() == RegionToLeave) return; + } + llvm_unreachable("region to exit not on stack?"); +} + + +void OpenMPIRBuilder::popRegion(omp::Directive DK) { + assert( RegionStack.back()->DK == DK && "unbalanced region push/pop" ); + RegionStack.back()->assertOK(); + RegionStack.pop_back(); +} + + + + + void OpenMPIRBuilder::OMPRegionInfo:: assertOK() const { #ifndef NDEBUG switch (Kind) { case RegionKind::Toplevel: - assert(DK == omp::OMPD_unknown && "toplevel region is not a specific kind"); + assert(DK == OMPD_unknown && "toplevel region is not a specific kind"); assert(!IsCancellable && "top-level is not cancellable"); break; case RegionKind::CanonicalLoop: break; case RegionKind::Directive: switch (DK) { - case omp::OMPD_parallel: + case OMPD_parallel: + case OMPD_sections: break; default: - llvm_unreachable("Not a valid OpenMP region"); + llvm_unreachable("Not a recognized OpenMP region"); } break; } @@ -748,13 +802,33 @@ OpenMPIRBuilder::createCancel(const LocationDescription &Loc, if (!updateToLocation(Loc)) return Loc.IP; +#if 0 // LLVM utilities like blocks with terminators. 
- auto *UI = Builder.CreateUnreachable(); + Instruction* UI ; + if (Builder.GetInsertPoint() == Builder.GetInsertBlock()->end() && !Builder.GetInsertBlock()->getTerminator()) { + UI = Builder.CreateUnreachable(); + } else { + UI = &*Builder.GetInsertPoint(); + } +#endif + BasicBlock *New = nullptr; + if (IfCondition) { + auto Old = Builder.GetInsertBlock(); + New = splitBB(Builder, false); + BasicBlock *ThenBlock = BasicBlock::Create( + Builder.getContext(), Old->getName() + ".if", New->getParent(), New); + Builder.CreateCondBr(IfCondition, ThenBlock, New); + Builder.SetInsertPoint(ThenBlock); + Builder.CreateBr(New); + Builder.SetInsertPoint(ThenBlock->getTerminator()); + } +#if 0 Instruction *ThenTI = UI, *ElseTI = nullptr; if (IfCondition) SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI); Builder.SetInsertPoint(ThenTI); +#endif Value *CancelKind = nullptr; switch (CanceledDirective) { @@ -774,13 +848,34 @@ OpenMPIRBuilder::createCancel(const LocationDescription &Loc, Value *Result = Builder.CreateCall( getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args); +#if 0 +// FIXME: This is bad for so many reasons. + // 1. Just pass IP to createBarrier + // 2. This is createParallel's task + // 3. The parallel may be nowait + // 4. There may be other omp regions in-between + auto ExitCB = [this, CanceledDirective, Loc](InsertPointTy IP) { + if (CanceledDirective == OMPD_parallel) { + IRBuilder<>::InsertPointGuard IPG(Builder); + Builder.restoreIP(IP); + createBarrier(LocationDescription(Builder.saveIP(), Loc.DL), + omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false, + /* CheckCancelFlag */ false); + } + }; +#endif + // The actual cancel logic is shared with others, e.g., cancel_barriers. emitCancelationCheckImpl(Loc, Result, CanceledDirective, OMPD_cancel); +#if 0 // Update the insertion point and remove the terminator we introduced. 
Builder.SetInsertPoint(UI->getParent()); UI->eraseFromParent(); +#endif + if (New) + return {New, New->begin()}; return Builder.saveIP(); } @@ -820,9 +915,10 @@ void OpenMPIRBuilder::emitCancelationCheckImpl(LocationDescription Loc, Value *C auto &FI = FinalizationStack.back(); FI.FiniCB(Builder.saveIP()); #endif + emitRegionExit({CancellationBlock, CancellationBlock->begin()}, getInnermostDirectionRegion(CanceledDirective), CanceledBy); // The continuation block is where code generation continues. - Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin()); + Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin()); // MK: needed? } IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(const LocationDescription &Loc, @@ -909,6 +1005,16 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(const LocationDescripti #if 1 auto FiniCBWrapper = [&](InsertPointTy IP, omp::Directive LeaveReason, OMPRegionInfo *Region) { + // FIXME: This is broken + // 1. Should be done after the FiniCB + // 2. It may deadlock + if (LeaveReason != OMPD_unknown) { + createBarrier(LocationDescription(Builder.saveIP(), Loc.DL), + omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false, + /* CheckCancelFlag */ false); + } + +#if 0 // Hide "open-ended" blocks from the given FiniCB by setting the right jump // target to the region exit block. if (IP.getBlock()->end() == IP.getPoint()) { @@ -921,8 +1027,13 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(const LocationDescripti assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 && IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB && "Unexpected insertion point for finalization call!"); +#endif + + if (FiniCB) - FiniCB(IP, LeaveReason, Region); + FiniCB(IP, LeaveReason, Region); // Needed? 
+ + }; //FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable}); @@ -1055,7 +1166,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(const LocationDescripti } for (Instruction *I : ToBeDeleted) - I->eraseFromParent(); + I->eraseFromParent(); // FIXME: Don't add temporary instructions!! }; #if 0 @@ -1075,7 +1186,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(const LocationDescripti #if 0 FiniCB(PreFiniIP); #endif - emitRegionExit( PreFiniIP, ParallelRegion, OMPD_unknown ); + emitRegionExit( PreFiniIP, ParallelRegion ); popRegion(omp::OMPD_parallel); @@ -1299,6 +1410,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections(const LocationDes // TODO: Use CanonicalLoopInfo finalization. FinalizationStack.push_back({FiniCBWrapper, OMPD_sections, IsCancellable}); #endif +auto SectionsRegion = pushRegion(OMPD_sections, IsCancellable); // Each section is emitted as a switch case // Each finalization callback is handled from clang.EmitOMPSectionDirective() @@ -1343,10 +1455,9 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections(const LocationDes Value *LB = ConstantInt::get(I32Ty, 0); Value *UB = ConstantInt::get(I32Ty, SectionCBs.size()); Value *ST = ConstantInt::get(I32Ty, 1); - llvm::CanonicalLoopInfo *LoopInfo = createCanonicalLoop( - Loc, LoopBodyGenCB, LB, UB, ST, true, false, AllocaIP, "section_loop"); - InsertPointTy AfterIP = - applyStaticWorkshareLoop(Loc.DL, LoopInfo, AllocaIP, !IsNowait); + llvm::CanonicalLoopInfo *LoopInfo = createCanonicalLoop(Loc, LoopBodyGenCB, LB, UB, ST, true, false, AllocaIP, "section_loop"); + auto AfterIP = LoopInfo->getAfterIP(); + applyStaticWorkshareLoop(Loc.DL, LoopInfo, AllocaIP, !IsNowait); #if 0 // Apply the finalization callback in LoopAfterBB @@ -1361,8 +1472,14 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections(const LocationDes AfterIP = {FiniBB, FiniBB->begin()}; } #endif + //Instruction *I = Builder.CreateBr(ExitBB); - return AfterIP; + 
Builder.restoreIP(AfterIP); + auto Finish = splitBB(Builder,true, "section_finish" ); + emitRegionExit(Builder.saveIP(),SectionsRegion ); + popRegion(OMPD_sections); + + return { Finish, Finish->begin() }; } OpenMPIRBuilder::InsertPointTy diff --git a/llvm/lib/Support/GraphWriter.cpp b/llvm/lib/Support/GraphWriter.cpp index e875e18a7e92e..838fc40610b2f 100644 --- a/llvm/lib/Support/GraphWriter.cpp +++ b/llvm/lib/Support/GraphWriter.cpp @@ -256,7 +256,7 @@ bool llvm::DisplayGraph(StringRef FilenameRef, bool wait, if (!Viewer && S.TryFindProgram("xdg-open", ViewerPath)) Viewer = VK_XDGOpen; #ifdef _WIN32 - if (!Viewer && S.TryFindProgram("cmd", ViewerPath)) { + if (!Viewer && S.TryFindProgram("cmd.exe", ViewerPath)) { Viewer = VK_CmdStart; } #endif From fa7166ef06e58870a869955a41b0db7a794291d8 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Wed, 27 Apr 2022 22:13:32 -0500 Subject: [PATCH 04/50] WIP --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 73 +++++++++++++++++++++------ clang/lib/CodeGen/CGStmtOpenMP.cpp | 43 ++++++++++------ 2 files changed, 86 insertions(+), 30 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 98d8323173f71..7d20dbd53137e 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -63,6 +63,9 @@ class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { InlinedRegion, /// Region with outlined function for standalone 'target' directive. 
TargetRegion, + + /// Handled by OpenMPIRBuilder + OpenMPIRBuilderRegion, }; CGOpenMPRegionInfo(const CapturedStmt &CS, @@ -110,6 +113,28 @@ class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { bool HasCancel; }; + +class OpenMPIRBuilderRegionInfo final : public CGOpenMPRegionInfo { +public: + OpenMPIRBuilderRegionInfo(const CapturedStmt &CS, OpenMPDirectiveKind Kind) + : CGOpenMPRegionInfo( + CS, OpenMPIRBuilderRegion, + [](CodeGenFunction &, PrePostActionTy &) { + llvm_unreachable("Should never be called"); + }, + Kind, /*HasCancel*/ true) {} + + static bool classof(const CGCapturedStmtInfo *Info) { + return CGOpenMPRegionInfo::classof(Info) && + cast(Info)->getRegionKind() == + OpenMPIRBuilderRegion; + } + const VarDecl *getThreadIDVariable() const override { return nullptr; } + + // private: + // CodeGenFunction:: JumpDest CancelDest; +}; + /// API for captured statement code generation in OpenMP constructs. class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { public: @@ -1194,7 +1219,8 @@ struct PushAndPopStackRAII { if (!OMPBuilder) return; -#if 0 + // auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {}; + // The following callback is the crucial part of clangs cleanup process. // // NOTE: @@ -1207,9 +1233,10 @@ struct PushAndPopStackRAII { // to push & pop an FinalizationInfo object. // The FiniCB will still be needed but at the point where the // OpenMPIRBuilder is asked to construct a parallel (or similar) construct. 
- auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP , - omp::Directive LeaveReason, - OMPRegionInfo &Region) { + auto CancelCB = [&CGF, Kind](llvm::OpenMPIRBuilder::InsertPointTy IP, + llvm::omp::Directive CanceledDirective, + llvm::omp::Directive CanceledBy) { + assert(CanceledDirective == Kind); assert(IP.getBlock()->end() == IP.getPoint() && "Clang CG should cause non-terminated block!"); CGBuilderTy::InsertPointGuard IPG(CGF.Builder); @@ -1218,18 +1245,16 @@ struct PushAndPopStackRAII { CGF.getOMPCancelDestination(OMPD_parallel); CGF.EmitBranchThroughCleanup(Dest); }; - + + llvm_unreachable("TODO: set UserManaged=true"); // TODO: Remove this once we emit parallel regions through the // OpenMPIRBuilder as it can do this setup internally. - llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel}); - OMPBuilder->pushFinalizationCB(std::move(FI)); -#endif + // llvm::OpenMPIRBuilder::FinalizationInfo FI{{}, Kind, HasCancel, /*UserManaged*/ true}; + // OMPBuilder->pushFinalizationCB(std::move(FI)); } ~PushAndPopStackRAII() { -#if 0 - if (OMPBuilder) - OMPBuilder->popFinalizationCB(); -#endif + // if (OMPBuilder) + // OMPBuilder->popFinalizationCB(); } llvm::OpenMPIRBuilder *OMPBuilder; }; @@ -2588,14 +2613,32 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks, bool ForceSimpleCall) { // Check if we should use the OMPBuilder - auto *OMPRegionInfo = - dyn_cast_or_null(CGF.CapturedStmtInfo); - if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { + + // FIXME: The OpenMPIRBuilder finalization stack does not necessarily + // correspond the scope structure expected by CGOpenMPRuntime because until + // OpenMPIRBuilder implementation is complete, some directives will still be + // emitted by OpenMPIRBuilder itself. Note that + // isLastFinalizationInfoCancellable may also be wrong and match the wrong + // level which happen to be the same OpenMPDirectiveKind. 
+ // CGOpenMPRegionInfo* OMPRegionInfo = + // dyn_cast_or_null(CGF.CapturedStmtInfo); + if (auto *IRBuilderRegion = + dyn_cast_or_null(CGF.CapturedStmtInfo)) { + // if (OMPBuilder.isLastFinalizationInfoCancellable(Kind)) { CGF.Builder.restoreIP(OMPBuilder.createBarrier( CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); return; + // } + // FIXME: CGF.CapturedStmtInfo is unreliable when using OpenMPIRBuilder. + // OMPRegionInfo = nullptr; + //} else { + // OMPRegionInfo = + // dyn_cast_or_null(CGF.CapturedStmtInfo); } + auto *OMPRegionInfo = + dyn_cast_or_null(CGF.CapturedStmtInfo); + if (!CGF.HaveInsertPoint()) return; // Build call __kmpc_cancel_barrier(loc, thread_id); diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index c0a158025aa5d..8e12e9b0f5c05 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -4076,24 +4076,37 @@ void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; - const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt(); - auto FiniCB = [this]( InsertPointTy IP, - llvm:: omp::Directive LeaveReason, - llvm::OpenMPIRBuilder:: OMPRegionInfo*Region) { - OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); - }; + if (OMPBuilder.isTopmostBuilderManaged()) { + const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt(); + auto FiniCB = [this](InsertPointTy IP, + llvm::omp::Directive LeavingConstruct, + llvm::omp::Directive CancelledBy) { + OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); + }; - auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP, - InsertPointTy CodeGenIP) { - OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( - *this, SectionRegionBodyStmt, AllocaIP, CodeGenIP, "section"); - }; + auto BodyGenCB = + [SectionRegionBodyStmt, + this](InsertPointTy AllocaIP, + InsertPointTy CodeGenIP) { // , 
llvm::BasicBlock &FiniBB + Builder.restoreIP(CodeGenIP); + auto FiniBB = splitBBWithSuffix(Builder, false, ".sectionfini"); - LexicalScope Scope(*this, S.getSourceRange()); - EmitStopPoint(&S); - Builder.restoreIP(OMPBuilder.createSection(Builder, BodyGenCB, FiniCB)); + OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, + *FiniBB); + // OMPBuilderCBHelpers::EmitOMPRegionBody(*this, + // SectionRegionBodyStmt, CodeGenIP, FiniBB); - return; + EmitStmt(SectionRegionBodyStmt); + + Builder.CreateBr(FiniBB); + }; + + LexicalScope Scope(*this, S.getSourceRange()); + EmitStopPoint(&S); + Builder.restoreIP(OMPBuilder.createSection(Builder, BodyGenCB, FiniCB)); + + return; + } } LexicalScope Scope(*this, S.getSourceRange()); EmitStopPoint(&S); From 39fad43505fcdbd7711aefa17da562f4b0d8262b Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Wed, 27 Apr 2022 23:22:18 -0500 Subject: [PATCH 05/50] try backport non-irbuilder region solution --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 46 ++++++++++++++--- clang/lib/CodeGen/CGOpenMPRuntime.h | 9 ++++ clang/lib/CodeGen/CGStmtOpenMP.cpp | 72 ++++++++++++++++++--------- clang/lib/CodeGen/CodeGenFunction.h | 24 +++++++++ 4 files changed, 120 insertions(+), 31 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 7d20dbd53137e..d4ff59faf7110 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -63,8 +63,7 @@ class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { InlinedRegion, /// Region with outlined function for standalone 'target' directive. TargetRegion, - - /// Handled by OpenMPIRBuilder + /// Handled by OpenMPIRBuilder. 
OpenMPIRBuilderRegion, }; @@ -130,9 +129,6 @@ class OpenMPIRBuilderRegionInfo final : public CGOpenMPRegionInfo { OpenMPIRBuilderRegion; } const VarDecl *getThreadIDVariable() const override { return nullptr; } - - // private: - // CodeGenFunction:: JumpDest CancelDest; }; /// API for captured statement code generation in OpenMP constructs. @@ -2184,6 +2180,43 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, } } +void CGOpenMPRuntime::emitIRBuilderParallel( + CodeGenFunction &CGF, const CapturedStmt *CS, + llvm::OpenMPIRBuilder::BodyGenCallbackTy BodyGenCB, + llvm::OpenMPIRBuilder::PrivatizeCallbackTy PrivCB, + llvm::OpenMPIRBuilder::LeaveRegionCallbackTy FiniCB, + // llvm:: OpenMPIRBuilder:: CancellationCallbackTy CancelCB, + llvm::Value *IfCond, llvm::Value *NumThreads, + llvm::omp::ProcBindKind ProcBind,bool IsCancellable) { + auto &Builder = CGF.Builder; + auto AllocaInsertPt = CGF.AllocaInsertPt; + + // FIXME: CGCapturedStmtInfo is an abstract class, CGOpenMPOutlinedRegionInfo + // would be correct here. 
+ // CodeGenFunction:: CGCapturedStmtInfo CGSI(*CS, CR_OpenMP); + + using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; + auto BodyGenCBWrapper = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + // CGF.OMPCancelStack.enter(CGF, OMPD_parallel, /* HasCancel*/ true); + + if (BodyGenCB) + BodyGenCB(AllocaIP, CodeGenIP); + + // CGF.Builder.ClearInsertionPoint(); + int a = 0; + }; + + OpenMPIRBuilderRegionInfo CGSI(*CS, OMPD_parallel); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI); + + llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(AllocaInsertPt->getParent(), + AllocaInsertPt->getIterator()); + Builder.restoreIP(OMPBuilder.createParallel(Builder, AllocaIP, + BodyGenCBWrapper, PrivCB, FiniCB, + // CancelCB, + IfCond, NumThreads, ProcBind, IsCancellable)); +} + // If we're inside an (outlined) parallel region, use the region info's // thread-ID variable (it is passed in a first argument of the outlined function // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in @@ -2622,8 +2655,7 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, // level which happen to be the same OpenMPDirectiveKind. 
// CGOpenMPRegionInfo* OMPRegionInfo = // dyn_cast_or_null(CGF.CapturedStmtInfo); - if (auto *IRBuilderRegion = - dyn_cast_or_null(CGF.CapturedStmtInfo)) { + if (auto *IRBuilderRegion = dyn_cast_or_null(CGF.CapturedStmtInfo)) { // if (OMPBuilder.isLastFinalizationInfoCancellable(Kind)) { CGF.Builder.restoreIP(OMPBuilder.createBarrier( CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index f1419eb465b13..97d639b1cfb72 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -1034,6 +1034,15 @@ class CGOpenMPRuntime { ArrayRef CapturedVars, const Expr *IfCond, llvm::Value *NumThreads); + void emitIRBuilderParallel( + CodeGenFunction &CGF, const CapturedStmt *CS, + llvm::OpenMPIRBuilder::BodyGenCallbackTy BodyGenCB, + llvm::OpenMPIRBuilder::PrivatizeCallbackTy PrivCB, + llvm::OpenMPIRBuilder::LeaveRegionCallbackTy FiniCB, + // llvm:: OpenMPIRBuilder:: CancellationCallbackTy CancelCB, + llvm::Value *IfCondition, llvm::Value *NumThreads, + llvm::omp::ProcBindKind ProcBind, bool IsCancellable); + /// Emits a critical region. /// \param CriticalName Name of the critical region. /// \param CriticalOpGen Generator for the statement associated with the given diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 8e12e9b0f5c05..3916399383739 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -1713,7 +1713,7 @@ void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody( } void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { - if (CGM.getLangOpts().OpenMPIRBuilder) { + if (CGM.getLangOpts().OpenMPIRBuilder && !IsInsideNonOpenMPIRBuilderHandledRegion ) { llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); // Check if we have any if clause associated with the directive. 
llvm::Value *IfCond = nullptr; @@ -1740,6 +1740,31 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); }; + + + auto CancelCB = [this](InsertPointTy IP, + llvm::omp::Directive CanceledDirective, + llvm::omp::Directive CanceledBy) { + llvm_unreachable("TODO"); +#if 0 + assert(CanceledDirective == OMPD_parallel); + if (CanceledBy == OMPD_unknown) + return; + + auto &Stack = OMPCancelStack.Stack; + + Builder.restoreIP(IP); + auto CurBB = IP.getBlock(); + llvm::BasicBlock *ContBB = nullptr; + ContBB = splitBBWithSuffix(Builder, /*CreateBranch*/ false, ".cnclsplit"); + + + auto &ExitDest = Stack.back().ExitBlock; + auto Dest = JumpDest(ContBB, ExitDest.getScopeDepth(), NextCleanupDestIndex++); + + EmitBranchThroughCleanup(Dest); +#endif + }; // Privatization callback that performs appropriate action for // shared/private/firstprivate/lastprivate/copyin/... variables. // @@ -1762,16 +1787,17 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { *this, ParallelRegionBodyStmt, AllocaIP, CodeGenIP, "parallel"); }; + CGCapturedStmtInfo CGSI(*CS, CR_OpenMP); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); - llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( - AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); - Builder.restoreIP( - OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB, - IfCond, NumThreads, ProcBind, S.hasCancel())); - return; + llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); + // Builder.restoreIP( OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB, IfCond, NumThreads, ProcBind, S.hasCancel())); + CGM.getOpenMPRuntime().emitIRBuilderParallel(*this, CS, BodyGenCB, PrivCB, FiniCB, IfCond, NumThreads, ProcBind, S.hasCancel()); + return; } + CGNonOpenMPIRBuilderRegion RegionScope(*this); + // Emit parallel region as a standalone region. 
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { Action.Enter(CGF); @@ -4072,22 +4098,21 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { } void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { - if (CGM.getLangOpts().OpenMPIRBuilder) { + if (CGM.getLangOpts().OpenMPIRBuilder && !IsInsideNonOpenMPIRBuilderHandledRegion) { llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; - if (OMPBuilder.isTopmostBuilderManaged()) { const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt(); - auto FiniCB = [this](InsertPointTy IP, - llvm::omp::Directive LeavingConstruct, - llvm::omp::Directive CancelledBy) { - OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); + auto FiniCB = [this]( InsertPointTy ExitingIP, + llvm:: omp::Directive LeaveReason, + llvm::OpenMPIRBuilder:: OMPRegionInfo *Region) { + OMPBuilderCBHelpers::FinalizeOMPRegion(*this, ExitingIP); }; auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP, - InsertPointTy CodeGenIP) { // , llvm::BasicBlock &FiniBB + InsertPointTy CodeGenIP) { Builder.restoreIP(CodeGenIP); auto FiniBB = splitBBWithSuffix(Builder, false, ".sectionfini"); @@ -4106,8 +4131,9 @@ void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { Builder.restoreIP(OMPBuilder.createSection(Builder, BodyGenCB, FiniCB)); return; - } } + + CGNonOpenMPIRBuilderRegion NonOpenMPIRBuilderRegion(*this); LexicalScope Scope(*this, S.getSourceRange()); EmitStopPoint(&S); EmitStmt(S.getAssociatedStmt()); @@ -6882,18 +6908,16 @@ void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { } if (CGM.getLangOpts().OpenMPIRBuilder) { llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); - // TODO: This check is necessary as we only generate `omp parallel` through - // the OpenMPIRBuilder for now. 
- if (S.getCancelRegion() == OMPD_parallel || - S.getCancelRegion() == OMPD_sections || - S.getCancelRegion() == OMPD_section) { + llvm_unreachable("TODO"); + // auto DK = OMPBuilder.getTopmostDirective(); + // if (OMPBuilder.isTopmostBuilderManaged()) { llvm::Value *IfCondition = nullptr; if (IfCond) - IfCondition = EmitScalarExpr(IfCond, - /*IgnoreResultAssign=*/true); - return Builder.restoreIP( + IfCondition = EvaluateExprAsBool(IfCond); + Builder.restoreIP( OMPBuilder.createCancel(Builder, IfCondition, S.getCancelRegion())); - } + return; + // } } CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond, diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 82220c7217d8f..5c7b6cf17c7c2 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -476,6 +476,8 @@ class CodeGenFunction : public CodeGenTypeCache { }; CGCapturedStmtInfo *CapturedStmtInfo = nullptr; + + /// RAII for correct setting/restoring of CapturedStmtInfo. class CGCapturedStmtRAII { private: @@ -490,6 +492,28 @@ class CodeGenFunction : public CodeGenTypeCache { ~CGCapturedStmtRAII() { CGF.CapturedStmtInfo = PrevCapturedStmtInfo; } }; + + + /// Required until everything can be handled by OpenMPIRBuilder. + /// Isn't the ultimate solution to mixing OpenMPIRBuilder and non-OpenMPIRBuilder codegen either, but works with the current regression tests so far. 
+ bool IsInsideNonOpenMPIRBuilderHandledRegion = false; + class CGNonOpenMPIRBuilderRegion { + private: + CodeGenFunction &CGF; + bool PreviousIsInsideNonOpenMPIRBuilderHandledRegion; + public: + CGNonOpenMPIRBuilderRegion(CodeGenFunction &CGF) + : CGF(CGF), PreviousIsInsideNonOpenMPIRBuilderHandledRegion(CGF.IsInsideNonOpenMPIRBuilderHandledRegion) { + CGF.IsInsideNonOpenMPIRBuilderHandledRegion = true; + } + ~CGNonOpenMPIRBuilderRegion() { + CGF.IsInsideNonOpenMPIRBuilderHandledRegion = PreviousIsInsideNonOpenMPIRBuilderHandledRegion; + } + }; + + + + /// An abstract representation of regular/ObjC call/message targets. class AbstractCallee { /// The function declaration of the callee. From ca1700effbcfe81715a68c786dc226a1c4d59549 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Wed, 27 Apr 2022 23:32:43 -0500 Subject: [PATCH 06/50] try fix unittest --- .../Frontend/OpenMPIRBuilderTest.cpp | 81 +++++++++++++------ 1 file changed, 57 insertions(+), 24 deletions(-) diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index 5f7e3228ae570..fcdd354a0bc3f 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -355,6 +355,7 @@ TEST_F(OpenMPIRBuilderTest, CreateBarrier) { EXPECT_FALSE(verifyModule(*M, &errs())); } +#if 0 TEST_F(OpenMPIRBuilderTest, CreateCancel) { using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); @@ -421,7 +422,9 @@ TEST_F(OpenMPIRBuilderTest, CreateCancel) { Builder.CreateUnreachable(); EXPECT_FALSE(verifyModule(*M, &errs())); } +#endif +#if 0 TEST_F(OpenMPIRBuilderTest, CreateCancelIfCond) { using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); @@ -494,7 +497,9 @@ TEST_F(OpenMPIRBuilderTest, CreateCancelIfCond) { Builder.CreateUnreachable(); EXPECT_FALSE(verifyModule(*M, &errs())); } +#endif +#if 0 TEST_F(OpenMPIRBuilderTest, CreateCancelBarrier) { using 
InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); @@ -549,6 +554,7 @@ TEST_F(OpenMPIRBuilderTest, CreateCancelBarrier) { Builder.CreateUnreachable(); EXPECT_FALSE(verifyModule(*M, &errs())); } +#endif TEST_F(OpenMPIRBuilderTest, DbgLoc) { OpenMPIRBuilder OMPBuilder(*M); @@ -644,7 +650,9 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimple) { return CodeGenIP; }; - auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; }; + auto FiniCB = [&](InsertPointTy CodeGenIP, + omp::Directive LeaveReason, + OpenMPIRBuilder:: OMPRegionInfo *Region ) { ++NumFinalizationPoints; }; IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), F->getEntryBlock().getFirstInsertionPt()); @@ -723,7 +731,9 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested) { return CodeGenIP; }; - auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; }; + auto FiniCB = [&](InsertPointTy CodeGenIP , + omp::Directive LeaveReason, + OpenMPIRBuilder:: OMPRegionInfo *Region) { ++NumFinalizationPoints; }; auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { ++NumOuterBodiesGenerated; @@ -817,7 +827,9 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) { return CodeGenIP; }; - auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; }; + auto FiniCB = [&](InsertPointTy CodeGenIP , + omp::Directive LeaveReason, + OpenMPIRBuilder:: OMPRegionInfo *Region) { ++NumFinalizationPoints; }; auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { ++NumOuterBodiesGenerated; @@ -952,7 +964,9 @@ TEST_F(OpenMPIRBuilderTest, ParallelIfCond) { return CodeGenIP; }; - auto FiniCB = [&](InsertPointTy CodeGenIP) { + auto FiniCB = [&](InsertPointTy CodeGenIP , + omp::Directive LeaveReason, + OpenMPIRBuilder:: OMPRegionInfo *Region) { ++NumFinalizationPoints; // No destructors. 
}; @@ -1084,7 +1098,9 @@ TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) { auto *FakeDestructor = Function::Create( FakeDestructorTy, Function::ExternalLinkage, "fakeDestructor", M.get()); - auto FiniCB = [&](InsertPointTy IP) { + auto FiniCB = [&](InsertPointTy IP , + omp::Directive LeaveReason, + OpenMPIRBuilder:: OMPRegionInfo *Region) { ++NumFinalizationPoints; Builder.restoreIP(IP); Builder.CreateCall(FakeDestructor, @@ -1176,12 +1192,12 @@ TEST_F(OpenMPIRBuilderTest, ParallelForwardAsPointers) { ReplacementValue = &Inner; return CodeGenIP; }; - auto FiniCB = [](InsertPointTy) {}; + IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), F->getEntryBlock().getFirstInsertionPt()); IRBuilder<>::InsertPoint AfterIP = - OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, + OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, {}, nullptr, nullptr, OMP_PROC_BIND_default, false); Builder.restoreIP(AfterIP); Builder.CreateRetVoid(); @@ -2319,7 +2335,9 @@ TEST_F(OpenMPIRBuilderTest, MasterDirective) { Builder.CreateICmpNE(F->arg_begin(), PrivLoad); }; - auto FiniCB = [&](InsertPointTy IP) { + auto FiniCB = [&](InsertPointTy IP , + omp::Directive LeaveReason, + OpenMPIRBuilder:: OMPRegionInfo *Region) { BasicBlock *IPBB = IP.getBlock(); EXPECT_NE(IPBB->end(), IP.getPoint()); }; @@ -2396,7 +2414,9 @@ TEST_F(OpenMPIRBuilderTest, MaskedDirective) { Builder.CreateICmpNE(F->arg_begin(), PrivLoad); }; - auto FiniCB = [&](InsertPointTy IP) { + auto FiniCB = [&](InsertPointTy IP , + omp::Directive LeaveReason, + OpenMPIRBuilder:: OMPRegionInfo *Region) { BasicBlock *IPBB = IP.getBlock(); EXPECT_NE(IPBB->end(), IP.getPoint()); }; @@ -2462,7 +2482,9 @@ TEST_F(OpenMPIRBuilderTest, CriticalDirective) { Builder.CreateICmpNE(F->arg_begin(), PrivLoad); }; - auto FiniCB = [&](InsertPointTy IP) { + auto FiniCB = [&](InsertPointTy IP, + omp::Directive LeaveReason, + OpenMPIRBuilder:: OMPRegionInfo *Region) { BasicBlock *IPBB = IP.getBlock(); 
EXPECT_NE(IPBB->end(), IP.getPoint()); }; @@ -2701,7 +2723,9 @@ TEST_F(OpenMPIRBuilderTest, OrderedDirectiveThreads) { Builder.CreateICmpNE(F->arg_begin(), PrivLoad); }; - auto FiniCB = [&](InsertPointTy IP) { + auto FiniCB = [&](InsertPointTy IP, + omp::Directive LeaveReason, + OpenMPIRBuilder:: OMPRegionInfo *Region) { BasicBlock *IPBB = IP.getBlock(); EXPECT_NE(IPBB->end(), IP.getPoint()); }; @@ -2772,7 +2796,9 @@ TEST_F(OpenMPIRBuilderTest, OrderedDirectiveSimd) { Builder.CreateICmpNE(F->arg_begin(), PrivLoad); }; - auto FiniCB = [&](InsertPointTy IP) { + auto FiniCB = [&](InsertPointTy IP, + omp::Directive LeaveReason, + OpenMPIRBuilder:: OMPRegionInfo *Region) { BasicBlock *IPBB = IP.getBlock(); EXPECT_NE(IPBB->end(), IP.getPoint()); }; @@ -2888,7 +2914,9 @@ TEST_F(OpenMPIRBuilderTest, SingleDirective) { Builder.CreateICmpNE(F->arg_begin(), PrivLoad); }; - auto FiniCB = [&](InsertPointTy IP) { + auto FiniCB = [&](InsertPointTy IP, + omp::Directive LeaveReason, + OpenMPIRBuilder:: OMPRegionInfo *Region) { BasicBlock *IPBB = IP.getBlock(); EXPECT_NE(IPBB->end(), IP.getPoint()); }; @@ -2978,7 +3006,9 @@ TEST_F(OpenMPIRBuilderTest, SingleDirectiveNowait) { Builder.CreateICmpNE(F->arg_begin(), PrivLoad); }; - auto FiniCB = [&](InsertPointTy IP) { + auto FiniCB = [&](InsertPointTy IP, + omp::Directive LeaveReason, + OpenMPIRBuilder:: OMPRegionInfo *Region) { BasicBlock *IPBB = IP.getBlock(); EXPECT_NE(IPBB->end(), IP.getPoint()); }; @@ -3722,7 +3752,9 @@ TEST_F(OpenMPIRBuilderTest, CreateReductions) { }; // Do nothing in finalization. 
- auto FiniCB = [&](InsertPointTy CodeGenIP) { return CodeGenIP; }; + auto FiniCB = [&](InsertPointTy CodeGenIP, + omp::Directive LeaveReason, + OpenMPIRBuilder:: OMPRegionInfo *Region) { return CodeGenIP; }; InsertPointTy AfterIP = OMPBuilder.createParallel(Loc, OuterAllocaIP, BodyGenCB, PrivCB, FiniCB, @@ -3977,16 +4009,15 @@ TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) { return Builder.saveIP(); }; - // Do nothing in finalization. - auto FiniCB = [&](InsertPointTy CodeGenIP) { return CodeGenIP; }; + Builder.restoreIP( OMPBuilder.createParallel(Loc, OuterAllocaIP, FirstBodyGenCB, PrivCB, - FiniCB, /* IfCondition */ nullptr, + {}, /* IfCondition */ nullptr, /* NumThreads */ nullptr, OMP_PROC_BIND_default, /* IsCancellable */ false)); InsertPointTy AfterIP = OMPBuilder.createParallel( - {Builder.saveIP(), DL}, OuterAllocaIP, SecondBodyGenCB, PrivCB, FiniCB, + { Builder.saveIP(), DL }, OuterAllocaIP, SecondBodyGenCB, PrivCB, {}, /* IfCondition */ nullptr, /* NumThreads */ nullptr, OMP_PROC_BIND_default, /* IsCancellable */ false); @@ -4075,7 +4106,7 @@ TEST_F(OpenMPIRBuilderTest, CreateSectionsSimple) { llvm::SmallVector SectionCBVector; llvm::SmallVector CaseBBs; - auto FiniCB = [&](InsertPointTy IP) {}; + auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {}; SectionCBVector.push_back(SectionCB); @@ -4085,7 +4116,7 @@ TEST_F(OpenMPIRBuilderTest, CreateSectionsSimple) { IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), F->getEntryBlock().getFirstInsertionPt()); Builder.restoreIP(OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector, - PrivCB, FiniCB, false, false)); + PrivCB, {}, false, false)); Builder.CreateRetVoid(); // Required at the end of the function EXPECT_NE(F->getEntryBlock().getTerminator(), nullptr); EXPECT_FALSE(verifyModule(*M, &errs())); @@ -4111,7 +4142,9 @@ TEST_F(OpenMPIRBuilderTest, CreateSections) { unsigned NumFiniCBCalls = 0; PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); - auto FiniCB = 
[&](InsertPointTy IP) { + auto FiniCB = [&](InsertPointTy IP, + omp::Directive LeaveReason, + OpenMPIRBuilder:: OMPRegionInfo *Region) { ++NumFiniCBCalls; BasicBlock *IPBB = IP.getBlock(); EXPECT_NE(IPBB->end(), IP.getPoint()); @@ -4223,10 +4256,10 @@ TEST_F(OpenMPIRBuilderTest, CreateSectionsNoWait) { auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) { return CodeGenIP; }; - auto FiniCB = [&](InsertPointTy IP) {}; + Builder.restoreIP(OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector, - PrivCB, FiniCB, false, true)); + PrivCB, {}, false, true)); Builder.CreateRetVoid(); // Required at the end of the function for (auto &Inst : instructions(*F)) { EXPECT_FALSE(isa(Inst) && From 2ffb6c35757bd079f0d114274f61531b646bcb3d Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 28 Apr 2022 01:02:09 -0500 Subject: [PATCH 07/50] WIP --- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 2 +- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 65 +++++++++++++++--- .../Frontend/OpenMPIRBuilderTest.cpp | 66 ++++++++++--------- 3 files changed, 90 insertions(+), 43 deletions(-) diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index a6577b21e2074..f5096dbf4da39 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -106,7 +106,7 @@ class OpenMPIRBuilder { /// destruction, when the scope of the currently generated construct is left /// at the time, and location, the callback is invoked. 
using LeaveRegionCallbackTy = std::function; diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 2952cc4af1761..9e6a43bb896d2 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -881,43 +881,88 @@ OpenMPIRBuilder::createCancel(const LocationDescription &Loc, void OpenMPIRBuilder::emitCancelationCheckImpl(LocationDescription Loc, Value *CancelFlag, omp::Directive CanceledDirective, - omp::Directive CanceledBy) { + omp::Directive CancelledBy) { assert(isLastFinalizationInfoCancellable(CanceledDirective) && "Unexpected cancellation!"); // For a cancel barrier we create two new blocks. + // MK: This is garbage BasicBlock *BB = Builder.GetInsertBlock(); + +#if 0 BasicBlock *NonCancellationBlock; if (Builder.GetInsertPoint() == BB->end()) { - // TODO: This branch will not be needed once we moved to the - // OpenMPIRBuilder codegen completely. - NonCancellationBlock = BasicBlock::Create( - BB->getContext(), BB->getName() + ".cont", BB->getParent()); + // TODO: This branch will not be needed once we moved to the OpenMPIRBuilder codegen completely. + NonCancellationBlock = BasicBlock::Create(BB->getContext(), BB->getName() + ".cont", BB->getParent()); } else { NonCancellationBlock = SplitBlock(BB, &*Builder.GetInsertPoint()); BB->getTerminator()->eraseFromParent(); Builder.SetInsertPoint(BB); } - BasicBlock *CancellationBlock = BasicBlock::Create( - BB->getContext(), BB->getName() + ".cncl", BB->getParent()); +#endif + // Avoid assertions around "fallthtrough" cleanups in clang. 
+ // BasicBlock *NonCancellationCleanupBlock = splitBB(Builder, BB->getName() + + // ".cont.cleanup", true); + + BasicBlock *NonCancellationBlock = splitBBWithSuffix(Builder, false, ".cont"); + +#if 0 + FinalizationInfo &FI = FinalizationStack.back(); +#endif + + BasicBlock *PreCancellationBlock = + BasicBlock::Create(BB->getContext(), BB->getName() + ".cncl.fini", + BB->getParent(), NonCancellationBlock); + BasicBlock *CancellationBlock = + BasicBlock::Create(BB->getContext(), BB->getName() + ".cncl", + BB->getParent(), NonCancellationBlock); // Jump to them based on the return value. Value *Cmp = Builder.CreateIsNull(CancelFlag); - Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock, + Builder.CreateCondBr(Cmp, NonCancellationBlock, PreCancellationBlock, /* TODO weight */ nullptr, nullptr); + // Builder.CreateBr( NonCancellationBlock); // From the cancellation block we finalize all variables and go to the // post finalization block that is known to the FiniCB callback. + Builder.SetInsertPoint(PreCancellationBlock); + + Builder.CreateBr(CancellationBlock); + // if (ExitCB) + // ExitCB(Builder.saveIP(),CanceledDirective); + + // Unless cancellation has been detected by a barrier itself, need to + // synchronize between threads (after finalization). Builder.SetInsertPoint(CancellationBlock); + if (CanceledDirective == OMPD_parallel && CancelledBy != OMPD_barrier) + emitBarrierImpl(Loc, CancelledBy, false, false); + + auto CancellationIP = Builder.saveIP(); + + // CancellationIP.viewCFG(); + + // TODO: Clang's codegen emits finalization code only once and inserts a + // switch to jump back to the target code path (CGF.EmitBranchThroughCleanup). + // Currently in the OpenMPIRBuilder, we emit the finialization multiple times + // for each path exiting the region (non-cancellation and each cancellation + // check). 
#if 0 + if (FI.FiniCB) + FI.FiniCB(CancellationIP, CanceledDirective, CancelledBy); if (ExitCB) ExitCB(Builder.saveIP()); auto &FI = FinalizationStack.back(); FI.FiniCB(Builder.saveIP()); #endif - emitRegionExit({CancellationBlock, CancellationBlock->begin()}, getInnermostDirectionRegion(CanceledDirective), CanceledBy); + emitRegionExit({CancellationBlock, CancellationBlock->begin()}, getInnermostDirectionRegion(CanceledDirective), CancelledBy); + + + // Builder.SetInsertPoint(CancellationBlock); + // Builder.CreateBr( CancellationBlock); + // if (FI.CancelCB) + // FI.CancelCB(CancellationIP, CanceledDirective, CancelledBy); - // The continuation block is where code generation continues. + // The continuation block is where code generation continues.s Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin()); // MK: needed? } diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index fcdd354a0bc3f..7e7f2e243223a 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -1034,11 +1034,13 @@ TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) { OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.initialize(); F->setName("func"); + BB->setName("entry"); IRBuilder<> Builder(BB); BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F); Builder.CreateBr(EnterBB); Builder.SetInsertPoint(EnterBB); + OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); unsigned NumBodiesGenerated = 0; @@ -1052,38 +1054,38 @@ TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) { Builder.restoreIP(CodeGenIP); // Create three barriers, two cancel barriers but only one checked. 
- Function *CBFn, *BFn; + // Function *CBFn, *BFn; Builder.restoreIP( OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel)); - CBFn = M->getFunction("__kmpc_cancel_barrier"); - BFn = M->getFunction("__kmpc_barrier"); - ASSERT_NE(CBFn, nullptr); - ASSERT_EQ(BFn, nullptr); - ASSERT_EQ(CBFn->getNumUses(), 1U); - ASSERT_TRUE(isa(CBFn->user_back())); - ASSERT_EQ(CBFn->user_back()->getNumUses(), 1U); + Function *CBFn = M->getFunction("__kmpc_cancel_barrier"); + Function *BFn = M->getFunction("__kmpc_barrier"); + EXPECT_NE(CBFn, nullptr); + EXPECT_EQ(BFn, nullptr); + EXPECT_EQ(CBFn->getNumUses(), 2U); + EXPECT_TRUE(isa(CBFn->user_back())); + // EXPECT_EQ(CBFn->user_back()->getNumUses(), 0U); CheckedBarrier = cast(CBFn->user_back()); Builder.restoreIP( OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel, true)); - CBFn = M->getFunction("__kmpc_cancel_barrier"); + // CBFn = M->getFunction("__kmpc_cancel_barrier"); BFn = M->getFunction("__kmpc_barrier"); - ASSERT_NE(CBFn, nullptr); - ASSERT_NE(BFn, nullptr); - ASSERT_EQ(CBFn->getNumUses(), 1U); - ASSERT_EQ(BFn->getNumUses(), 1U); - ASSERT_TRUE(isa(BFn->user_back())); - ASSERT_EQ(BFn->user_back()->getNumUses(), 0U); + // EXPECT_NE(CBFn, nullptr); + EXPECT_NE(BFn, nullptr); + EXPECT_EQ(CBFn->getNumUses(), 2U); + EXPECT_EQ(BFn->getNumUses(), 1U); + // EXPECT_TRUE(isa(BFn->user_back())); + EXPECT_EQ(BFn->user_back()->getNumUses(), 0U); Builder.restoreIP(OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel, false, false)); - ASSERT_EQ(CBFn->getNumUses(), 2U); - ASSERT_EQ(BFn->getNumUses(), 1U); - ASSERT_TRUE(CBFn->user_back() != CheckedBarrier); - ASSERT_TRUE(isa(CBFn->user_back())); - ASSERT_EQ(CBFn->user_back()->getNumUses(), 0U); + EXPECT_EQ(CBFn->getNumUses(), 3U); + EXPECT_EQ(BFn->getNumUses(), 1U); + // EXPECT_TRUE(CBFn->user_back() != CheckedBarrier); + EXPECT_TRUE(isa(CBFn->user_back())); + EXPECT_EQ(CBFn->user_back()->getNumUses(), 0U); }; auto PrivCB = [&](InsertPointTy, InsertPointTy, Value &V, Value &, 
@@ -1124,25 +1126,25 @@ TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) { OMPBuilder.finalize(); EXPECT_FALSE(verifyModule(*M, &errs())); - +#if 0 BasicBlock *ExitBB = nullptr; for (const User *Usr : FakeDestructor->users()) { - const CallInst *CI = dyn_cast(Usr); - ASSERT_EQ(CI->getCalledFunction(), FakeDestructor); - ASSERT_TRUE(isa(CI->getNextNode())); - ASSERT_EQ(CI->getNextNode()->getNumSuccessors(), 1U); + const CallInst *CI = cast(Usr); + EXPECT_EQ(CI->getCalledFunction(), FakeDestructor); + EXPECT_TRUE(isa(CI->getNextNode())); + EXPECT_EQ(CI->getNextNode()->getNumSuccessors(), 1U); if (ExitBB) - ASSERT_EQ(CI->getNextNode()->getSuccessor(0), ExitBB); + EXPECT_EQ(CI->getNextNode()->getSuccessor(0)->getSingleSuccessor(), ExitBB); else - ExitBB = CI->getNextNode()->getSuccessor(0); - ASSERT_EQ(ExitBB->size(), 1U); + ExitBB = CI->getNextNode()->getSuccessor(0)->getUniqueSuccessor(); + EXPECT_EQ(ExitBB->size(), 1U); if (!isa(ExitBB->front())) { - ASSERT_TRUE(isa(ExitBB->front())); - ASSERT_EQ(cast(ExitBB->front()).getNumSuccessors(), 1U); - ASSERT_TRUE(isa( - cast(ExitBB->front()).getSuccessor(0)->front())); + EXPECT_TRUE(isa(ExitBB->front())); + EXPECT_EQ(cast(ExitBB->front()).getNumSuccessors(), 1U); + EXPECT_TRUE(isa(cast(ExitBB->front()).getSuccessor(0)->front())); } } +#endif } TEST_F(OpenMPIRBuilderTest, ParallelForwardAsPointers) { From bb5c5448cdf1c961e6ef256e65612cebe16fbdcd Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 28 Apr 2022 01:15:42 -0500 Subject: [PATCH 08/50] clang-format --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 24 +- clang/lib/CodeGen/CGStmtOpenMP.cpp | 115 +++--- clang/lib/CodeGen/CodeGenFunction.h | 53 ++- llvm/include/llvm/Analysis/CFGPrinter.h | 20 +- .../llvm/Analysis/DOTGraphTraitsPass.h | 6 +- llvm/include/llvm/Analysis/RegionPrinter.h | 3 +- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 123 +++--- llvm/include/llvm/IR/IRBuilder.h | 9 +- llvm/lib/Analysis/CFGPrinter.cpp | 89 ++--- llvm/lib/Analysis/RegionPrinter.cpp | 374 
+++++++++--------- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 245 ++++++------ llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 3 - .../Frontend/OpenMPIRBuilderTest.cpp | 104 +++-- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 8 +- 14 files changed, 552 insertions(+), 624 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index d4ff59faf7110..02d0af35aa83e 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -112,7 +112,6 @@ class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { bool HasCancel; }; - class OpenMPIRBuilderRegionInfo final : public CGOpenMPRegionInfo { public: OpenMPIRBuilderRegionInfo(const CapturedStmt &CS, OpenMPDirectiveKind Kind) @@ -1241,16 +1240,16 @@ struct PushAndPopStackRAII { CGF.getOMPCancelDestination(OMPD_parallel); CGF.EmitBranchThroughCleanup(Dest); }; - + llvm_unreachable("TODO: set UserManaged=true"); // TODO: Remove this once we emit parallel regions through the // OpenMPIRBuilder as it can do this setup internally. 
- // llvm::OpenMPIRBuilder::FinalizationInfo FI{{}, Kind, HasCancel, /*UserManaged*/ true}; - // OMPBuilder->pushFinalizationCB(std::move(FI)); + // llvm::OpenMPIRBuilder::FinalizationInfo FI{{}, Kind, HasCancel, + // /*UserManaged*/ true}; OMPBuilder->pushFinalizationCB(std::move(FI)); } ~PushAndPopStackRAII() { - // if (OMPBuilder) - // OMPBuilder->popFinalizationCB(); + // if (OMPBuilder) + // OMPBuilder->popFinalizationCB(); } llvm::OpenMPIRBuilder *OMPBuilder; }; @@ -2187,7 +2186,7 @@ void CGOpenMPRuntime::emitIRBuilderParallel( llvm::OpenMPIRBuilder::LeaveRegionCallbackTy FiniCB, // llvm:: OpenMPIRBuilder:: CancellationCallbackTy CancelCB, llvm::Value *IfCond, llvm::Value *NumThreads, - llvm::omp::ProcBindKind ProcBind,bool IsCancellable) { + llvm::omp::ProcBindKind ProcBind, bool IsCancellable) { auto &Builder = CGF.Builder; auto AllocaInsertPt = CGF.AllocaInsertPt; @@ -2211,10 +2210,10 @@ void CGOpenMPRuntime::emitIRBuilderParallel( llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); - Builder.restoreIP(OMPBuilder.createParallel(Builder, AllocaIP, - BodyGenCBWrapper, PrivCB, FiniCB, - // CancelCB, - IfCond, NumThreads, ProcBind, IsCancellable)); + Builder.restoreIP(OMPBuilder.createParallel( + Builder, AllocaIP, BodyGenCBWrapper, PrivCB, FiniCB, + // CancelCB, + IfCond, NumThreads, ProcBind, IsCancellable)); } // If we're inside an (outlined) parallel region, use the region info's @@ -2655,7 +2654,8 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, // level which happen to be the same OpenMPDirectiveKind. 
// CGOpenMPRegionInfo* OMPRegionInfo = // dyn_cast_or_null(CGF.CapturedStmtInfo); - if (auto *IRBuilderRegion = dyn_cast_or_null(CGF.CapturedStmtInfo)) { + if (auto *IRBuilderRegion = + dyn_cast_or_null(CGF.CapturedStmtInfo)) { // if (OMPBuilder.isLastFinalizationInfoCancellable(Kind)) { CGF.Builder.restoreIP(OMPBuilder.createBarrier( CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 3916399383739..f6b5969414635 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -1713,7 +1713,8 @@ void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody( } void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { - if (CGM.getLangOpts().OpenMPIRBuilder && !IsInsideNonOpenMPIRBuilderHandledRegion ) { + if (CGM.getLangOpts().OpenMPIRBuilder && + !IsInsideNonOpenMPIRBuilderHandledRegion) { llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); // Check if we have any if clause associated with the directive. llvm::Value *IfCond = nullptr; @@ -1734,18 +1735,15 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { // The cleanup callback that finalizes all variabels at the given location, // thus calls destructors etc. 
- auto FiniCB = [this]( InsertPointTy IP, - llvm:: omp::Directive LeaveReason, - llvm::OpenMPIRBuilder:: OMPRegionInfo *Region) { + auto FiniCB = [this](InsertPointTy IP, llvm::omp::Directive LeaveReason, + llvm::OpenMPIRBuilder::OMPRegionInfo *Region) { OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); }; - - auto CancelCB = [this](InsertPointTy IP, llvm::omp::Directive CanceledDirective, llvm::omp::Directive CanceledBy) { - llvm_unreachable("TODO"); + llvm_unreachable("TODO"); #if 0 assert(CanceledDirective == OMPD_parallel); if (CanceledBy == OMPD_unknown) @@ -1787,13 +1785,17 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { *this, ParallelRegionBodyStmt, AllocaIP, CodeGenIP, "parallel"); }; - CGCapturedStmtInfo CGSI(*CS, CR_OpenMP); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); - llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); - // Builder.restoreIP( OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB, IfCond, NumThreads, ProcBind, S.hasCancel())); - CGM.getOpenMPRuntime().emitIRBuilderParallel(*this, CS, BodyGenCB, PrivCB, FiniCB, IfCond, NumThreads, ProcBind, S.hasCancel()); - return; + llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( + AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); + // Builder.restoreIP( OMPBuilder.createParallel(Builder, AllocaIP, + // BodyGenCB, PrivCB, FiniCB, IfCond, NumThreads, ProcBind, + // S.hasCancel())); + CGM.getOpenMPRuntime().emitIRBuilderParallel(*this, CS, BodyGenCB, PrivCB, + FiniCB, IfCond, NumThreads, + ProcBind, S.hasCancel()); + return; } CGNonOpenMPIRBuilderRegion RegionScope(*this); @@ -4032,9 +4034,8 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy; - auto FiniCB = [this]( InsertPointTy IP, - llvm:: 
omp::Directive LeaveReason, - llvm::OpenMPIRBuilder:: OMPRegionInfo *Region) { + auto FiniCB = [this](InsertPointTy IP, llvm::omp::Directive LeaveReason, + llvm::OpenMPIRBuilder::OMPRegionInfo *Region) { OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); }; @@ -4098,39 +4099,37 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { } void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { - if (CGM.getLangOpts().OpenMPIRBuilder && !IsInsideNonOpenMPIRBuilderHandledRegion) { + if (CGM.getLangOpts().OpenMPIRBuilder && + !IsInsideNonOpenMPIRBuilderHandledRegion) { llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; - const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt(); - auto FiniCB = [this]( InsertPointTy ExitingIP, - llvm:: omp::Directive LeaveReason, - llvm::OpenMPIRBuilder:: OMPRegionInfo *Region) { - OMPBuilderCBHelpers::FinalizeOMPRegion(*this, ExitingIP); - }; + const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt(); + auto FiniCB = [this](InsertPointTy ExitingIP, + llvm::omp::Directive LeaveReason, + llvm::OpenMPIRBuilder::OMPRegionInfo *Region) { + OMPBuilderCBHelpers::FinalizeOMPRegion(*this, ExitingIP); + }; - auto BodyGenCB = - [SectionRegionBodyStmt, - this](InsertPointTy AllocaIP, - InsertPointTy CodeGenIP) { - Builder.restoreIP(CodeGenIP); - auto FiniBB = splitBBWithSuffix(Builder, false, ".sectionfini"); + auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP, + InsertPointTy CodeGenIP) { + Builder.restoreIP(CodeGenIP); + auto FiniBB = splitBBWithSuffix(Builder, false, ".sectionfini"); - OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, - *FiniBB); - // OMPBuilderCBHelpers::EmitOMPRegionBody(*this, - // SectionRegionBodyStmt, CodeGenIP, FiniBB); + OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, *FiniBB); + // OMPBuilderCBHelpers::EmitOMPRegionBody(*this, + // 
SectionRegionBodyStmt, CodeGenIP, FiniBB); - EmitStmt(SectionRegionBodyStmt); + EmitStmt(SectionRegionBodyStmt); - Builder.CreateBr(FiniBB); - }; + Builder.CreateBr(FiniBB); + }; - LexicalScope Scope(*this, S.getSourceRange()); - EmitStopPoint(&S); - Builder.restoreIP(OMPBuilder.createSection(Builder, BodyGenCB, FiniCB)); + LexicalScope Scope(*this, S.getSourceRange()); + EmitStopPoint(&S); + Builder.restoreIP(OMPBuilder.createSection(Builder, BodyGenCB, FiniCB)); - return; + return; } CGNonOpenMPIRBuilderRegion NonOpenMPIRBuilderRegion(*this); @@ -4199,9 +4198,8 @@ void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt(); - auto FiniCB = [this]( InsertPointTy IP, - llvm:: omp::Directive LeaveReason, - llvm::OpenMPIRBuilder:: OMPRegionInfo *Region) { + auto FiniCB = [this](InsertPointTy IP, llvm::omp::Directive LeaveReason, + llvm::OpenMPIRBuilder::OMPRegionInfo *Region) { OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); }; @@ -4247,9 +4245,8 @@ void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) { ? 
EmitScalarExpr(Filter, CGM.Int32Ty) : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0); - auto FiniCB = [this]( InsertPointTy IP, - llvm:: omp::Directive LeaveReason, - llvm::OpenMPIRBuilder:: OMPRegionInfo *Region) { + auto FiniCB = [this](InsertPointTy IP, llvm::omp::Directive LeaveReason, + llvm::OpenMPIRBuilder::OMPRegionInfo *Region) { OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); }; @@ -4289,9 +4286,8 @@ void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { HintInst = Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false); - auto FiniCB = [this]( InsertPointTy IP, - llvm:: omp::Directive LeaveReason, - llvm::OpenMPIRBuilder:: OMPRegionInfo *Region) { + auto FiniCB = [this](InsertPointTy IP, llvm::omp::Directive LeaveReason, + llvm::OpenMPIRBuilder::OMPRegionInfo *Region) { OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); }; @@ -5629,9 +5625,8 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { // Without clause, it behaves as if the threads clause is specified. 
const auto *C = S.getSingleClause(); - auto FiniCB = [this]( InsertPointTy IP, - llvm:: omp::Directive LeaveReason, - llvm::OpenMPIRBuilder:: OMPRegionInfo *Region) { + auto FiniCB = [this](InsertPointTy IP, llvm::omp::Directive LeaveReason, + llvm::OpenMPIRBuilder::OMPRegionInfo *Region) { OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); }; @@ -6909,15 +6904,15 @@ void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { if (CGM.getLangOpts().OpenMPIRBuilder) { llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); llvm_unreachable("TODO"); - // auto DK = OMPBuilder.getTopmostDirective(); - // if (OMPBuilder.isTopmostBuilderManaged()) { - llvm::Value *IfCondition = nullptr; - if (IfCond) - IfCondition = EvaluateExprAsBool(IfCond); - Builder.restoreIP( - OMPBuilder.createCancel(Builder, IfCondition, S.getCancelRegion())); - return; - // } + // auto DK = OMPBuilder.getTopmostDirective(); + // if (OMPBuilder.isTopmostBuilderManaged()) { + llvm::Value *IfCondition = nullptr; + if (IfCond) + IfCondition = EvaluateExprAsBool(IfCond); + Builder.restoreIP( + OMPBuilder.createCancel(Builder, IfCondition, S.getCancelRegion())); + return; + // } } CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond, diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 5c7b6cf17c7c2..498cd7c1050f7 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -34,10 +34,10 @@ #include "clang/Basic/OpenMPKinds.h" #include "clang/Basic/TargetInfo.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/Analysis/CFGPrinter.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/CFGPrinter.h" #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Debug.h" @@ -476,8 +476,6 @@ class CodeGenFunction : public CodeGenTypeCache { }; CGCapturedStmtInfo 
*CapturedStmtInfo = nullptr; - - /// RAII for correct setting/restoring of CapturedStmtInfo. class CGCapturedStmtRAII { private: @@ -492,28 +490,28 @@ class CodeGenFunction : public CodeGenTypeCache { ~CGCapturedStmtRAII() { CGF.CapturedStmtInfo = PrevCapturedStmtInfo; } }; - - /// Required until everything can be handled by OpenMPIRBuilder. - /// Isn't the ultimate solution to mixing OpenMPIRBuilder and non-OpenMPIRBuilder codegen either, but works with the current regression tests so far. + /// Isn't the ultimate solution to mixing OpenMPIRBuilder and + /// non-OpenMPIRBuilder codegen either, but works with the current regression + /// tests so far. bool IsInsideNonOpenMPIRBuilderHandledRegion = false; class CGNonOpenMPIRBuilderRegion { private: - CodeGenFunction &CGF; - bool PreviousIsInsideNonOpenMPIRBuilderHandledRegion; + CodeGenFunction &CGF; + bool PreviousIsInsideNonOpenMPIRBuilderHandledRegion; + public: - CGNonOpenMPIRBuilderRegion(CodeGenFunction &CGF) - : CGF(CGF), PreviousIsInsideNonOpenMPIRBuilderHandledRegion(CGF.IsInsideNonOpenMPIRBuilderHandledRegion) { - CGF.IsInsideNonOpenMPIRBuilderHandledRegion = true; - } - ~CGNonOpenMPIRBuilderRegion() { - CGF.IsInsideNonOpenMPIRBuilderHandledRegion = PreviousIsInsideNonOpenMPIRBuilderHandledRegion; - } + CGNonOpenMPIRBuilderRegion(CodeGenFunction &CGF) + : CGF(CGF), PreviousIsInsideNonOpenMPIRBuilderHandledRegion( + CGF.IsInsideNonOpenMPIRBuilderHandledRegion) { + CGF.IsInsideNonOpenMPIRBuilderHandledRegion = true; + } + ~CGNonOpenMPIRBuilderRegion() { + CGF.IsInsideNonOpenMPIRBuilderHandledRegion = + PreviousIsInsideNonOpenMPIRBuilderHandledRegion; + } }; - - - /// An abstract representation of regular/ObjC call/message targets. class AbstractCallee { /// The function declaration of the callee. 
@@ -1799,19 +1797,20 @@ class CodeGenFunction : public CodeGenTypeCache { /// Emit the Finalization for an OMP region /// \param CGF The Codegen function this belongs to /// \param IP Insertion point for generating the finalization code. - static void FinalizeOMPRegion(CodeGenFunction &CGF, InsertPointTy IP) { // TODO: move to .cpp file - CGBuilderTy::InsertPointGuard IPG(CGF.Builder); // MK: needed? - + static void FinalizeOMPRegion(CodeGenFunction &CGF, + InsertPointTy IP) { // TODO: move to .cpp file + CGBuilderTy::InsertPointGuard IPG(CGF.Builder); // MK: needed? + CGF.Builder.restoreIP(IP); - auto DestBB = llvm:: splitBB( CGF.Builder, false, ".ompfinalize"); + auto DestBB = llvm::splitBB(CGF.Builder, false, ".ompfinalize"); - // llvm::BasicBlock *IPBB = IP.getBlock(); - // llvm::BasicBlock *DestBB = IPBB->getUniqueSuccessor(); - // assert(DestBB && "Finalization block should have one successor!"); + // llvm::BasicBlock *IPBB = IP.getBlock(); + // llvm::BasicBlock *DestBB = IPBB->getUniqueSuccessor(); + // assert(DestBB && "Finalization block should have one successor!"); // erase and replace with cleanup branch. - // IPBB->getTerminator()->eraseFromParent(); // Don't do this! - // CGF.Builder.SetInsertPoint(IPBB); + // IPBB->getTerminator()->eraseFromParent(); // Don't do this! 
+ // CGF.Builder.SetInsertPoint(IPBB); CodeGenFunction::JumpDest Dest = CGF.getJumpDestInCurrentScope(DestBB); CGF.EmitBranchThroughCleanup(Dest); } diff --git a/llvm/include/llvm/Analysis/CFGPrinter.h b/llvm/include/llvm/Analysis/CFGPrinter.h index e2c55d66faf7b..1bb29af559c28 100644 --- a/llvm/include/llvm/Analysis/CFGPrinter.h +++ b/llvm/include/llvm/Analysis/CFGPrinter.h @@ -29,7 +29,6 @@ #include "llvm/Support/DOTGraphTraits.h" #include "llvm/Support/FormatVariadic.h" - namespace llvm { template struct GraphTraits; @@ -196,7 +195,7 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { }; } - //enum { MaxColumns = 80 }; + // enum { MaxColumns = 80 }; std::string Str; raw_string_ostream OS(Str); @@ -354,17 +353,16 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { void computeDeoptOrUnreachablePaths(const Function *F); }; - -void viewCFG(const Function *F) ; -void viewCFG(const Function &F) ; -void viewCFG(const BasicBlock *BB) ; -void viewCFG(const BasicBlock &BB) ; -void viewCFG(const Instruction *I) ; -void viewCFG(const Instruction &I) ; +void viewCFG(const Function *F); +void viewCFG(const Function &F); +void viewCFG(const BasicBlock *BB); +void viewCFG(const BasicBlock &BB); +void viewCFG(const Instruction *I); +void viewCFG(const Instruction &I); // RegionPrinter.cpp -void viewRegion(const Function *F) ; -void viewRegion(const Function &F) ; +void viewRegion(const Function *F); +void viewRegion(const Function &F); } // End llvm namespace namespace llvm { diff --git a/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h b/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h index 9b265486b2788..470e008df06ec 100644 --- a/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h +++ b/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h @@ -27,14 +27,13 @@ struct DefaultAnalysisGraphTraits { static GraphT getGraph(AnalysisT *A) { return A; } }; - template < typename AnalysisT, bool IsSimple, typename GraphT = AnalysisT *, typename AnalysisGraphTraitsT = 
DefaultAnalysisGraphTraits > class DOTGraphTraitsViewer : public FunctionPass { public: - DOTGraphTraitsViewer(StringRef GraphName, char &ID) - : FunctionPass(ID), Name(GraphName) {} + DOTGraphTraitsViewer(StringRef GraphName, char &ID) + : FunctionPass(ID), Name(GraphName) {} /// Return true if this function should be processed. /// @@ -70,7 +69,6 @@ class DOTGraphTraitsViewer : public FunctionPass { std::string Name; }; - template < typename AnalysisT, bool IsSimple, typename GraphT = AnalysisT *, typename AnalysisGraphTraitsT = DefaultAnalysisGraphTraits > diff --git a/llvm/include/llvm/Analysis/RegionPrinter.h b/llvm/include/llvm/Analysis/RegionPrinter.h index d096e27687a0b..e2598e2390e47 100644 --- a/llvm/include/llvm/Analysis/RegionPrinter.h +++ b/llvm/include/llvm/Analysis/RegionPrinter.h @@ -22,7 +22,8 @@ namespace llvm { class Instruction; FunctionPass *createRegionViewerPass(); - FunctionPass *createRegionViewerPass(const BasicBlock *BB, const Instruction *Inst); + FunctionPass *createRegionViewerPass(const BasicBlock *BB, + const Instruction *Inst); FunctionPass *createRegionOnlyViewerPass(); FunctionPass *createRegionPrinterPass(); FunctionPass *createRegionOnlyPrinterPass(); diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index f5096dbf4da39..e93afc2cce968 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -105,78 +105,67 @@ class OpenMPIRBuilder { /// A finalize callback knows about all objects that need finalization, e.g. /// destruction, when the scope of the currently generated construct is left /// at the time, and location, the callback is invoked. - using LeaveRegionCallbackTy = std::function; + using LeaveRegionCallbackTy = + std::function; + enum class RegionKind { + /// Sentinel object so we don't always have to check whether the stack is + /// empty. 
+ Toplevel, - enum class RegionKind { - /// Sentinel object so we don't always have to check whether the stack is empty. - Toplevel, + /// Actions on loop-associated directives are deferred until all applyXYZ + /// actions have been applied to them. + CanonicalLoop, - /// Actions on loop-associated directives are deferred until all applyXYZ actions have been applied to them. - CanonicalLoop, - - /// Non-loop OpenMP regions. - Directive - }; + /// Non-loop OpenMP regions. + Directive + }; struct OMPRegionInfo { - RegionKind Kind; - omp::Directive DK; - bool IsCancellable; // TODO: remove; determine ourselves whether there was a cancelling construct inside - LeaveRegionCallbackTy FiniCB; - - OMPRegionInfo( - RegionKind Kind, - omp::Directive DK, - bool IsCancellable, - LeaveRegionCallbackTy FiniCB) : Kind(Kind), DK(DK), IsCancellable(IsCancellable), FiniCB(std::move(FiniCB)) { - assertOK(); + RegionKind Kind; + omp::Directive DK; + bool IsCancellable; // TODO: remove; determine ourselves whether there was a + // cancelling construct inside + LeaveRegionCallbackTy FiniCB; + + OMPRegionInfo(RegionKind Kind, omp::Directive DK, bool IsCancellable, + LeaveRegionCallbackTy FiniCB) + : Kind(Kind), DK(DK), IsCancellable(IsCancellable), + FiniCB(std::move(FiniCB)) { + assertOK(); } #ifndef NDEBUG - ~OMPRegionInfo() { - assertOK(); - } + ~OMPRegionInfo() { assertOK(); } #endif - - /// Consistency self-check. void assertOK() const; }; - private: /// The finalization stack made up of finalize callbacks currently in-flight, /// wrapped into FinalizationInfo objects that reference also the finalization /// target block and the kind of cancellable directive. 
- SmallVector,8> RegionStack; - + SmallVector, 8> RegionStack; - OMPRegionInfo *getInnermostDirectionRegion(omp::Directive DK) ; + OMPRegionInfo *getInnermostDirectionRegion(omp::Directive DK); - OMPRegionInfo* pushRegion( omp::Directive DK, - bool IsCancellable, - LeaveRegionCallbackTy FiniCB = {}); + OMPRegionInfo *pushRegion(omp::Directive DK, bool IsCancellable, + LeaveRegionCallbackTy FiniCB = {}); - void emitRegionExit( InsertPointTy ExitingIP, OMPRegionInfo* RegionToLeave, omp::Directive LeaveReason =omp:: OMPD_unknown); + void emitRegionExit(InsertPointTy ExitingIP, OMPRegionInfo *RegionToLeave, + omp::Directive LeaveReason = omp::OMPD_unknown); void popRegion(omp::Directive DK); - - - /// Return true if the last entry in the finalization stack is of kind \p DK /// and cancellable. bool isLastFinalizationInfoCancellable(omp::Directive DK) { - // FIXME: Don't all the regions in-between also need to be cancellable? - return getInnermostDirectionRegion(DK)->IsCancellable; + // FIXME: Don't all the regions in-between also need to be cancellable? + return getInnermostDirectionRegion(DK)->IsCancellable; } - - public: /// Callback type for body (=inner region) code generation /// @@ -287,26 +276,24 @@ class OpenMPIRBuilder { /// Generator for '#omp parallel' /// /// \param Loc The insert and source location description. - /// \param OuterAllocaIP The insertion points to be used for alloca instructions. - /// \param BodyGenCB Callback that will generate the region code. - /// \param PrivCB Callback to copy a given variable (think copy constructor). - /// \param FiniCB Callback to finalize variable copies. - /// \param IfCondition The evaluated 'if' clause expression, if any. - /// \param NumThreads The evaluated 'num_threads' clause expression, if any. - /// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind). - /// + /// \param OuterAllocaIP The insertion points to be used for alloca + /// instructions. 
\param BodyGenCB Callback that will generate the region + /// code. \param PrivCB Callback to copy a given variable (think copy + /// constructor). \param FiniCB Callback to finalize variable copies. \param + /// IfCondition The evaluated 'if' clause expression, if any. \param + /// NumThreads The evaluated 'num_threads' clause expression, if any. \param + /// ProcBind The value of the 'proc_bind' clause (see ProcBindKind). + /// /// \param IsCancellable Flag to indicate a cancellable parallel region. /// MK: Remove? Any non-cancellable? Makes it a difference to the runtime? /// /// \returns The insertion position *after* the parallel. - IRBuilder<>::InsertPoint createParallel(const LocationDescription &Loc, - InsertPointTy OuterAllocaIP, - BodyGenCallbackTy BodyGenCB, - PrivatizeCallbackTy PrivCB, - LeaveRegionCallbackTy FiniCB, - Value *IfCondition, - Value *NumThreads, - omp::ProcBindKind ProcBind, bool IsCancellable); + IRBuilder<>::InsertPoint + createParallel(const LocationDescription &Loc, InsertPointTy OuterAllocaIP, + BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, + LeaveRegionCallbackTy FiniCB, Value *IfCondition, + Value *NumThreads, omp::ProcBindKind ProcBind, + bool IsCancellable); /// Generator for the control flow structure of an OpenMP canonical loop. /// @@ -980,7 +967,7 @@ class OpenMPIRBuilder { /// \returns The insertion position *after* the single call. InsertPointTy createSingle(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - LeaveRegionCallbackTy FiniCB, bool IsNowait, + LeaveRegionCallbackTy FiniCB, bool IsNowait, llvm::Value *DidIt); /// Generator for '#omp master' @@ -992,7 +979,7 @@ class OpenMPIRBuilder { /// \returns The insertion position *after* the master. 
InsertPointTy createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - LeaveRegionCallbackTy FiniCB); + LeaveRegionCallbackTy FiniCB); /// Generator for '#omp masked' /// @@ -1003,7 +990,7 @@ class OpenMPIRBuilder { /// \returns The insertion position *after* the masked. InsertPointTy createMasked(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - LeaveRegionCallbackTy FiniCB, Value *Filter); + LeaveRegionCallbackTy FiniCB, Value *Filter); /// Generator for '#omp critical' /// @@ -1016,7 +1003,7 @@ class OpenMPIRBuilder { /// \returns The insertion position *after* the critical. InsertPointTy createCritical(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - LeaveRegionCallbackTy FiniCB, + LeaveRegionCallbackTy FiniCB, StringRef CriticalName, Value *HintInst); /// Generator for '#omp ordered depend (source | sink)' @@ -1045,7 +1032,7 @@ class OpenMPIRBuilder { /// \returns The insertion position *after* the ordered. InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - LeaveRegionCallbackTy FiniCB, + LeaveRegionCallbackTy FiniCB, bool IsThreads); /// Generator for '#omp sections' @@ -1063,7 +1050,7 @@ class OpenMPIRBuilder { InsertPointTy AllocaIP, ArrayRef SectionCBs, PrivatizeCallbackTy PrivCB, - LeaveRegionCallbackTy FiniCB, bool IsCancellable, + LeaveRegionCallbackTy FiniCB, bool IsCancellable, bool IsNowait); /// Generator for '#omp section' @@ -1074,7 +1061,7 @@ class OpenMPIRBuilder { /// \returns The insertion position *after* the section. 
InsertPointTy createSection(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - LeaveRegionCallbackTy FiniCB); + LeaveRegionCallbackTy FiniCB); /// Generate conditional branch and relevant BasicBlocks through which private /// threads copy the 'copyin' variables from Master copy to threadprivate @@ -1278,7 +1265,7 @@ class OpenMPIRBuilder { InsertPointTy EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall, Instruction *ExitCall, BodyGenCallbackTy BodyGenCB, - LeaveRegionCallbackTy FiniCB, bool Conditional = false, + LeaveRegionCallbackTy FiniCB, bool Conditional = false, bool HasFinalize = true, bool IsCancellable = false); /// Get the platform-specific name separator. diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index 00544f64dae4d..b7e24e943da2d 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -2554,11 +2554,10 @@ class IRBuilder : public IRBuilderBase { // Create wrappers for C Binding types (see CBindingWrapping.h). 
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(IRBuilder<>, LLVMBuilderRef) - -void viewCFG(const llvm::IRBuilderBase *Builder) ; -void viewCFG(const llvm::IRBuilderBase &Builder) ; -void viewCFG(const llvm::IRBuilderBase::InsertPoint *IP) ; -void viewCFG(const llvm::IRBuilderBase::InsertPoint &IP) ; +void viewCFG(const llvm::IRBuilderBase *Builder); +void viewCFG(const llvm::IRBuilderBase &Builder); +void viewCFG(const llvm::IRBuilderBase::InsertPoint *IP); +void viewCFG(const llvm::IRBuilderBase::InsertPoint &IP); } // end namespace llvm #endif // LLVM_IR_IRBUILDER_H diff --git a/llvm/lib/Analysis/CFGPrinter.cpp b/llvm/lib/Analysis/CFGPrinter.cpp index 55998c603cca0..d9c82b12128d5 100644 --- a/llvm/lib/Analysis/CFGPrinter.cpp +++ b/llvm/lib/Analysis/CFGPrinter.cpp @@ -271,7 +271,6 @@ void Function::viewCFG(bool ViewCFGOnly, const BlockFrequencyInfo *BFI, ViewGraph(&CFGInfo, "cfg" + getName(), ViewCFGOnly); } - /// viewCFGOnly - This function is meant for use from the debugger. It works /// just like viewCFG, but it does not include the contents of basic blocks /// into the nodes, just the label. 
If you are only interested in the CFG @@ -335,62 +334,58 @@ bool DOTGraphTraits::isNodeHidden(const BasicBlock *Node, return false; } - - - -void llvm::viewCFG(const Function* F) { - if (!F) return; -F->viewCFG(); -} -void llvm::viewCFG(const Function& F) { - return viewCFG(&F); +void llvm::viewCFG(const Function *F) { + if (!F) + return; + F->viewCFG(); } +void llvm::viewCFG(const Function &F) { return viewCFG(&F); } -void llvm::viewCFG(const BasicBlock* BB) { - if (!BB) return; - auto *F = BB->getParent(); - DOTFuncInfo CFGInfo(F, BB, nullptr); - ViewGraph(&CFGInfo, "cfg" + F->getName(), false); -} -void llvm::viewCFG(const BasicBlock& BB) { - return viewCFG(&BB); +void llvm::viewCFG(const BasicBlock *BB) { + if (!BB) + return; + auto *F = BB->getParent(); + DOTFuncInfo CFGInfo(F, BB, nullptr); + ViewGraph(&CFGInfo, "cfg" + F->getName(), false); } +void llvm::viewCFG(const BasicBlock &BB) { return viewCFG(&BB); } -void llvm::viewCFG(const Instruction* I) { - if (!I) return; - auto *BB =I-> getParent(); - auto *F = BB->getParent(); - DOTFuncInfo CFGInfo(F, BB, I); - ViewGraph(&CFGInfo, "cfg" + F->getName(), false); -} -void llvm::viewCFG(const Instruction& I) { - return viewCFG(&I); +void llvm::viewCFG(const Instruction *I) { + if (!I) + return; + auto *BB = I->getParent(); + auto *F = BB->getParent(); + DOTFuncInfo CFGInfo(F, BB, I); + ViewGraph(&CFGInfo, "cfg" + F->getName(), false); } +void llvm::viewCFG(const Instruction &I) { return viewCFG(&I); } -void llvm::viewCFG(const llvm::IRBuilderBase* Builder) { - if (!Builder) return; - return viewCFG(Builder->saveIP()); +void llvm::viewCFG(const llvm::IRBuilderBase *Builder) { + if (!Builder) + return; + return viewCFG(Builder->saveIP()); } -void llvm::viewCFG(const llvm::IRBuilderBase &Builder) { - return viewCFG(&Builder); +void llvm::viewCFG(const llvm::IRBuilderBase &Builder) { + return viewCFG(&Builder); } -void llvm::viewCFG(const llvm::IRBuilderBase::InsertPoint* IP) { - if (!IP) return; - if (!IP->isSet()) 
return; +void llvm::viewCFG(const llvm::IRBuilderBase::InsertPoint *IP) { + if (!IP) + return; + if (!IP->isSet()) + return; - assert(IP->isSet()); - BasicBlock* Block = IP->getBlock(); - BasicBlock::iterator Point = IP->getPoint(); - Function* F = Block->getParent(); + assert(IP->isSet()); + BasicBlock *Block = IP->getBlock(); + BasicBlock::iterator Point = IP->getPoint(); + Function *F = Block->getParent(); - // if (!CFGFuncName.empty() && !getName().contains(CFGFuncName)) - // return; - Instruction *Inst = (Point == Block->end()) ? nullptr : &*Point; - DOTFuncInfo CFGInfo(F, Block, Inst); - ViewGraph(&CFGInfo, "cfg" + F->getName(), false); + // if (!CFGFuncName.empty() && !getName().contains(CFGFuncName)) + // return; + Instruction *Inst = (Point == Block->end()) ? nullptr : &*Point; + DOTFuncInfo CFGInfo(F, Block, Inst); + ViewGraph(&CFGInfo, "cfg" + F->getName(), false); } -void llvm::viewCFG(const llvm::IRBuilderBase ::InsertPoint &IP) { - return viewCFG(&IP); +void llvm::viewCFG(const llvm::IRBuilderBase ::InsertPoint &IP) { + return viewCFG(&IP); } - diff --git a/llvm/lib/Analysis/RegionPrinter.cpp b/llvm/lib/Analysis/RegionPrinter.cpp index 6f591362c48e0..dd68c5c300088 100644 --- a/llvm/lib/Analysis/RegionPrinter.cpp +++ b/llvm/lib/Analysis/RegionPrinter.cpp @@ -30,25 +30,24 @@ onlySimpleRegions("only-simple-regions", cl::Hidden, cl::init(false)); - - namespace { - struct HighlightingRegionInfo { - RegionInfo* RI; - const Function* F; - const BasicBlock* HighlightBB; - const Instruction* HighlightInst; - - HighlightingRegionInfo() = delete; - - HighlightingRegionInfo(RegionInfo *RI, const Function* F, const BasicBlock* HighlightBB = nullptr, const Instruction* HighlightInst = nullptr) - : RI(RI), F(F), HighlightBB(HighlightBB), HighlightInst(HighlightInst) {} - - public: - RegionInfo* getRegionInfo() const {return RI;} - const Function* getFunction() const { return F; } - }; - +struct HighlightingRegionInfo { + RegionInfo *RI; + const Function *F; + const 
BasicBlock *HighlightBB; + const Instruction *HighlightInst; + + HighlightingRegionInfo() = delete; + + HighlightingRegionInfo(RegionInfo *RI, const Function *F, + const BasicBlock *HighlightBB = nullptr, + const Instruction *HighlightInst = nullptr) + : RI(RI), F(F), HighlightBB(HighlightBB), HighlightInst(HighlightInst) {} + +public: + RegionInfo *getRegionInfo() const { return RI; } + const Function *getFunction() const { return F; } +}; #if 0 struct HighlightingRegionInfoPassGraphTraits { @@ -58,29 +57,28 @@ namespace { } }; #endif -} +} // namespace namespace llvm { - template <> - struct GraphTraits : public GraphTraits> { - using nodes_iterator = - df_iterator, false, - GraphTraits>>; - - static NodeRef getEntryNode(HighlightingRegionInfo *G) { - return GraphTraits>::getEntryNode(G->RI->getTopLevelRegion()); - } - - static nodes_iterator nodes_begin(HighlightingRegionInfo* G) { - return nodes_iterator::begin(getEntryNode(G)); - } - - static nodes_iterator nodes_end(HighlightingRegionInfo *G) { - return nodes_iterator::end(getEntryNode(G)); - } - }; +template <> +struct GraphTraits + : public GraphTraits> { + using nodes_iterator = df_iterator, + false, GraphTraits>>; + + static NodeRef getEntryNode(HighlightingRegionInfo *G) { + return GraphTraits>::getEntryNode( + G->RI->getTopLevelRegion()); + } + static nodes_iterator nodes_begin(HighlightingRegionInfo *G) { + return nodes_iterator::begin(getEntryNode(G)); + } + static nodes_iterator nodes_end(HighlightingRegionInfo *G) { + return nodes_iterator::end(getEntryNode(G)); + } +}; template<> struct DOTGraphTraits : public DefaultDOTGraphTraits { @@ -186,26 +184,19 @@ struct DOTGraphTraits : public DOTGraphTraits { } }; - } //end namespace llvm - - namespace { - struct RegionInfoPassGraphTraits { - static RegionInfo* getGraph(RegionInfoPass* RIP) { - return &RIP->getRegionInfo(); - } - }; - - - -} +struct RegionInfoPassGraphTraits { + static RegionInfo *getGraph(RegionInfoPass *RIP) { + return 
&RIP->getRegionInfo(); + } +}; +} // namespace namespace llvm { - #if 0 template <> struct GraphTraits : public GraphTraits { @@ -214,14 +205,13 @@ namespace llvm { }; #endif +template <> +struct DOTGraphTraits + : public DOTGraphTraits { + using Base = DOTGraphTraits; + using Traits = GraphTraits; - template <> - struct DOTGraphTraits : public DOTGraphTraits { - using Base = DOTGraphTraits; - using Traits = GraphTraits; - - DOTGraphTraits(bool IsSimple = false) : Base(IsSimple) {} - + DOTGraphTraits(bool IsSimple = false) : Base(IsSimple) {} #if 0 static std::string getGraphName(const HighlightingRegionInfo *G) { @@ -240,110 +230,110 @@ namespace llvm { } #endif - static std::string getGraphName(const HighlightingRegionInfo *) { return "Region Graph"; } - - std::string getNodeLabel(RegionNode *Node, HighlightingRegionInfo *G) { - //return Base::getNodeLabel(Node, reinterpret_cast(G->RI->getTopLevelRegion())); - - if (!Node->isSubRegion()) { - BasicBlock *BB = Node->getNodeAs(); - - DOTFuncInfo CFGInfo(G->F, G->HighlightBB, G->HighlightInst); - if (isSimple()) - return DOTGraphTraits - ::getSimpleNodeLabel(BB, &CFGInfo); - else - return DOTGraphTraits - ::getCompleteNodeLabel(BB, &CFGInfo); - } - - return "Not implemented"; - } - - - static std::string getNodeAttributes(RegionNode *R, HighlightingRegionInfo *G) { - auto HighlightBB = G->HighlightBB; - if (!R->isSubRegion() && R->getNodeAs() == HighlightBB) { - return "penwidth=5.0,style=filled"; - } + static std::string getGraphName(const HighlightingRegionInfo *) { + return "Region Graph"; + } - return ""; - } + std::string getNodeLabel(RegionNode *Node, HighlightingRegionInfo *G) { + // return Base::getNodeLabel(Node, reinterpret_cast(G->RI->getTopLevelRegion())); + if (!Node->isSubRegion()) { + BasicBlock *BB = Node->getNodeAs(); - std::string getEdgeAttributes(RegionNode *srcNode, - Traits::ChildIteratorType CI, - HighlightingRegionInfo *G) { - RegionNode *destNode = *CI; + DOTFuncInfo CFGInfo(G->F, 
G->HighlightBB, G->HighlightInst); + if (isSimple()) + return DOTGraphTraits::getSimpleNodeLabel(BB, &CFGInfo); + else + return DOTGraphTraits::getCompleteNodeLabel(BB, + &CFGInfo); + } - if (srcNode->isSubRegion() || destNode->isSubRegion()) - return ""; + return "Not implemented"; + } - // In case of a backedge, do not use it to define the layout of the nodes. - BasicBlock *srcBB = srcNode->getNodeAs(); - BasicBlock *destBB = destNode->getNodeAs(); + static std::string getNodeAttributes(RegionNode *R, + HighlightingRegionInfo *G) { + auto HighlightBB = G->HighlightBB; + if (!R->isSubRegion() && R->getNodeAs() == HighlightBB) { + return "penwidth=5.0,style=filled"; + } - Region *R = G->RI->getRegionFor(destBB); + return ""; + } - while (R && R->getParent()) - if (R->getParent()->getEntry() == destBB) - R = R->getParent(); - else - break; + std::string getEdgeAttributes(RegionNode *srcNode, + Traits::ChildIteratorType CI, + HighlightingRegionInfo *G) { + RegionNode *destNode = *CI; - if (R && R->getEntry() == destBB && R->contains(srcBB)) - return "constraint=false"; + if (srcNode->isSubRegion() || destNode->isSubRegion()) + return ""; - return ""; - } + // In case of a backedge, do not use it to define the layout of the nodes. 
+ BasicBlock *srcBB = srcNode->getNodeAs(); + BasicBlock *destBB = destNode->getNodeAs(); - - static void printRegionCluster(const Region &R, GraphWriter &GW, unsigned depth = 0, const BasicBlock *HighlightBB=nullptr,const Instruction *HighlightInst = nullptr ) { - raw_ostream &O = GW.getOStream(); - O.indent(2 * depth) << "subgraph cluster_" << static_cast(&R) - << " {\n"; - O.indent(2 * (depth + 1)) << "label = \"\";\n"; + Region *R = G->RI->getRegionFor(destBB); - + while (R && R->getParent()) + if (R->getParent()->getEntry() == destBB) + R = R->getParent(); + else + break; - if (!onlySimpleRegions || R.isSimple()) { - O.indent(2 * (depth + 1)) << "style = filled;\n"; - O.indent(2 * (depth + 1)) << "color = " - << ((R.getDepth() * 2 % 12) + 1) << "\n"; + if (R && R->getEntry() == destBB && R->contains(srcBB)) + return "constraint=false"; - } else { - O.indent(2 * (depth + 1)) << "style = solid;\n"; - O.indent(2 * (depth + 1)) << "color = " - << ((R.getDepth() * 2 % 12) + 2) << "\n"; - } + return ""; + } - for (const auto &RI : R) - printRegionCluster(*RI, GW, depth + 1,HighlightBB, HighlightInst ); + static void printRegionCluster(const Region &R, + GraphWriter &GW, + unsigned depth = 0, + const BasicBlock *HighlightBB = nullptr, + const Instruction *HighlightInst = nullptr) { + raw_ostream &O = GW.getOStream(); + O.indent(2 * depth) << "subgraph cluster_" << static_cast(&R) + << " {\n"; + O.indent(2 * (depth + 1)) << "label = \"\";\n"; - const RegionInfo &RI = *static_cast(R.getRegionInfo()); + if (!onlySimpleRegions || R.isSimple()) { + O.indent(2 * (depth + 1)) << "style = filled;\n"; + O.indent(2 * (depth + 1)) + << "color = " << ((R.getDepth() * 2 % 12) + 1) << "\n"; - for (auto *BB : R.blocks()) - if (RI.getRegionFor(BB) == &R) - O.indent(2 * (depth + 1)) << "Node" - << static_cast(RI.getTopLevelRegion()->getBBNode(BB)) - << ";\n"; - + } else { + O.indent(2 * (depth + 1)) << "style = solid;\n"; + O.indent(2 * (depth + 1)) + << "color = " << ((R.getDepth() 
* 2 % 12) + 2) << "\n"; + } - O.indent(2 * depth) << "}\n"; - } + for (const auto &RI : R) + printRegionCluster(*RI, GW, depth + 1, HighlightBB, HighlightInst); + const RegionInfo &RI = *static_cast(R.getRegionInfo()); - static void addCustomGraphFeatures( - const HighlightingRegionInfo *G, - GraphWriter &GW) { - raw_ostream &O = GW.getOStream(); - O << "\tcolorscheme = \"paired12\"\n"; - printRegionCluster(*G->RI->getTopLevelRegion(), GW, 4, G->HighlightBB, G->HighlightInst); - } - }; -} + for (auto *BB : R.blocks()) + if (RI.getRegionFor(BB) == &R) + O.indent(2 * (depth + 1)) + << "Node" + << static_cast(RI.getTopLevelRegion()->getBBNode(BB)) + << ";\n"; + O.indent(2 * depth) << "}\n"; + } + static void + addCustomGraphFeatures(const HighlightingRegionInfo *G, + GraphWriter &GW) { + raw_ostream &O = GW.getOStream(); + O << "\tcolorscheme = \"paired12\"\n"; + printRegionCluster(*G->RI->getTopLevelRegion(), GW, 4, G->HighlightBB, + G->HighlightInst); + } +}; +} // namespace llvm namespace { struct RegionPrinter @@ -370,51 +360,46 @@ struct RegionOnlyPrinter }; char RegionOnlyPrinter::ID = 0; - - -struct RegionViewer: public FunctionPass { - using Base = FunctionPass; +struct RegionViewer : public FunctionPass { + using Base = FunctionPass; static char ID; - RegionViewer() : RegionViewer(nullptr,nullptr) {} + RegionViewer() : RegionViewer(nullptr, nullptr) {} - RegionViewer(const BasicBlock *HighlightBB, const Instruction *HighlightInst) : FunctionPass(ID), HighlightBB(HighlightBB), HighlightInst(HighlightInst) { - initializeRegionViewerPass(*PassRegistry::getPassRegistry()); + RegionViewer(const BasicBlock *HighlightBB, const Instruction *HighlightInst) + : FunctionPass(ID), HighlightBB(HighlightBB), + HighlightInst(HighlightInst) { + initializeRegionViewerPass(*PassRegistry::getPassRegistry()); } - virtual bool processFunction(Function &F, RegionInfo &Analysis) { - return true; + return true; } bool runOnFunction(Function &F) override { - auto &Analysis = 
getAnalysis().getRegionInfo(); - - if (!processFunction(F, Analysis)) - return false; + auto &Analysis = getAnalysis().getRegionInfo(); - + if (!processFunction(F, Analysis)) + return false; - HighlightingRegionInfo Graph(&Analysis, &F, HighlightBB, HighlightInst); - ViewGraph(&Graph, "reg", false, Twine("Region Graph for '") + F.getName().str() + "' function"); + HighlightingRegionInfo Graph(&Analysis, &F, HighlightBB, HighlightInst); + ViewGraph(&Graph, "reg", false, + Twine("Region Graph for '") + F.getName().str() + "' function"); - return false; + return false; } void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesAll(); - AU.addRequired(); + AU.setPreservesAll(); + AU.addRequired(); } - private: - const BasicBlock*HighlightBB ; - const Instruction*HighlightInst ; + const BasicBlock *HighlightBB; + const Instruction *HighlightInst; }; char RegionViewer::ID = 0; - - struct RegionOnlyViewer : public DOTGraphTraitsViewer { @@ -427,7 +412,7 @@ struct RegionOnlyViewer }; char RegionOnlyViewer::ID = 0; -} //end anonymous namespace +} // end anonymous namespace INITIALIZE_PASS(RegionPrinter, "dot-regions", "Print regions of function to 'dot' file", true, true) @@ -454,8 +439,9 @@ FunctionPass* llvm::createRegionViewerPass() { return new RegionViewer(); } -FunctionPass *llvm::createRegionViewerPass(const BasicBlock *BB, const Instruction *Inst) { - return new RegionViewer(BB,Inst); +FunctionPass *llvm::createRegionViewerPass(const BasicBlock *BB, + const Instruction *Inst) { + return new RegionViewer(BB, Inst); } FunctionPass* llvm::createRegionOnlyViewerPass() { @@ -463,7 +449,7 @@ FunctionPass* llvm::createRegionOnlyViewerPass() { } #ifndef NDEBUG -static void viewRegionInfo( RegionInfo *RI, bool ShortNames) { +static void viewRegionInfo(RegionInfo *RI, bool ShortNames) { assert(RI && "Argument must be non-null"); llvm::Function *F = RI->getTopLevelRegion()->getEntry()->getParent(); @@ -488,54 +474,48 @@ static void invokeFunctionPass(const 
Function *F, FunctionPass *ViewerPass) { FPM.doFinalization(); } -void llvm::viewRegion( RegionInfo *RI) { if (!RI) return; viewRegionInfo(RI, false); } -void llvm::viewRegion( RegionInfo &RI) { return viewRegion(&RI); } +void llvm::viewRegion(RegionInfo *RI) { + if (!RI) + return; + viewRegionInfo(RI, false); +} +void llvm::viewRegion(RegionInfo &RI) { return viewRegion(&RI); } void llvm::viewRegion(const Function *F) { - if (!F) return; + if (!F) + return; invokeFunctionPass(F, createRegionViewerPass()); } -void llvm::viewRegion(const Function &F) { - viewRegion(&F); -} +void llvm::viewRegion(const Function &F) { viewRegion(&F); } void llvm::viewRegionOnly(RegionInfo *RI) { viewRegionInfo(RI, true); } void llvm::viewRegionOnly(const Function *F) { - if (!F) return; + if (!F) + return; invokeFunctionPass(F, createRegionOnlyViewerPass()); } - - - - - - void llvm::viewRegion(const llvm::BasicBlock *BB) { - if (!BB) return ; + if (!BB) + return; - auto F =BB->getParent(); + auto F = BB->getParent(); - invokeFunctionPass(F, createRegionViewerPass( BB, nullptr )); -} -void llvm::viewRegion(const llvm::BasicBlock &BB) { - return viewRegion(&BB); + invokeFunctionPass(F, createRegionViewerPass(BB, nullptr)); } +void llvm::viewRegion(const llvm::BasicBlock &BB) { return viewRegion(&BB); } +void llvm::viewRegion(const llvm::Instruction *Inst) { + if (!Inst) + return; -void llvm::viewRegion(const llvm::Instruction *Inst){ - if (!Inst) return; - - auto Block = Inst->getParent(); - auto F = Inst->getFunction(); - - invokeFunctionPass(F, createRegionViewerPass( Block, Inst )); -} + auto Block = Inst->getParent(); + auto F = Inst->getFunction(); -void llvm:: viewRegion(const llvm::Instruction &I) { - return viewRegion(&I); + invokeFunctionPass(F, createRegionViewerPass(Block, Inst)); } +void llvm::viewRegion(const llvm::Instruction &I) { return viewRegion(&I); } #endif diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 
9e6a43bb896d2..4c5985bddb369 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -20,9 +20,9 @@ #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/RegionPrinter.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Analysis/RegionPrinter.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" @@ -323,9 +323,10 @@ BasicBlock *llvm::splitBB(IRBuilder<> &Builder, bool CreateBranch, return New; } -static BasicBlock *splitBBWithSuffix(IRBuilderBase::InsertPoint IP, bool CreateBranch, llvm::Twine Suffix) { - BasicBlock *Old = IP.getBlock(); - return splitBB(IP, CreateBranch, Old->getName() + Suffix); +static BasicBlock *splitBBWithSuffix(IRBuilderBase::InsertPoint IP, + bool CreateBranch, llvm::Twine Suffix) { + BasicBlock *Old = IP.getBlock(); + return splitBB(IP, CreateBranch, Old->getName() + Suffix); } BasicBlock *llvm::splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, @@ -524,16 +525,19 @@ void OpenMPIRBuilder::finalize(Function *Fn) { } OpenMPIRBuilder::OpenMPIRBuilder(Module &M) : M(M), Builder(M.getContext()) { - RegionStack.emplace_back(new OMPRegionInfo(RegionKind::Toplevel, omp::OMPD_unknown,/*IsCancellable*/ false, []( InsertPointTy ExitingIP, - omp::Directive LeaveReason, - OMPRegionInfo *Region) { + RegionStack.emplace_back(new OMPRegionInfo( + RegionKind::Toplevel, omp::OMPD_unknown, /*IsCancellable*/ false, + [](InsertPointTy ExitingIP, omp::Directive LeaveReason, + OMPRegionInfo *Region) { llvm_unreachable("top-level is not finialized"); - })); - assert(RegionStack.size()==1); + })); + assert(RegionStack.size() == 1); } OpenMPIRBuilder::~OpenMPIRBuilder() { - assert(RegionStack.size() == 1 && RegionStack.back()->Kind == RegionKind::Toplevel && "OMPRegion push/pop must be balanced"); + 
assert(RegionStack.size() == 1 && + RegionStack.back()->Kind == RegionKind::Toplevel && + "OMPRegion push/pop must be balanced"); assert(OutlineInfos.empty() && "There must be no outstanding outlinings"); } @@ -662,79 +666,79 @@ Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) { "omp_global_thread_num"); } -OpenMPIRBuilder::OMPRegionInfo *OpenMPIRBuilder::getInnermostDirectionRegion(omp::Directive DK) { - for (auto& R : reverse(RegionStack)) { - if (R->Kind == RegionKind::Toplevel) - return R.get(); - if (R->Kind == RegionKind::Directive && R->DK == DK) - return R.get(); - } - llvm_unreachable("expected toplevel region"); +OpenMPIRBuilder::OMPRegionInfo * +OpenMPIRBuilder::getInnermostDirectionRegion(omp::Directive DK) { + for (auto &R : reverse(RegionStack)) { + if (R->Kind == RegionKind::Toplevel) + return R.get(); + if (R->Kind == RegionKind::Directive && R->DK == DK) + return R.get(); + } + llvm_unreachable("expected toplevel region"); } -OpenMPIRBuilder::OMPRegionInfo*OpenMPIRBuilder:: pushRegion( omp::Directive DK, - bool IsCancellable, - LeaveRegionCallbackTy FiniCB ) { - RegionStack.emplace_back(new OMPRegionInfo(RegionKind::Directive, DK, IsCancellable,std::move( FiniCB))); - return RegionStack.back().get(); +OpenMPIRBuilder::OMPRegionInfo * +OpenMPIRBuilder::pushRegion(omp::Directive DK, bool IsCancellable, + LeaveRegionCallbackTy FiniCB) { + RegionStack.emplace_back(new OMPRegionInfo(RegionKind::Directive, DK, + IsCancellable, std::move(FiniCB))); + return RegionStack.back().get(); } - -void OpenMPIRBuilder::emitRegionExit( InsertPointTy ExitingIP, OMPRegionInfo* RegionToLeave, omp::Directive LeaveReason) { +void OpenMPIRBuilder::emitRegionExit(InsertPointTy ExitingIP, + OMPRegionInfo *RegionToLeave, + omp::Directive LeaveReason) { #ifndef NDEBUG - switch(LeaveReason) { - case OMPD_unknown: - // Regular region exit - break; - case OMPD_cancellation_point: - case OMPD_barrier: - case OMPD_cancel: - // Cancellation // TODO: Also need need to know 
whether #pragma omp cancel for/#pragma omp cancel parallel/?? - break; - default: - llvm_unreachable("unrecognized reason to leave region"); - } + switch (LeaveReason) { + case OMPD_unknown: + // Regular region exit + break; + case OMPD_cancellation_point: + case OMPD_barrier: + case OMPD_cancel: + // Cancellation // TODO: Also need need to know whether #pragma omp cancel + // for/#pragma omp cancel parallel/?? + break; + default: + llvm_unreachable("unrecognized reason to leave region"); + } #endif - for (auto &R : reverse(RegionStack) ) { - if ( R->FiniCB) - R->FiniCB(ExitingIP, LeaveReason,R.get()); + for (auto &R : reverse(RegionStack)) { + if (R->FiniCB) + R->FiniCB(ExitingIP, LeaveReason, R.get()); - if (R.get() == RegionToLeave) return; - } - llvm_unreachable("region to exit not on stack?"); + if (R.get() == RegionToLeave) + return; + } + llvm_unreachable("region to exit not on stack?"); } - -void OpenMPIRBuilder::popRegion(omp::Directive DK) { - assert( RegionStack.back()->DK == DK && "unbalanced region push/pop" ); - RegionStack.back()->assertOK(); - RegionStack.pop_back(); +void OpenMPIRBuilder::popRegion(omp::Directive DK) { + assert(RegionStack.back()->DK == DK && "unbalanced region push/pop"); + RegionStack.back()->assertOK(); + RegionStack.pop_back(); } - - - - -void OpenMPIRBuilder::OMPRegionInfo:: assertOK() const { +void OpenMPIRBuilder::OMPRegionInfo::assertOK() const { #ifndef NDEBUG - switch (Kind) { - case RegionKind::Toplevel: - assert(DK == OMPD_unknown && "toplevel region is not a specific kind"); - assert(!IsCancellable && "top-level is not cancellable"); - break; - case RegionKind::CanonicalLoop: - break; - case RegionKind::Directive: - switch (DK) { - case OMPD_parallel: - case OMPD_sections: - break; - default: - llvm_unreachable("Not a recognized OpenMP region"); - } - break; + switch (Kind) { + case RegionKind::Toplevel: + assert(DK == OMPD_unknown && "toplevel region is not a specific kind"); + assert(!IsCancellable && "top-level is 
not cancellable"); + break; + case RegionKind::CanonicalLoop: + break; + case RegionKind::Directive: + switch (DK) { + case OMPD_parallel: + case OMPD_sections: + break; + default: + llvm_unreachable("Not a recognized OpenMP region"); } + break; + } #endif } @@ -879,9 +883,10 @@ OpenMPIRBuilder::createCancel(const LocationDescription &Loc, return Builder.saveIP(); } -void OpenMPIRBuilder::emitCancelationCheckImpl(LocationDescription Loc, Value *CancelFlag, - omp::Directive CanceledDirective, - omp::Directive CancelledBy) { +void OpenMPIRBuilder::emitCancelationCheckImpl(LocationDescription Loc, + Value *CancelFlag, + omp::Directive CanceledDirective, + omp::Directive CancelledBy) { assert(isLastFinalizationInfoCancellable(CanceledDirective) && "Unexpected cancellation!"); @@ -954,8 +959,8 @@ void OpenMPIRBuilder::emitCancelationCheckImpl(LocationDescription Loc, Value *C auto &FI = FinalizationStack.back(); FI.FiniCB(Builder.saveIP()); #endif - emitRegionExit({CancellationBlock, CancellationBlock->begin()}, getInnermostDirectionRegion(CanceledDirective), CancelledBy); - + emitRegionExit({CancellationBlock, CancellationBlock->begin()}, + getInnermostDirectionRegion(CanceledDirective), CancelledBy); // Builder.SetInsertPoint(CancellationBlock); // Builder.CreateBr( CancellationBlock); @@ -963,16 +968,14 @@ void OpenMPIRBuilder::emitCancelationCheckImpl(LocationDescription Loc, Value *C // FI.CancelCB(CancellationIP, CanceledDirective, CancelledBy); // The continuation block is where code generation continues.s - Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin()); // MK: needed? + Builder.SetInsertPoint(NonCancellationBlock, + NonCancellationBlock->begin()); // MK: needed? 
} -IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(const LocationDescription &Loc, - InsertPointTy OuterAllocaIP, - BodyGenCallbackTy BodyGenCB, - PrivatizeCallbackTy PrivCB, - LeaveRegionCallbackTy FiniCB, - Value *IfCondition, - Value *NumThreads, +IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( + const LocationDescription &Loc, InsertPointTy OuterAllocaIP, + BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, + LeaveRegionCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable) { assert(!isConflictIP(Loc.IP, OuterAllocaIP) && "IPs must not be ambiguous"); @@ -1049,15 +1052,16 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(const LocationDescripti PRegPreFiniBB->splitBasicBlock(ThenTI, "omp.par.exit"); #if 1 - auto FiniCBWrapper = [&](InsertPointTy IP, omp::Directive LeaveReason, OMPRegionInfo *Region) { - // FIXME: This is broken - // 1. Should be done after the FiniCB - // 2. It may deadlock - if (LeaveReason != OMPD_unknown) { - createBarrier(LocationDescription(Builder.saveIP(), Loc.DL), - omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false, - /* CheckCancelFlag */ false); - } + auto FiniCBWrapper = [&](InsertPointTy IP, omp::Directive LeaveReason, + OMPRegionInfo *Region) { + // FIXME: This is broken + // 1. Should be done after the FiniCB + // 2. It may deadlock + if (LeaveReason != OMPD_unknown) { + createBarrier(LocationDescription(Builder.saveIP(), Loc.DL), + omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false, + /* CheckCancelFlag */ false); + } #if 0 // Hide "open-ended" blocks from the given FiniCB by setting the right jump @@ -1074,15 +1078,13 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(const LocationDescripti "Unexpected insertion point for finalization call!"); #endif - if (FiniCB) - FiniCB(IP, LeaveReason, Region); // Needed? - - + FiniCB(IP, LeaveReason, Region); // Needed? 
}; - //FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable}); - OMPRegionInfo* ParallelRegion = pushRegion(OMPD_parallel,IsCancellable, FiniCBWrapper); + // FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable}); + OMPRegionInfo *ParallelRegion = + pushRegion(OMPD_parallel, IsCancellable, FiniCBWrapper); #endif // Generate the privatization allocas in the block that will become the entry @@ -1224,14 +1226,13 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(const LocationDescripti "Unexpected finalization stack state!"); #endif - Instruction *PRegPreFiniTI = PRegPreFiniBB->getTerminator(); InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator()); #if 0 FiniCB(PreFiniIP); -#endif - emitRegionExit( PreFiniIP, ParallelRegion ); +#endif + emitRegionExit(PreFiniIP, ParallelRegion); popRegion(omp::OMPD_parallel); @@ -1420,12 +1421,10 @@ void OpenMPIRBuilder::createTaskyield(const LocationDescription &Loc) { emitTaskyieldImpl(Loc); } -OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections(const LocationDescription &Loc, - InsertPointTy AllocaIP, - ArrayRef SectionCBs, - PrivatizeCallbackTy PrivCB, - LeaveRegionCallbackTy FiniCB, bool IsCancellable, - bool IsNowait) { +OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections( + const LocationDescription &Loc, InsertPointTy AllocaIP, + ArrayRef SectionCBs, PrivatizeCallbackTy PrivCB, + LeaveRegionCallbackTy FiniCB, bool IsCancellable, bool IsNowait) { assert(!isConflictIP(AllocaIP, Loc.IP) && "Dedicated IP allocas required"); if (!updateToLocation(Loc)) @@ -1455,7 +1454,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections(const LocationDes // TODO: Use CanonicalLoopInfo finalization. 
FinalizationStack.push_back({FiniCBWrapper, OMPD_sections, IsCancellable}); #endif -auto SectionsRegion = pushRegion(OMPD_sections, IsCancellable); + auto SectionsRegion = pushRegion(OMPD_sections, IsCancellable); // Each section is emitted as a switch case // Each finalization callback is handled from clang.EmitOMPSectionDirective() @@ -1500,7 +1499,8 @@ auto SectionsRegion = pushRegion(OMPD_sections, IsCancellable); Value *LB = ConstantInt::get(I32Ty, 0); Value *UB = ConstantInt::get(I32Ty, SectionCBs.size()); Value *ST = ConstantInt::get(I32Ty, 1); - llvm::CanonicalLoopInfo *LoopInfo = createCanonicalLoop(Loc, LoopBodyGenCB, LB, UB, ST, true, false, AllocaIP, "section_loop"); + llvm::CanonicalLoopInfo *LoopInfo = createCanonicalLoop( + Loc, LoopBodyGenCB, LB, UB, ST, true, false, AllocaIP, "section_loop"); auto AfterIP = LoopInfo->getAfterIP(); applyStaticWorkshareLoop(Loc.DL, LoopInfo, AllocaIP, !IsNowait); @@ -1517,20 +1517,20 @@ auto SectionsRegion = pushRegion(OMPD_sections, IsCancellable); AfterIP = {FiniBB, FiniBB->begin()}; } #endif - //Instruction *I = Builder.CreateBr(ExitBB); + // Instruction *I = Builder.CreateBr(ExitBB); Builder.restoreIP(AfterIP); - auto Finish = splitBB(Builder,true, "section_finish" ); - emitRegionExit(Builder.saveIP(),SectionsRegion ); + auto Finish = splitBB(Builder, true, "section_finish"); + emitRegionExit(Builder.saveIP(), SectionsRegion); popRegion(OMPD_sections); - return { Finish, Finish->begin() }; + return {Finish, Finish->begin()}; } OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSection(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - LeaveRegionCallbackTy FiniCB) { + LeaveRegionCallbackTy FiniCB) { if (!updateToLocation(Loc)) return Loc.IP; @@ -1758,7 +1758,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductions( OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - LeaveRegionCallbackTy FiniCB) { + 
LeaveRegionCallbackTy FiniCB) { if (!updateToLocation(Loc)) return Loc.IP; @@ -1783,7 +1783,7 @@ OpenMPIRBuilder::createMaster(const LocationDescription &Loc, OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createMasked(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - LeaveRegionCallbackTy FiniCB, Value *Filter) { + LeaveRegionCallbackTy FiniCB, Value *Filter) { if (!updateToLocation(Loc)) return Loc.IP; @@ -1901,13 +1901,10 @@ OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc, return CL; } -CanonicalLoopInfo *OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc, - LoopBodyGenCallbackTy BodyGenCB, - Value *Start, Value *Stop, Value *Step, - bool IsSigned, bool InclusiveStop, - InsertPointTy ComputeIP , - const Twine &Name , - omp::Directive DK ) { +CanonicalLoopInfo *OpenMPIRBuilder::createCanonicalLoop( + const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, + Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, + InsertPointTy ComputeIP, const Twine &Name, omp::Directive DK) { // Consider the following difficulties (assuming 8-bit signed integers): // * Adding \p Step to the loop counter which passes \p Stop may overflow: @@ -3328,7 +3325,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion( #if 0 if (HasFinalize) FinalizationStack.push_back({FiniCB, OMPD, IsCancellable}); -#endif +#endif // Create inlined region's entry and body blocks, in preparation // for conditional creation @@ -3417,7 +3414,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveExit( assert(Fi.DK == OMPD && "Unexpected Directive for Finalization call!"); Fi.FiniCB(FinIP); -#endif +#endif BasicBlock *FiniBB = FinIP.getBlock(); Instruction *FiniBBTI = FiniBB->getTerminator(); diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 7418f9b8feef9..73037e16ca61f 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ 
b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -951,8 +951,6 @@ struct OpenMPOpt { return CodeGenIP; }; - - /// Create a sequential execution region within a merged parallel region, /// encapsulated in a master construct with a barrier for synchronization. auto CreateSequentialRegion = [&](Function *OuterFn, @@ -983,7 +981,6 @@ struct OpenMPOpt { assert(SeqEndBB != nullptr && "SeqEndBB should not be null"); SeqEndBB->getTerminator()->setSuccessor(0, CGEndBB); }; - // Find outputs from the sequential region to outside users and // broadcast their values to them. diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index 7e7f2e243223a..b892f62ecf857 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -422,7 +422,7 @@ TEST_F(OpenMPIRBuilderTest, CreateCancel) { Builder.CreateUnreachable(); EXPECT_FALSE(verifyModule(*M, &errs())); } -#endif +#endif #if 0 TEST_F(OpenMPIRBuilderTest, CreateCancelIfCond) { @@ -650,9 +650,10 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimple) { return CodeGenIP; }; - auto FiniCB = [&](InsertPointTy CodeGenIP, - omp::Directive LeaveReason, - OpenMPIRBuilder:: OMPRegionInfo *Region ) { ++NumFinalizationPoints; }; + auto FiniCB = [&](InsertPointTy CodeGenIP, omp::Directive LeaveReason, + OpenMPIRBuilder::OMPRegionInfo *Region) { + ++NumFinalizationPoints; + }; IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), F->getEntryBlock().getFirstInsertionPt()); @@ -731,9 +732,10 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested) { return CodeGenIP; }; - auto FiniCB = [&](InsertPointTy CodeGenIP , - omp::Directive LeaveReason, - OpenMPIRBuilder:: OMPRegionInfo *Region) { ++NumFinalizationPoints; }; + auto FiniCB = [&](InsertPointTy CodeGenIP, omp::Directive LeaveReason, + OpenMPIRBuilder::OMPRegionInfo *Region) { + ++NumFinalizationPoints; + }; auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 
++NumOuterBodiesGenerated; @@ -827,9 +829,10 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) { return CodeGenIP; }; - auto FiniCB = [&](InsertPointTy CodeGenIP , - omp::Directive LeaveReason, - OpenMPIRBuilder:: OMPRegionInfo *Region) { ++NumFinalizationPoints; }; + auto FiniCB = [&](InsertPointTy CodeGenIP, omp::Directive LeaveReason, + OpenMPIRBuilder::OMPRegionInfo *Region) { + ++NumFinalizationPoints; + }; auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { ++NumOuterBodiesGenerated; @@ -964,9 +967,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelIfCond) { return CodeGenIP; }; - auto FiniCB = [&](InsertPointTy CodeGenIP , - omp::Directive LeaveReason, - OpenMPIRBuilder:: OMPRegionInfo *Region) { + auto FiniCB = [&](InsertPointTy CodeGenIP, omp::Directive LeaveReason, + OpenMPIRBuilder::OMPRegionInfo *Region) { ++NumFinalizationPoints; // No destructors. }; @@ -1100,9 +1102,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) { auto *FakeDestructor = Function::Create( FakeDestructorTy, Function::ExternalLinkage, "fakeDestructor", M.get()); - auto FiniCB = [&](InsertPointTy IP , - omp::Directive LeaveReason, - OpenMPIRBuilder:: OMPRegionInfo *Region) { + auto FiniCB = [&](InsertPointTy IP, omp::Directive LeaveReason, + OpenMPIRBuilder::OMPRegionInfo *Region) { ++NumFinalizationPoints; Builder.restoreIP(IP); Builder.CreateCall(FakeDestructor, @@ -1195,12 +1196,11 @@ TEST_F(OpenMPIRBuilderTest, ParallelForwardAsPointers) { return CodeGenIP; }; - IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), F->getEntryBlock().getFirstInsertionPt()); IRBuilder<>::InsertPoint AfterIP = - OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, {}, - nullptr, nullptr, OMP_PROC_BIND_default, false); + OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, {}, nullptr, + nullptr, OMP_PROC_BIND_default, false); Builder.restoreIP(AfterIP); Builder.CreateRetVoid(); @@ -2337,9 +2337,8 @@ TEST_F(OpenMPIRBuilderTest, MasterDirective) { 
Builder.CreateICmpNE(F->arg_begin(), PrivLoad); }; - auto FiniCB = [&](InsertPointTy IP , - omp::Directive LeaveReason, - OpenMPIRBuilder:: OMPRegionInfo *Region) { + auto FiniCB = [&](InsertPointTy IP, omp::Directive LeaveReason, + OpenMPIRBuilder::OMPRegionInfo *Region) { BasicBlock *IPBB = IP.getBlock(); EXPECT_NE(IPBB->end(), IP.getPoint()); }; @@ -2416,9 +2415,8 @@ TEST_F(OpenMPIRBuilderTest, MaskedDirective) { Builder.CreateICmpNE(F->arg_begin(), PrivLoad); }; - auto FiniCB = [&](InsertPointTy IP , - omp::Directive LeaveReason, - OpenMPIRBuilder:: OMPRegionInfo *Region) { + auto FiniCB = [&](InsertPointTy IP, omp::Directive LeaveReason, + OpenMPIRBuilder::OMPRegionInfo *Region) { BasicBlock *IPBB = IP.getBlock(); EXPECT_NE(IPBB->end(), IP.getPoint()); }; @@ -2484,9 +2482,8 @@ TEST_F(OpenMPIRBuilderTest, CriticalDirective) { Builder.CreateICmpNE(F->arg_begin(), PrivLoad); }; - auto FiniCB = [&](InsertPointTy IP, - omp::Directive LeaveReason, - OpenMPIRBuilder:: OMPRegionInfo *Region) { + auto FiniCB = [&](InsertPointTy IP, omp::Directive LeaveReason, + OpenMPIRBuilder::OMPRegionInfo *Region) { BasicBlock *IPBB = IP.getBlock(); EXPECT_NE(IPBB->end(), IP.getPoint()); }; @@ -2725,9 +2722,8 @@ TEST_F(OpenMPIRBuilderTest, OrderedDirectiveThreads) { Builder.CreateICmpNE(F->arg_begin(), PrivLoad); }; - auto FiniCB = [&](InsertPointTy IP, - omp::Directive LeaveReason, - OpenMPIRBuilder:: OMPRegionInfo *Region) { + auto FiniCB = [&](InsertPointTy IP, omp::Directive LeaveReason, + OpenMPIRBuilder::OMPRegionInfo *Region) { BasicBlock *IPBB = IP.getBlock(); EXPECT_NE(IPBB->end(), IP.getPoint()); }; @@ -2798,9 +2794,8 @@ TEST_F(OpenMPIRBuilderTest, OrderedDirectiveSimd) { Builder.CreateICmpNE(F->arg_begin(), PrivLoad); }; - auto FiniCB = [&](InsertPointTy IP, - omp::Directive LeaveReason, - OpenMPIRBuilder:: OMPRegionInfo *Region) { + auto FiniCB = [&](InsertPointTy IP, omp::Directive LeaveReason, + OpenMPIRBuilder::OMPRegionInfo *Region) { BasicBlock *IPBB = 
IP.getBlock(); EXPECT_NE(IPBB->end(), IP.getPoint()); }; @@ -2916,9 +2911,8 @@ TEST_F(OpenMPIRBuilderTest, SingleDirective) { Builder.CreateICmpNE(F->arg_begin(), PrivLoad); }; - auto FiniCB = [&](InsertPointTy IP, - omp::Directive LeaveReason, - OpenMPIRBuilder:: OMPRegionInfo *Region) { + auto FiniCB = [&](InsertPointTy IP, omp::Directive LeaveReason, + OpenMPIRBuilder::OMPRegionInfo *Region) { BasicBlock *IPBB = IP.getBlock(); EXPECT_NE(IPBB->end(), IP.getPoint()); }; @@ -3008,9 +3002,8 @@ TEST_F(OpenMPIRBuilderTest, SingleDirectiveNowait) { Builder.CreateICmpNE(F->arg_begin(), PrivLoad); }; - auto FiniCB = [&](InsertPointTy IP, - omp::Directive LeaveReason, - OpenMPIRBuilder:: OMPRegionInfo *Region) { + auto FiniCB = [&](InsertPointTy IP, omp::Directive LeaveReason, + OpenMPIRBuilder::OMPRegionInfo *Region) { BasicBlock *IPBB = IP.getBlock(); EXPECT_NE(IPBB->end(), IP.getPoint()); }; @@ -3754,9 +3747,10 @@ TEST_F(OpenMPIRBuilderTest, CreateReductions) { }; // Do nothing in finalization. 
- auto FiniCB = [&](InsertPointTy CodeGenIP, - omp::Directive LeaveReason, - OpenMPIRBuilder:: OMPRegionInfo *Region) { return CodeGenIP; }; + auto FiniCB = [&](InsertPointTy CodeGenIP, omp::Directive LeaveReason, + OpenMPIRBuilder::OMPRegionInfo *Region) { + return CodeGenIP; + }; InsertPointTy AfterIP = OMPBuilder.createParallel(Loc, OuterAllocaIP, BodyGenCB, PrivCB, FiniCB, @@ -4011,15 +4005,12 @@ TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) { return Builder.saveIP(); }; - - - Builder.restoreIP( - OMPBuilder.createParallel(Loc, OuterAllocaIP, FirstBodyGenCB, PrivCB, - {}, /* IfCondition */ nullptr, - /* NumThreads */ nullptr, OMP_PROC_BIND_default, - /* IsCancellable */ false)); + Builder.restoreIP(OMPBuilder.createParallel( + Loc, OuterAllocaIP, FirstBodyGenCB, PrivCB, {}, /* IfCondition */ nullptr, + /* NumThreads */ nullptr, OMP_PROC_BIND_default, + /* IsCancellable */ false)); InsertPointTy AfterIP = OMPBuilder.createParallel( - { Builder.saveIP(), DL }, OuterAllocaIP, SecondBodyGenCB, PrivCB, {}, + {Builder.saveIP(), DL}, OuterAllocaIP, SecondBodyGenCB, PrivCB, {}, /* IfCondition */ nullptr, /* NumThreads */ nullptr, OMP_PROC_BIND_default, /* IsCancellable */ false); @@ -4108,7 +4099,6 @@ TEST_F(OpenMPIRBuilderTest, CreateSectionsSimple) { llvm::SmallVector SectionCBVector; llvm::SmallVector CaseBBs; - auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {}; SectionCBVector.push_back(SectionCB); @@ -4118,7 +4108,7 @@ TEST_F(OpenMPIRBuilderTest, CreateSectionsSimple) { IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), F->getEntryBlock().getFirstInsertionPt()); Builder.restoreIP(OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector, - PrivCB, {}, false, false)); + PrivCB, {}, false, false)); Builder.CreateRetVoid(); // Required at the end of the function EXPECT_NE(F->getEntryBlock().getTerminator(), nullptr); EXPECT_FALSE(verifyModule(*M, &errs())); @@ -4144,9 +4134,8 @@ TEST_F(OpenMPIRBuilderTest, CreateSections) { unsigned 
NumFiniCBCalls = 0; PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); - auto FiniCB = [&](InsertPointTy IP, - omp::Directive LeaveReason, - OpenMPIRBuilder:: OMPRegionInfo *Region) { + auto FiniCB = [&](InsertPointTy IP, omp::Directive LeaveReason, + OpenMPIRBuilder::OMPRegionInfo *Region) { ++NumFiniCBCalls; BasicBlock *IPBB = IP.getBlock(); EXPECT_NE(IPBB->end(), IP.getPoint()); @@ -4259,9 +4248,8 @@ TEST_F(OpenMPIRBuilderTest, CreateSectionsNoWait) { llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) { return CodeGenIP; }; - Builder.restoreIP(OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector, - PrivCB, {}, false, true)); + PrivCB, {}, false, true)); Builder.CreateRetVoid(); // Required at the end of the function for (auto &Inst : instructions(*F)) { EXPECT_FALSE(isa(Inst) && diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 4775f3447a71d..8e9b53f33da8c 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -282,7 +282,6 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, // TODO: Perform finalization actions for variables. This has to be // called for variables which have destructors/finalizers. - llvm::Value *ifCond = nullptr; if (auto ifExprVar = opInst.if_expr_var()) ifCond = moduleTranslation.lookupValue(ifExprVar); @@ -325,7 +324,6 @@ convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder, // TODO: Perform finalization actions for variables. This has to be // called for variables which have destructors/finalizers. 
- llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createMaster( ompLoc, bodyGenCB, {})); @@ -353,7 +351,6 @@ convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder, // TODO: Perform finalization actions for variables. This has to be // called for variables which have destructors/finalizers. - llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext(); llvm::Constant *hint = nullptr; @@ -584,7 +581,6 @@ convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder, // TODO: Perform finalization actions for variables. This has to be // called for variables which have destructors/finalizers. - llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); builder.restoreIP( moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd( @@ -649,13 +645,11 @@ convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, // TODO: Perform finalization actions for variables. This has to be // called for variables which have destructors/finalizers. 
- llvm::OpenMPIRBuilder::InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSections( - ompLoc, allocaIP, sectionCBs, privCB, {}, false, - sectionsOp.nowait())); + ompLoc, allocaIP, sectionCBs, privCB, {}, false, sectionsOp.nowait())); return bodyGenStatus; } From c1ad3f1989e0bacb062be10878d4c8f9dcb65251 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 28 Apr 2022 14:33:11 -0500 Subject: [PATCH 09/50] OMPRegion with breaks --- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 52 +++++++---- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 86 ++++++++++++++----- .../Frontend/OpenMPIRBuilderTest.cpp | 60 +++++-------- 3 files changed, 120 insertions(+), 78 deletions(-) diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index e93afc2cce968..af75f17ce2340 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -106,8 +106,8 @@ class OpenMPIRBuilder { /// destruction, when the scope of the currently generated construct is left /// at the time, and location, the callback is invoked. 
using LeaveRegionCallbackTy = - std::function; + std::function; // TODO: make simpler again enum class RegionKind { /// Sentinel object so we don't always have to check whether the stack is @@ -122,23 +122,45 @@ class OpenMPIRBuilder { Directive }; + struct OMPRegionBreak { + BasicBlock *BB; + omp::Directive Reason; + omp::Directive Target; + + OMPRegionBreak(BasicBlock *BB, omp::Directive Reason, omp::Directive Target) : BB(BB), Reason(Reason),Target(Target) { } + }; + struct OMPRegionInfo { RegionKind Kind; omp::Directive DK; - bool IsCancellable; // TODO: remove; determine ourselves whether there was a - // cancelling construct inside - LeaveRegionCallbackTy FiniCB; - - OMPRegionInfo(RegionKind Kind, omp::Directive DK, bool IsCancellable, - LeaveRegionCallbackTy FiniCB) - : Kind(Kind), DK(DK), IsCancellable(IsCancellable), - FiniCB(std::move(FiniCB)) { + // LeaveRegionCallbackTy FiniCB; + + /// Inside a parallel region, determines whether a barrier must check whether cancellation has occured. + // TODO: remove; determine ourselves whether there was a cancelling construct inside. + bool IsCancellable; + + SmallVectorBreaks; + + OMPRegionInfo(RegionKind Kind, omp::Directive DK, bool IsCancellable + //, LeaveRegionCallbackTy FiniCB + ) : Kind(Kind), DK(DK), IsCancellable(IsCancellable) + // , FiniCB(std::move(FiniCB)) + { assertOK(); } + #ifndef NDEBUG - ~OMPRegionInfo() { assertOK(); } + ~OMPRegionInfo() { + assertOK(); + assert(Breaks.empty()); + } #endif + void addBreak(BasicBlock *BB, omp::Directive Reason, omp::Directive Target) { + assert(!BB->getTerminator()); + Breaks.emplace_back(BB,Reason,Target); + } + /// Consistency self-check. 
void assertOK() const; }; @@ -151,11 +173,11 @@ class OpenMPIRBuilder { OMPRegionInfo *getInnermostDirectionRegion(omp::Directive DK); - OMPRegionInfo *pushRegion(omp::Directive DK, bool IsCancellable, - LeaveRegionCallbackTy FiniCB = {}); + OMPRegionInfo *pushRegion(omp::Directive DK, bool IsCancellable + //, LeaveRegionCallbackTy FiniCB = {} + ); - void emitRegionExit(InsertPointTy ExitingIP, OMPRegionInfo *RegionToLeave, - omp::Directive LeaveReason = omp::OMPD_unknown); + // void emitRegionExit(InsertPointTy ExitingIP, OMPRegionInfo *RegionToLeave, omp::Directive LeaveReason = omp::OMPD_unknown); void popRegion(omp::Directive DK); diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 4c5985bddb369..e97b825433472 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -525,12 +525,15 @@ void OpenMPIRBuilder::finalize(Function *Fn) { } OpenMPIRBuilder::OpenMPIRBuilder(Module &M) : M(M), Builder(M.getContext()) { - RegionStack.emplace_back(new OMPRegionInfo( - RegionKind::Toplevel, omp::OMPD_unknown, /*IsCancellable*/ false, - [](InsertPointTy ExitingIP, omp::Directive LeaveReason, - OMPRegionInfo *Region) { - llvm_unreachable("top-level is not finialized"); - })); + RegionStack.emplace_back( + new OMPRegionInfo( + RegionKind::Toplevel, omp::OMPD_unknown, /*IsCancellable*/ false + // , [](InsertPointTy ExitingIP, omp::Directive LeaveReason, + // OMPRegionInfo *Region) { + // llvm_unreachable("top-level is not finialized"); + // } + ) + ); assert(RegionStack.size() == 1); } @@ -678,13 +681,16 @@ OpenMPIRBuilder::getInnermostDirectionRegion(omp::Directive DK) { } OpenMPIRBuilder::OMPRegionInfo * -OpenMPIRBuilder::pushRegion(omp::Directive DK, bool IsCancellable, - LeaveRegionCallbackTy FiniCB) { +OpenMPIRBuilder::pushRegion(omp::Directive DK, bool IsCancellable + //, LeaveRegionCallbackTy FiniCB +) { RegionStack.emplace_back(new OMPRegionInfo(RegionKind::Directive, 
DK, - IsCancellable, std::move(FiniCB))); + IsCancellable//, std::move(FiniCB) + )); return RegionStack.back().get(); } +#if 0 void OpenMPIRBuilder::emitRegionExit(InsertPointTy ExitingIP, OMPRegionInfo *RegionToLeave, omp::Directive LeaveReason) { @@ -705,18 +711,32 @@ void OpenMPIRBuilder::emitRegionExit(InsertPointTy ExitingIP, #endif for (auto &R : reverse(RegionStack)) { - if (R->FiniCB) - R->FiniCB(ExitingIP, LeaveReason, R.get()); + // if (R->FiniCB) + // R->FiniCB(ExitingIP, LeaveReason, R.get()); if (R.get() == RegionToLeave) return; } llvm_unreachable("region to exit not on stack?"); } +#endif + void OpenMPIRBuilder::popRegion(omp::Directive DK) { - assert(RegionStack.back()->DK == DK && "unbalanced region push/pop"); - RegionStack.back()->assertOK(); + assert(RegionStack.back()->DK == DK && "unbalanced region push/pop"); + RegionStack.back()->assertOK(); + + // Trickly down no yet handled breaks. + OMPRegionInfo* Innermost = RegionStack.back().get(); + OMPRegionInfo* NewInnermost = RegionStack.rbegin()[1].get(); + + for (OMPRegionBreak &B : Innermost->Breaks) { + assert(B.Target != DK && "Should have been handled"); + assert(!B.BB->getTerminator()); + NewInnermost->addBreak(B.BB, B.Reason, B.Target); + } + Innermost->Breaks.clear(); + RegionStack.pop_back(); } @@ -959,8 +979,8 @@ void OpenMPIRBuilder::emitCancelationCheckImpl(LocationDescription Loc, auto &FI = FinalizationStack.back(); FI.FiniCB(Builder.saveIP()); #endif - emitRegionExit({CancellationBlock, CancellationBlock->begin()}, - getInnermostDirectionRegion(CanceledDirective), CancelledBy); + RegionStack.back()->addBreak(CancellationBlock, CancelledBy, CanceledDirective); + //emitRegionExit({CancellationBlock, CancellationBlock->begin()}, getInnermostDirectionRegion(CanceledDirective), CancelledBy); // Builder.SetInsertPoint(CancellationBlock); // Builder.CreateBr( CancellationBlock); @@ -1083,8 +1103,9 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( }; // 
FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable}); - OMPRegionInfo *ParallelRegion = - pushRegion(OMPD_parallel, IsCancellable, FiniCBWrapper); + OMPRegionInfo *ParallelRegion = pushRegion(OMPD_parallel, IsCancellable + //, FiniCBWrapper + ); #endif // Generate the privatization allocas in the block that will become the entry @@ -1226,13 +1247,32 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( "Unexpected finalization stack state!"); #endif + Instruction *PRegPreFiniTI = PRegPreFiniBB->getTerminator(); - InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator()); -#if 0 - FiniCB(PreFiniIP); -#endif - emitRegionExit(PreFiniIP, ParallelRegion); + // TODO: move to utility function + if (FiniCB) { + InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator()); + FiniCB(PreFiniIP, OMPD_unknown, ParallelRegion); + } + for (auto& B : reverse(ParallelRegion->Breaks)) { + Builder.SetInsertPoint(B.BB); + + if (FiniCB) { + B.BB = splitBB(Builder, true, ".fini"); + FiniCB(Builder.saveIP(), B.Reason, ParallelRegion); + Builder.SetInsertPoint( B.BB); + } + + if (B.Target == OMPD_parallel) { + Builder.CreateBr(PRegExitBB); + B.BB = nullptr; + } + } + ParallelRegion->Breaks.erase( llvm::remove_if(ParallelRegion->Breaks, [](const OMPRegionBreak& B) { + return !B.BB; + }), ParallelRegion->Breaks.end() ); + //emitRegionExit(PreFiniIP, ParallelRegion); popRegion(omp::OMPD_parallel); @@ -1521,7 +1561,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections( Builder.restoreIP(AfterIP); auto Finish = splitBB(Builder, true, "section_finish"); - emitRegionExit(Builder.saveIP(), SectionsRegion); + //emitRegionExit(Builder.saveIP(), SectionsRegion); popRegion(OMPD_sections); return {Finish, Finish->begin()}; diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index b892f62ecf857..ecac08744ea53 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ 
b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -1042,7 +1042,6 @@ TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) { BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F); Builder.CreateBr(EnterBB); Builder.SetInsertPoint(EnterBB); - OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); unsigned NumBodiesGenerated = 0; @@ -1056,38 +1055,38 @@ TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) { Builder.restoreIP(CodeGenIP); // Create three barriers, two cancel barriers but only one checked. - // Function *CBFn, *BFn; + Function *CBFn, *BFn; Builder.restoreIP( OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel)); - Function *CBFn = M->getFunction("__kmpc_cancel_barrier"); - Function *BFn = M->getFunction("__kmpc_barrier"); - EXPECT_NE(CBFn, nullptr); - EXPECT_EQ(BFn, nullptr); - EXPECT_EQ(CBFn->getNumUses(), 2U); - EXPECT_TRUE(isa(CBFn->user_back())); - // EXPECT_EQ(CBFn->user_back()->getNumUses(), 0U); + CBFn = M->getFunction("__kmpc_cancel_barrier"); + BFn = M->getFunction("__kmpc_barrier"); + ASSERT_NE(CBFn, nullptr); + ASSERT_EQ(BFn, nullptr); + ASSERT_EQ(CBFn->getNumUses(), 1U); + ASSERT_TRUE(isa(CBFn->user_back())); + ASSERT_EQ(CBFn->user_back()->getNumUses(), 1U); CheckedBarrier = cast(CBFn->user_back()); Builder.restoreIP( OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel, true)); - // CBFn = M->getFunction("__kmpc_cancel_barrier"); + CBFn = M->getFunction("__kmpc_cancel_barrier"); BFn = M->getFunction("__kmpc_barrier"); - // EXPECT_NE(CBFn, nullptr); - EXPECT_NE(BFn, nullptr); - EXPECT_EQ(CBFn->getNumUses(), 2U); - EXPECT_EQ(BFn->getNumUses(), 1U); - // EXPECT_TRUE(isa(BFn->user_back())); - EXPECT_EQ(BFn->user_back()->getNumUses(), 0U); + ASSERT_NE(CBFn, nullptr); + ASSERT_NE(BFn, nullptr); + ASSERT_EQ(CBFn->getNumUses(), 1U); + ASSERT_EQ(BFn->getNumUses(), 1U); + ASSERT_TRUE(isa(BFn->user_back())); + ASSERT_EQ(BFn->user_back()->getNumUses(), 0U); Builder.restoreIP(OMPBuilder.createBarrier(Builder.saveIP(), 
OMPD_parallel, false, false)); - EXPECT_EQ(CBFn->getNumUses(), 3U); - EXPECT_EQ(BFn->getNumUses(), 1U); - // EXPECT_TRUE(CBFn->user_back() != CheckedBarrier); - EXPECT_TRUE(isa(CBFn->user_back())); - EXPECT_EQ(CBFn->user_back()->getNumUses(), 0U); + ASSERT_EQ(CBFn->getNumUses(), 2U); + ASSERT_EQ(BFn->getNumUses(), 1U); + ASSERT_TRUE(CBFn->user_back() != CheckedBarrier); + ASSERT_TRUE(isa(CBFn->user_back())); + ASSERT_EQ(CBFn->user_back()->getNumUses(), 0U); }; auto PrivCB = [&](InsertPointTy, InsertPointTy, Value &V, Value &, @@ -1127,25 +1126,6 @@ TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) { OMPBuilder.finalize(); EXPECT_FALSE(verifyModule(*M, &errs())); -#if 0 - BasicBlock *ExitBB = nullptr; - for (const User *Usr : FakeDestructor->users()) { - const CallInst *CI = cast(Usr); - EXPECT_EQ(CI->getCalledFunction(), FakeDestructor); - EXPECT_TRUE(isa(CI->getNextNode())); - EXPECT_EQ(CI->getNextNode()->getNumSuccessors(), 1U); - if (ExitBB) - EXPECT_EQ(CI->getNextNode()->getSuccessor(0)->getSingleSuccessor(), ExitBB); - else - ExitBB = CI->getNextNode()->getSuccessor(0)->getUniqueSuccessor(); - EXPECT_EQ(ExitBB->size(), 1U); - if (!isa(ExitBB->front())) { - EXPECT_TRUE(isa(ExitBB->front())); - EXPECT_EQ(cast(ExitBB->front()).getNumSuccessors(), 1U); - EXPECT_TRUE(isa(cast(ExitBB->front()).getSuccessor(0)->front())); - } - } -#endif } TEST_F(OpenMPIRBuilderTest, ParallelForwardAsPointers) { From db7ea752a35ed2c0803ac777aea7b205d14975cf Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 28 Apr 2022 15:39:25 -0500 Subject: [PATCH 10/50] part fix sections --- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 2 +- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 45 ++++++++++++++----- 2 files changed, 36 insertions(+), 11 deletions(-) diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index af75f17ce2340..1469f778f7ff0 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ 
b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -179,7 +179,7 @@ class OpenMPIRBuilder { // void emitRegionExit(InsertPointTy ExitingIP, OMPRegionInfo *RegionToLeave, omp::Directive LeaveReason = omp::OMPD_unknown); - void popRegion(omp::Directive DK); + void popRegion(OMPRegionInfo*R, BasicBlock *ContinueBB, LeaveRegionCallbackTy &LeaveCb); /// Return true if the last entry in the finalization stack is of kind \p DK /// and cancellable. diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index e97b825433472..bd83b1e99f880 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -722,18 +722,37 @@ void OpenMPIRBuilder::emitRegionExit(InsertPointTy ExitingIP, #endif -void OpenMPIRBuilder::popRegion(omp::Directive DK) { - assert(RegionStack.back()->DK == DK && "unbalanced region push/pop"); - RegionStack.back()->assertOK(); +void OpenMPIRBuilder::popRegion(OMPRegionInfo *R, BasicBlock *ContinueBB, LeaveRegionCallbackTy &LeaveCb) { + auto DK = R->DK; + assert(RegionStack.back().get() == R && "balannced region push/pop required"); + R->assertOK(); // Trickly down no yet handled breaks. 
OMPRegionInfo* Innermost = RegionStack.back().get(); OMPRegionInfo* NewInnermost = RegionStack.rbegin()[1].get(); - for (OMPRegionBreak &B : Innermost->Breaks) { - assert(B.Target != DK && "Should have been handled"); + + for (auto& B : reverse(Innermost->Breaks)) { assert(!B.BB->getTerminator()); - NewInnermost->addBreak(B.BB, B.Reason, B.Target); + Builder.SetInsertPoint(B.BB); + + if (B.Target == DK) { + Builder.SetInsertPoint(B.BB); + BranchInst * TI = Builder.CreateBr(ContinueBB); + if (LeaveCb) + LeaveCb( InsertPointTy(TI->getParent(), TI->getIterator()), B.Reason, Innermost); + B.BB = nullptr; + } else if (LeaveCb) { + B.BB = splitBB(Builder, true, ".fini"); + LeaveCb(Builder.saveIP(), B.Reason, Innermost); + Builder.SetInsertPoint( B.BB); + } + } + + + for (OMPRegionBreak &B : Innermost->Breaks) { + if (B.BB) + NewInnermost->addBreak(B.BB, B.Reason, B.Target); } Innermost->Breaks.clear(); @@ -1250,11 +1269,12 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( Instruction *PRegPreFiniTI = PRegPreFiniBB->getTerminator(); - // TODO: move to utility function + if (FiniCB) { InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator()); FiniCB(PreFiniIP, OMPD_unknown, ParallelRegion); } +#if 0 for (auto& B : reverse(ParallelRegion->Breaks)) { Builder.SetInsertPoint(B.BB); @@ -1273,8 +1293,8 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( return !B.BB; }), ParallelRegion->Breaks.end() ); //emitRegionExit(PreFiniIP, ParallelRegion); - - popRegion(omp::OMPD_parallel); +#endif + popRegion(ParallelRegion, PRegExitBB, FiniCB); OI.OuterAllocaBB = OuterAllocaBlock; OI.EntryBB = PRegEntryBB; @@ -1559,10 +1579,15 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections( #endif // Instruction *I = Builder.CreateBr(ExitBB); + + Builder.restoreIP(AfterIP); auto Finish = splitBB(Builder, true, "section_finish"); + if (FiniCB) + FiniCB(Builder.saveAndClearIP(), OMPD_unknown, SectionsRegion ); + //emitRegionExit(Builder.saveIP(), 
SectionsRegion); - popRegion(OMPD_sections); + popRegion(SectionsRegion, Finish, FiniCB); return {Finish, Finish->begin()}; } From d625b12d52777ae94236446987fc8e9de13c5a01 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 28 Apr 2022 15:52:38 -0500 Subject: [PATCH 11/50] combine joining finalizations --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 16 ++++++++++------ llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp | 2 +- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index bd83b1e99f880..57ef8be86f921 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -724,7 +724,7 @@ void OpenMPIRBuilder::emitRegionExit(InsertPointTy ExitingIP, void OpenMPIRBuilder::popRegion(OMPRegionInfo *R, BasicBlock *ContinueBB, LeaveRegionCallbackTy &LeaveCb) { auto DK = R->DK; - assert(RegionStack.back().get() == R && "balannced region push/pop required"); + assert(RegionStack.back().get() == R && "balanced region push/pop required"); R->assertOK(); // Trickly down no yet handled breaks. 
@@ -737,10 +737,11 @@ void OpenMPIRBuilder::popRegion(OMPRegionInfo *R, BasicBlock *ContinueBB, LeaveR Builder.SetInsertPoint(B.BB); if (B.Target == DK) { + // Join common finialization block Builder.SetInsertPoint(B.BB); BranchInst * TI = Builder.CreateBr(ContinueBB); - if (LeaveCb) - LeaveCb( InsertPointTy(TI->getParent(), TI->getIterator()), B.Reason, Innermost); + // if (LeaveCb) + // LeaveCb( InsertPointTy(TI->getParent(), TI->getIterator()), B.Reason, Innermost); B.BB = nullptr; } else if (LeaveCb) { B.BB = splitBB(Builder, true, ".fini"); @@ -1294,7 +1295,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( }), ParallelRegion->Breaks.end() ); //emitRegionExit(PreFiniIP, ParallelRegion); #endif - popRegion(ParallelRegion, PRegExitBB, FiniCB); + popRegion(ParallelRegion, PRegPreFiniBB, FiniCB); OI.OuterAllocaBB = OuterAllocaBlock; OI.EntryBB = PRegEntryBB; @@ -1583,8 +1584,11 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections( Builder.restoreIP(AfterIP); auto Finish = splitBB(Builder, true, "section_finish"); - if (FiniCB) - FiniCB(Builder.saveAndClearIP(), OMPD_unknown, SectionsRegion ); + if (FiniCB) { + Builder.SetInsertPoint(Finish); + Finish = splitBB(Builder, true, "section_fini"); + FiniCB(Builder.saveAndClearIP(), OMPD_unknown, SectionsRegion); + } //emitRegionExit(Builder.saveIP(), SectionsRegion); popRegion(SectionsRegion, Finish, FiniCB); diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index ecac08744ea53..ad16a7813f32f 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -1119,7 +1119,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) { EXPECT_EQ(NumBodiesGenerated, 1U); EXPECT_EQ(NumPrivatizedVars, 0U); EXPECT_EQ(NumFinalizationPoints, 2U); - EXPECT_EQ(FakeDestructor->getNumUses(), 2U); + EXPECT_EQ(FakeDestructor->getNumUses(), 1U); Builder.restoreIP(AfterIP); Builder.CreateRetVoid(); From 
c38dea4c980ca7ebcc2262dfc236429f03e700b6 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 28 Apr 2022 15:56:27 -0500 Subject: [PATCH 12/50] unittest fix --- llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index ad16a7813f32f..10a8f50ab731e 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -1118,7 +1118,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) { EXPECT_EQ(NumBodiesGenerated, 1U); EXPECT_EQ(NumPrivatizedVars, 0U); - EXPECT_EQ(NumFinalizationPoints, 2U); + EXPECT_EQ(NumFinalizationPoints, 1U); EXPECT_EQ(FakeDestructor->getNumUses(), 1U); Builder.restoreIP(AfterIP); From 12d4334f0d2cf969d6883c18cd22f4747c7a5fbe Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 28 Apr 2022 16:22:18 -0500 Subject: [PATCH 13/50] avoid warning --- llvm/include/llvm/Analysis/CFGPrinter.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/Analysis/CFGPrinter.h b/llvm/include/llvm/Analysis/CFGPrinter.h index 1bb29af559c28..7afd2c67d8a20 100644 --- a/llvm/include/llvm/Analysis/CFGPrinter.h +++ b/llvm/include/llvm/Analysis/CFGPrinter.h @@ -212,17 +212,17 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { // Process string output to make it nicer... unsigned ColNum = 0; - unsigned LastSpace = 0; + //unsigned LastSpace = 0; for (unsigned i = 0; i != OutStr.length(); ++i) { if (OutStr[i] == '\n') { // Left justify OutStr[i] = '\\'; OutStr.insert(OutStr.begin() + i + 1, 'l'); ColNum = 0; - LastSpace = 0; + // LastSpace = 0; } else if (OutStr[i] == ';') { // Delete comments! unsigned Idx = OutStr.find('\n', i + 1); // Find end of line - HandleComment(OutStr, i, Idx); -#if 0 + HandleComment(OutStr, i, Idx); +#if 0 } else if (ColNum == MaxColumns) { // Wrap lines. 
// Wrap very long names even though we can't find a space. if (!LastSpace) @@ -235,8 +235,8 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { } else ++ColNum; LongCol = std::max(LongCol, ColNum); - if (OutStr[i] == ' ') - LastSpace = i; + // if (OutStr[i] == ' ') + // LastSpace = i; } if (!HandleBasicBlock && CFGInfo && CFGInfo->HighlightBB && !LongestCol) { From c004d84c93f0169fe09add67755c4950019a8c17 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 28 Apr 2022 16:50:12 -0500 Subject: [PATCH 14/50] fix clang crash --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 5 +- clang/lib/CodeGen/CGStmtOpenMP.cpp | 14 +- .../Frontend/OpenMPIRBuilderTest.cpp | 201 ------------------ 3 files changed, 12 insertions(+), 208 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 02d0af35aa83e..2be291be45a31 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1208,9 +1208,10 @@ namespace { // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR // Builder if one is present. struct PushAndPopStackRAII { + CodeGenFunction:: CGNonOpenMPIRBuilderRegion NonOMPBuilderScope; PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF, bool HasCancel, llvm::omp::Directive Kind) - : OMPBuilder(OMPBuilder) { + : OMPBuilder(OMPBuilder), NonOMPBuilderScope(CGF) { if (!OMPBuilder) return; @@ -1241,7 +1242,7 @@ struct PushAndPopStackRAII { CGF.EmitBranchThroughCleanup(Dest); }; - llvm_unreachable("TODO: set UserManaged=true"); + //llvm_unreachable("TODO: set UserManaged=true"); // TODO: Remove this once we emit parallel regions through the // OpenMPIRBuilder as it can do this setup internally. 
// llvm::OpenMPIRBuilder::FinalizationInfo FI{{}, Kind, HasCancel, diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index f6b5969414635..2302fd4db81c4 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -3834,6 +3834,7 @@ void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { return; } + CGNonOpenMPIRBuilderRegion Scope(*this); HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel()); }; { @@ -4475,6 +4476,8 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion, const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen, OMPTaskDataTy &Data) { + CGNonOpenMPIRBuilderRegion NonIrBuilderScope(*this); + // Emit outlined function for task construct. const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion); auto I = CS->getCapturedDecl()->param_begin(); @@ -5017,6 +5020,8 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective( } void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { + CGNonOpenMPIRBuilderRegion Scope(*this); + // Emit outlined function for task construct. const CapturedStmt *CS = S.getCapturedStmt(OMPD_task); Address CapturedStruct = GenerateCapturedStmtArgument(*CS); @@ -5034,6 +5039,7 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { // Check if we should emit tied or untied task. 
Data.Tied = !S.getSingleClause(); auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { + CGNonOpenMPIRBuilderRegion Scope(CGF); CGF.EmitStmt(CS->getCapturedStmt()); }; auto &&TaskGen = [&S, SharedsTy, CapturedStruct, @@ -6892,6 +6898,8 @@ void CodeGenFunction::EmitOMPCancellationPointDirective( S.getCancelRegion()); } + + void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { const Expr *IfCond = nullptr; for (const auto *C : S.getClausesOfKind()) { @@ -6901,18 +6909,14 @@ void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { break; } } - if (CGM.getLangOpts().OpenMPIRBuilder) { + if (CGM.getLangOpts().OpenMPIRBuilder && !IsInsideNonOpenMPIRBuilderHandledRegion) { llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); - llvm_unreachable("TODO"); - // auto DK = OMPBuilder.getTopmostDirective(); - // if (OMPBuilder.isTopmostBuilderManaged()) { llvm::Value *IfCondition = nullptr; if (IfCond) IfCondition = EvaluateExprAsBool(IfCond); Builder.restoreIP( OMPBuilder.createCancel(Builder, IfCondition, S.getCancelRegion())); return; - // } } CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond, diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index 10a8f50ab731e..446264f84b5d4 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -355,207 +355,6 @@ TEST_F(OpenMPIRBuilderTest, CreateBarrier) { EXPECT_FALSE(verifyModule(*M, &errs())); } -#if 0 -TEST_F(OpenMPIRBuilderTest, CreateCancel) { - using InsertPointTy = OpenMPIRBuilder::InsertPointTy; - OpenMPIRBuilder OMPBuilder(*M); - OMPBuilder.initialize(); - - BasicBlock *CBB = BasicBlock::Create(Ctx, "", F); - new UnreachableInst(Ctx, CBB); - auto FiniCB = [&](InsertPointTy IP) { - ASSERT_NE(IP.getBlock(), nullptr); - ASSERT_EQ(IP.getBlock()->end(), IP.getPoint()); - BranchInst::Create(CBB, IP.getBlock()); - }; - 
OMPBuilder.pushFinalizationCB({FiniCB, OMPD_parallel, true}); - - IRBuilder<> Builder(BB); - - OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()}); - auto NewIP = OMPBuilder.createCancel(Loc, nullptr, OMPD_parallel); - Builder.restoreIP(NewIP); - EXPECT_FALSE(M->global_empty()); - EXPECT_EQ(M->size(), 4U); - EXPECT_EQ(F->size(), 4U); - EXPECT_EQ(BB->size(), 4U); - - CallInst *GTID = dyn_cast(&BB->front()); - EXPECT_NE(GTID, nullptr); - EXPECT_EQ(GTID->arg_size(), 1U); - EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num"); - EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory()); - EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory()); - - CallInst *Cancel = dyn_cast(GTID->getNextNode()); - EXPECT_NE(Cancel, nullptr); - EXPECT_EQ(Cancel->arg_size(), 3U); - EXPECT_EQ(Cancel->getCalledFunction()->getName(), "__kmpc_cancel"); - EXPECT_FALSE(Cancel->getCalledFunction()->doesNotAccessMemory()); - EXPECT_FALSE(Cancel->getCalledFunction()->doesNotFreeMemory()); - EXPECT_EQ(Cancel->getNumUses(), 1U); - Instruction *CancelBBTI = Cancel->getParent()->getTerminator(); - EXPECT_EQ(CancelBBTI->getNumSuccessors(), 2U); - EXPECT_EQ(CancelBBTI->getSuccessor(0), NewIP.getBlock()); - EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 3U); - CallInst *GTID1 = dyn_cast(&CancelBBTI->getSuccessor(1)->front()); - EXPECT_NE(GTID1, nullptr); - EXPECT_EQ(GTID1->arg_size(), 1U); - EXPECT_EQ(GTID1->getCalledFunction()->getName(), "__kmpc_global_thread_num"); - EXPECT_FALSE(GTID1->getCalledFunction()->doesNotAccessMemory()); - EXPECT_FALSE(GTID1->getCalledFunction()->doesNotFreeMemory()); - CallInst *Barrier = dyn_cast(GTID1->getNextNode()); - EXPECT_NE(Barrier, nullptr); - EXPECT_EQ(Barrier->arg_size(), 2U); - EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier"); - EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory()); - EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory()); - 
EXPECT_EQ(Barrier->getNumUses(), 0U); - EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(), - 1U); - EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), CBB); - - EXPECT_EQ(cast(Cancel)->getArgOperand(1), GTID); - - OMPBuilder.popFinalizationCB(); - - Builder.CreateUnreachable(); - EXPECT_FALSE(verifyModule(*M, &errs())); -} -#endif - -#if 0 -TEST_F(OpenMPIRBuilderTest, CreateCancelIfCond) { - using InsertPointTy = OpenMPIRBuilder::InsertPointTy; - OpenMPIRBuilder OMPBuilder(*M); - OMPBuilder.initialize(); - - BasicBlock *CBB = BasicBlock::Create(Ctx, "", F); - new UnreachableInst(Ctx, CBB); - auto FiniCB = [&](InsertPointTy IP) { - ASSERT_NE(IP.getBlock(), nullptr); - ASSERT_EQ(IP.getBlock()->end(), IP.getPoint()); - BranchInst::Create(CBB, IP.getBlock()); - }; - OMPBuilder.pushFinalizationCB({FiniCB, OMPD_parallel, true}); - - IRBuilder<> Builder(BB); - - OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()}); - auto NewIP = OMPBuilder.createCancel(Loc, Builder.getTrue(), OMPD_parallel); - Builder.restoreIP(NewIP); - EXPECT_FALSE(M->global_empty()); - EXPECT_EQ(M->size(), 4U); - EXPECT_EQ(F->size(), 7U); - EXPECT_EQ(BB->size(), 1U); - ASSERT_TRUE(isa(BB->getTerminator())); - ASSERT_EQ(BB->getTerminator()->getNumSuccessors(), 2U); - BB = BB->getTerminator()->getSuccessor(0); - EXPECT_EQ(BB->size(), 4U); - - CallInst *GTID = dyn_cast(&BB->front()); - EXPECT_NE(GTID, nullptr); - EXPECT_EQ(GTID->arg_size(), 1U); - EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num"); - EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory()); - EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory()); - - CallInst *Cancel = dyn_cast(GTID->getNextNode()); - EXPECT_NE(Cancel, nullptr); - EXPECT_EQ(Cancel->arg_size(), 3U); - EXPECT_EQ(Cancel->getCalledFunction()->getName(), "__kmpc_cancel"); - EXPECT_FALSE(Cancel->getCalledFunction()->doesNotAccessMemory()); - 
EXPECT_FALSE(Cancel->getCalledFunction()->doesNotFreeMemory()); - EXPECT_EQ(Cancel->getNumUses(), 1U); - Instruction *CancelBBTI = Cancel->getParent()->getTerminator(); - EXPECT_EQ(CancelBBTI->getNumSuccessors(), 2U); - EXPECT_EQ(CancelBBTI->getSuccessor(0)->size(), 1U); - EXPECT_EQ(CancelBBTI->getSuccessor(0)->getUniqueSuccessor(), - NewIP.getBlock()); - EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 3U); - CallInst *GTID1 = dyn_cast(&CancelBBTI->getSuccessor(1)->front()); - EXPECT_NE(GTID1, nullptr); - EXPECT_EQ(GTID1->arg_size(), 1U); - EXPECT_EQ(GTID1->getCalledFunction()->getName(), "__kmpc_global_thread_num"); - EXPECT_FALSE(GTID1->getCalledFunction()->doesNotAccessMemory()); - EXPECT_FALSE(GTID1->getCalledFunction()->doesNotFreeMemory()); - CallInst *Barrier = dyn_cast(GTID1->getNextNode()); - EXPECT_NE(Barrier, nullptr); - EXPECT_EQ(Barrier->arg_size(), 2U); - EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier"); - EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory()); - EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory()); - EXPECT_EQ(Barrier->getNumUses(), 0U); - EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(), - 1U); - EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), CBB); - - EXPECT_EQ(cast(Cancel)->getArgOperand(1), GTID); - - OMPBuilder.popFinalizationCB(); - - Builder.CreateUnreachable(); - EXPECT_FALSE(verifyModule(*M, &errs())); -} -#endif - -#if 0 -TEST_F(OpenMPIRBuilderTest, CreateCancelBarrier) { - using InsertPointTy = OpenMPIRBuilder::InsertPointTy; - OpenMPIRBuilder OMPBuilder(*M); - OMPBuilder.initialize(); - - BasicBlock *CBB = BasicBlock::Create(Ctx, "", F); - new UnreachableInst(Ctx, CBB); - auto FiniCB = [&](InsertPointTy IP) { - ASSERT_NE(IP.getBlock(), nullptr); - ASSERT_EQ(IP.getBlock()->end(), IP.getPoint()); - BranchInst::Create(CBB, IP.getBlock()); - }; - OMPBuilder.pushFinalizationCB({FiniCB, OMPD_parallel, true}); - - IRBuilder<> 
Builder(BB); - - OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()}); - auto NewIP = OMPBuilder.createBarrier(Loc, OMPD_for); - Builder.restoreIP(NewIP); - EXPECT_FALSE(M->global_empty()); - EXPECT_EQ(M->size(), 3U); - EXPECT_EQ(F->size(), 4U); - EXPECT_EQ(BB->size(), 4U); - - CallInst *GTID = dyn_cast(&BB->front()); - EXPECT_NE(GTID, nullptr); - EXPECT_EQ(GTID->arg_size(), 1U); - EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num"); - EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory()); - EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory()); - - CallInst *Barrier = dyn_cast(GTID->getNextNode()); - EXPECT_NE(Barrier, nullptr); - EXPECT_EQ(Barrier->arg_size(), 2U); - EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier"); - EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory()); - EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory()); - EXPECT_EQ(Barrier->getNumUses(), 1U); - Instruction *BarrierBBTI = Barrier->getParent()->getTerminator(); - EXPECT_EQ(BarrierBBTI->getNumSuccessors(), 2U); - EXPECT_EQ(BarrierBBTI->getSuccessor(0), NewIP.getBlock()); - EXPECT_EQ(BarrierBBTI->getSuccessor(1)->size(), 1U); - EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(), - 1U); - EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), - CBB); - - EXPECT_EQ(cast(Barrier)->getArgOperand(1), GTID); - - OMPBuilder.popFinalizationCB(); - - Builder.CreateUnreachable(); - EXPECT_FALSE(verifyModule(*M, &errs())); -} -#endif - TEST_F(OpenMPIRBuilderTest, DbgLoc) { OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.initialize(); From 23627ec567d96462b1ef4e70b2b7b37c26b749d3 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 29 Apr 2022 01:47:17 -0500 Subject: [PATCH 15/50] fix some tests --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 2 - .../irbuilder_nested_openmp_parallel_empty.c | 36 +-- .../OpenMP/irbuilder_nested_parallel_for.c | 280 +++++++++--------- 
...ilder_unroll_partial_factor_for_collapse.c | 12 +- ...er_unroll_partial_heuristic_for_collapse.c | 12 +- 5 files changed, 170 insertions(+), 172 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 2be291be45a31..8332ef774d29f 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1215,8 +1215,6 @@ struct PushAndPopStackRAII { if (!OMPBuilder) return; - // auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {}; - // The following callback is the crucial part of clangs cleanup process. // // NOTE: diff --git a/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c b/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c index 3c8d3dd3127aa..2c2dce00ae717 100644 --- a/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c +++ b/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c @@ -16,8 +16,8 @@ // ALL-NEXT: br label [[OMP_PARALLEL:%.*]] // ALL: omp_parallel: // ALL-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @_Z17nested_parallel_0v..omp_par.1 to void (i32*, i32*, ...)*)) -// ALL-NEXT: br label [[OMP_PAR_OUTLINED_EXIT12:%.*]] -// ALL: omp.par.outlined.exit12: +// ALL-NEXT: br label [[OMP_PAR_OUTLINED_EXIT13:%.*]] +// ALL: omp.par.outlined.exit13: // ALL-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] // ALL: omp.par.exit.split: // ALL-NEXT: ret void @@ -33,7 +33,7 @@ void nested_parallel_0(void) { // ALL-LABEL: @_Z17nested_parallel_1Pfid( // ALL-NEXT: entry: -// ALL-NEXT: [[STRUCTARG14:%.*]] = alloca { i32*, double*, float** }, align 8 +// ALL-NEXT: [[STRUCTARG15:%.*]] = alloca { i32*, double*, float** }, align 8 // ALL-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8 // ALL-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // ALL-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 @@ -43,15 +43,15 @@ void nested_parallel_0(void) { // ALL-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // ALL-NEXT: br label [[OMP_PARALLEL:%.*]] // ALL: omp_parallel: -// ALL-NEXT: [[GEP_A_ADDR15:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG14]], i32 0, i32 0 -// ALL-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR15]], align 8 -// ALL-NEXT: [[GEP_B_ADDR16:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG14]], i32 0, i32 1 -// ALL-NEXT: store double* [[B_ADDR]], double** [[GEP_B_ADDR16]], align 8 -// ALL-NEXT: [[GEP_R_ADDR17:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG14]], i32 0, i32 2 -// ALL-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR17]], align 8 -// ALL-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float** }*)* @_Z17nested_parallel_1Pfid..omp_par.2 to void (i32*, i32*, ...)*), { i32*, double*, float** }* [[STRUCTARG14]]) -// ALL-NEXT: br label [[OMP_PAR_OUTLINED_EXIT13:%.*]] -// ALL: omp.par.outlined.exit13: +// ALL-NEXT: [[GEP_A_ADDR16:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG15]], i32 0, i32 0 +// ALL-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR16]], align 8 +// ALL-NEXT: [[GEP_B_ADDR17:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG15]], i32 0, i32 1 +// ALL-NEXT: store double* [[B_ADDR]], double** [[GEP_B_ADDR17]], align 8 +// ALL-NEXT: [[GEP_R_ADDR18:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG15]], i32 0, i32 2 +// ALL-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR18]], align 8 +// ALL-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float** }*)* @_Z17nested_parallel_1Pfid..omp_par.2 to void (i32*, i32*, ...)*), { i32*, double*, float** }* [[STRUCTARG15]]) +// ALL-NEXT: br label [[OMP_PAR_OUTLINED_EXIT14:%.*]] +// ALL: omp.par.outlined.exit14: // ALL-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] // ALL: omp.par.exit.split: // ALL-NEXT: ret void @@ -85,17 +85,17 @@ void nested_parallel_1(float *r, int a, double b) { // ALL-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG]], i32 0, i32 2 // ALL-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR]], align 8 // ALL-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float** }*)* @_Z17nested_parallel_2Pfid..omp_par.5 to void (i32*, i32*, ...)*), { i32*, double*, float** }* [[STRUCTARG]]) -// ALL-NEXT: br label [[OMP_PAR_OUTLINED_EXIT55:%.*]] -// ALL: omp.par.outlined.exit55: +// ALL-NEXT: br label [[OMP_PAR_OUTLINED_EXIT58:%.*]] +// ALL: omp.par.outlined.exit58: // ALL-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] // ALL: omp.par.exit.split: // ALL-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// ALL-NEXT: [[CONV56:%.*]] = sitofp i32 [[TMP0]] to double +// ALL-NEXT: [[CONV59:%.*]] = sitofp i32 [[TMP0]] to double // ALL-NEXT: [[TMP1:%.*]] = load double, double* [[B_ADDR]], align 8 -// ALL-NEXT: [[ADD57:%.*]] = fadd double [[CONV56]], [[TMP1]] -// ALL-NEXT: [[CONV58:%.*]] = fptrunc double [[ADD57]] to float +// ALL-NEXT: [[ADD60:%.*]] = fadd double [[CONV59]], [[TMP1]] +// ALL-NEXT: [[CONV61:%.*]] = fptrunc double [[ADD60]] to float // ALL-NEXT: [[TMP2:%.*]] = load float*, float** [[R_ADDR]], align 8 -// ALL-NEXT: store float [[CONV58]], float* [[TMP2]], align 4 +// ALL-NEXT: store float [[CONV61]], float* [[TMP2]], align 4 // ALL-NEXT: ret void // void nested_parallel_2(float *r, int a, double b) { diff --git a/clang/test/OpenMP/irbuilder_nested_parallel_for.c b/clang/test/OpenMP/irbuilder_nested_parallel_for.c index b7c3f98aa0f9a..6477306064140 100644 --- a/clang/test/OpenMP/irbuilder_nested_parallel_for.c +++ b/clang/test/OpenMP/irbuilder_nested_parallel_for.c @@ -44,7 +44,7 @@ void parallel_for_0(void) { // CHECK-LABEL: @_Z14parallel_for_1Pfid( // CHECK-NEXT: entry: -// CHECK-NEXT: [[STRUCTARG17:%.*]] = alloca { i32*, double*, float** }, align 8 +// CHECK-NEXT: [[STRUCTARG18:%.*]] = alloca { i32*, double*, float** }, align 8 // CHECK-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 @@ -54,46 
+54,46 @@ void parallel_for_0(void) { // CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK: omp_parallel: -// CHECK-NEXT: [[GEP_A_ADDR18:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 0 -// CHECK-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR18]], align 8 -// CHECK-NEXT: [[GEP_B_ADDR19:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 1 -// CHECK-NEXT: store double* [[B_ADDR]], double** [[GEP_B_ADDR19]], align 8 -// CHECK-NEXT: [[GEP_R_ADDR20:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 2 -// CHECK-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR20]], align 8 -// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float** }*)* @_Z14parallel_for_1Pfid..omp_par.4 to void (i32*, i32*, ...)*), { i32*, double*, float** }* [[STRUCTARG17]]) -// CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT16:%.*]] -// CHECK: omp.par.outlined.exit16: +// CHECK-NEXT: [[GEP_A_ADDR19:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG18]], i32 0, i32 0 +// CHECK-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR19]], align 8 +// CHECK-NEXT: [[GEP_B_ADDR20:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG18]], i32 0, i32 1 +// CHECK-NEXT: store double* [[B_ADDR]], double** [[GEP_B_ADDR20]], align 8 +// CHECK-NEXT: [[GEP_R_ADDR21:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG18]], i32 0, i32 2 +// CHECK-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR21]], align 8 +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float** }*)* @_Z14parallel_for_1Pfid..omp_par.4 to void (i32*, i32*, ...)*), { i32*, double*, float** }* [[STRUCTARG18]]) +// CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT17:%.*]] +// CHECK: omp.par.outlined.exit17: // CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] // CHECK: omp.par.exit.split: // CHECK-NEXT: ret void // // CHECK-DEBUG-LABEL: @_Z14parallel_for_1Pfid( // CHECK-DEBUG-NEXT: entry: -// CHECK-DEBUG-NEXT: [[STRUCTARG17:%.*]] = alloca { i32*, double*, float** }, align 8 +// CHECK-DEBUG-NEXT: [[STRUCTARG18:%.*]] = alloca { i32*, double*, float** }, align 8 // CHECK-DEBUG-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8 // CHECK-DEBUG-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 // CHECK-DEBUG-NEXT: store float* [[R:%.*]], float** [[R_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata float** [[R_ADDR]], metadata [[META72:![0-9]+]], metadata !DIExpression()), !dbg [[DBG73:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata float** [[R_ADDR]], metadata [[META73:![0-9]+]], metadata !DIExpression()), !dbg [[DBG74:![0-9]+]] // CHECK-DEBUG-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META74:![0-9]+]], metadata !DIExpression()), !dbg [[DBG75:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META75:![0-9]+]], metadata !DIExpression()), !dbg [[DBG76:![0-9]+]] // CHECK-DEBUG-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata [[META76:![0-9]+]], metadata !DIExpression()), !dbg [[DBG77:![0-9]+]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]), 
!dbg [[DBG78:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata [[META77:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78:![0-9]+]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]), !dbg [[DBG79:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK-DEBUG: omp_parallel: -// CHECK-DEBUG-NEXT: [[GEP_A_ADDR18:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 0 -// CHECK-DEBUG-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR18]], align 8 -// CHECK-DEBUG-NEXT: [[GEP_B_ADDR19:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 1 -// CHECK-DEBUG-NEXT: store double* [[B_ADDR]], double** [[GEP_B_ADDR19]], align 8 -// CHECK-DEBUG-NEXT: [[GEP_R_ADDR20:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 2 -// CHECK-DEBUG-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR20]], align 8 -// CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB6]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float** }*)* @_Z14parallel_for_1Pfid..omp_par.4 to void (i32*, i32*, ...)*), { i32*, double*, float** }* [[STRUCTARG17]]), !dbg [[DBG79:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT16:%.*]] -// CHECK-DEBUG: omp.par.outlined.exit16: +// CHECK-DEBUG-NEXT: [[GEP_A_ADDR19:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG18]], i32 0, i32 0 +// CHECK-DEBUG-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR19]], align 8 +// CHECK-DEBUG-NEXT: [[GEP_B_ADDR20:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG18]], i32 0, i32 1 +// CHECK-DEBUG-NEXT: store double* [[B_ADDR]], double** [[GEP_B_ADDR20]], align 8 +// CHECK-DEBUG-NEXT: [[GEP_R_ADDR21:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG18]], i32 0, i32 2 +// CHECK-DEBUG-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR21]], align 8 +// CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB6]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float** }*)* @_Z14parallel_for_1Pfid..omp_par.4 to void (i32*, i32*, ...)*), { i32*, double*, float** }* [[STRUCTARG18]]), !dbg [[DBG80:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT17:%.*]] +// CHECK-DEBUG: omp.par.outlined.exit17: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] // CHECK-DEBUG: omp.par.exit.split: -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG81:![0-9]+]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG82:![0-9]+]] // void parallel_for_1(float *r, int a, double b) { #pragma omp parallel @@ -114,14 +114,14 @@ void parallel_for_1(float *r, int a, double b) { // CHECK-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 -// CHECK-NEXT: [[I185:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[AGG_CAPTURED186:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 8 -// CHECK-NEXT: [[AGG_CAPTURED187:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 -// CHECK-NEXT: [[DOTCOUNT_ADDR188:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LASTITER203:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LOWERBOUND204:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_UPPERBOUND205:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_STRIDE206:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[I188:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED189:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED190:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR191:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LASTITER206:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND207:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_UPPERBOUND208:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE209:%.*]] = alloca i32, align 4 // CHECK-NEXT: store float* [[R:%.*]], float** [[R_ADDR]], align 8 // CHECK-NEXT: 
store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 // CHECK-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8 @@ -135,57 +135,57 @@ void parallel_for_1(float *r, int a, double b) { // CHECK-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG]], i32 0, i32 2 // CHECK-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR]], align 8 // CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float** }*)* @_Z14parallel_for_2Pfid..omp_par.23 to void (i32*, i32*, ...)*), { i32*, double*, float** }* [[STRUCTARG]]) -// CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT184:%.*]] -// CHECK: omp.par.outlined.exit184: +// CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT187:%.*]] +// CHECK: omp.par.outlined.exit187: // CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] // CHECK: omp.par.exit.split: -// CHECK-NEXT: store i32 0, i32* [[I185]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[AGG_CAPTURED186]], i32 0, i32 0 -// CHECK-NEXT: store i32* [[I185]], i32** [[TMP0]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[AGG_CAPTURED187]], i32 0, i32 0 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[I185]], align 4 +// CHECK-NEXT: store i32 0, i32* [[I188]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[AGG_CAPTURED189]], i32 0, i32 0 +// CHECK-NEXT: store i32* [[I188]], i32** [[TMP0]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[AGG_CAPTURED190]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[I188]], align 4 // CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 -// CHECK-NEXT: call void @__captured_stmt.19(i32* [[DOTCOUNT_ADDR188]], %struct.anon.17* 
[[AGG_CAPTURED186]]) -// CHECK-NEXT: [[DOTCOUNT189:%.*]] = load i32, i32* [[DOTCOUNT_ADDR188]], align 4 -// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER190:%.*]] -// CHECK: omp_loop.preheader190: -// CHECK-NEXT: store i32 0, i32* [[P_LOWERBOUND204]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[DOTCOUNT189]], 1 -// CHECK-NEXT: store i32 [[TMP3]], i32* [[P_UPPERBOUND205]], align 4 -// CHECK-NEXT: store i32 1, i32* [[P_STRIDE206]], align 4 -// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM207:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM207]], i32 34, i32* [[P_LASTITER203]], i32* [[P_LOWERBOUND204]], i32* [[P_UPPERBOUND205]], i32* [[P_STRIDE206]], i32 1, i32 0) -// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[P_LOWERBOUND204]], align 4 -// CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[P_UPPERBOUND205]], align 4 +// CHECK-NEXT: call void @__captured_stmt.19(i32* [[DOTCOUNT_ADDR191]], %struct.anon.17* [[AGG_CAPTURED189]]) +// CHECK-NEXT: [[DOTCOUNT192:%.*]] = load i32, i32* [[DOTCOUNT_ADDR191]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER193:%.*]] +// CHECK: omp_loop.preheader193: +// CHECK-NEXT: store i32 0, i32* [[P_LOWERBOUND207]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[DOTCOUNT192]], 1 +// CHECK-NEXT: store i32 [[TMP3]], i32* [[P_UPPERBOUND208]], align 4 +// CHECK-NEXT: store i32 1, i32* [[P_STRIDE209]], align 4 +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM210:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM210]], i32 34, i32* [[P_LASTITER206]], i32* [[P_LOWERBOUND207]], i32* [[P_UPPERBOUND208]], i32* [[P_STRIDE209]], i32 1, i32 0) +// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[P_LOWERBOUND207]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[P_UPPERBOUND208]], align 4 // CHECK-NEXT: 
[[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]] // CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 1 -// CHECK-NEXT: br label [[OMP_LOOP_HEADER191:%.*]] -// CHECK: omp_loop.header191: -// CHECK-NEXT: [[OMP_LOOP_IV197:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER190]] ], [ [[OMP_LOOP_NEXT199:%.*]], [[OMP_LOOP_INC194:%.*]] ] -// CHECK-NEXT: br label [[OMP_LOOP_COND192:%.*]] -// CHECK: omp_loop.cond192: -// CHECK-NEXT: [[OMP_LOOP_CMP198:%.*]] = icmp ult i32 [[OMP_LOOP_IV197]], [[TMP7]] -// CHECK-NEXT: br i1 [[OMP_LOOP_CMP198]], label [[OMP_LOOP_BODY193:%.*]], label [[OMP_LOOP_EXIT195:%.*]] -// CHECK: omp_loop.body193: -// CHECK-NEXT: [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV197]], [[TMP4]] -// CHECK-NEXT: call void @__captured_stmt.20(i32* [[I185]], i32 [[TMP8]], %struct.anon.18* [[AGG_CAPTURED187]]) +// CHECK-NEXT: br label [[OMP_LOOP_HEADER194:%.*]] +// CHECK: omp_loop.header194: +// CHECK-NEXT: [[OMP_LOOP_IV200:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER193]] ], [ [[OMP_LOOP_NEXT202:%.*]], [[OMP_LOOP_INC197:%.*]] ] +// CHECK-NEXT: br label [[OMP_LOOP_COND195:%.*]] +// CHECK: omp_loop.cond195: +// CHECK-NEXT: [[OMP_LOOP_CMP201:%.*]] = icmp ult i32 [[OMP_LOOP_IV200]], [[TMP7]] +// CHECK-NEXT: br i1 [[OMP_LOOP_CMP201]], label [[OMP_LOOP_BODY196:%.*]], label [[OMP_LOOP_EXIT198:%.*]] +// CHECK: omp_loop.body196: +// CHECK-NEXT: [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV200]], [[TMP4]] +// CHECK-NEXT: call void @__captured_stmt.20(i32* [[I188]], i32 [[TMP8]], %struct.anon.18* [[AGG_CAPTURED190]]) // CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK-NEXT: [[CONV200:%.*]] = sitofp i32 [[TMP9]] to double +// CHECK-NEXT: [[CONV203:%.*]] = sitofp i32 [[TMP9]] to double // CHECK-NEXT: [[TMP10:%.*]] = load double, double* [[B_ADDR]], align 8 -// CHECK-NEXT: [[ADD201:%.*]] = fadd double [[CONV200]], [[TMP10]] -// CHECK-NEXT: [[CONV202:%.*]] = fptrunc double [[ADD201]] to float +// CHECK-NEXT: [[ADD204:%.*]] = fadd double [[CONV203]], [[TMP10]] +// CHECK-NEXT: [[CONV205:%.*]] = 
fptrunc double [[ADD204]] to float // CHECK-NEXT: [[TMP11:%.*]] = load float*, float** [[R_ADDR]], align 8 -// CHECK-NEXT: store float [[CONV202]], float* [[TMP11]], align 4 -// CHECK-NEXT: br label [[OMP_LOOP_INC194]] -// CHECK: omp_loop.inc194: -// CHECK-NEXT: [[OMP_LOOP_NEXT199]] = add nuw i32 [[OMP_LOOP_IV197]], 1 -// CHECK-NEXT: br label [[OMP_LOOP_HEADER191]] -// CHECK: omp_loop.exit195: -// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM207]]) -// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM208:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM208]]) -// CHECK-NEXT: br label [[OMP_LOOP_AFTER196:%.*]] -// CHECK: omp_loop.after196: +// CHECK-NEXT: store float [[CONV205]], float* [[TMP11]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_INC197]] +// CHECK: omp_loop.inc197: +// CHECK-NEXT: [[OMP_LOOP_NEXT202]] = add nuw i32 [[OMP_LOOP_IV200]], 1 +// CHECK-NEXT: br label [[OMP_LOOP_HEADER194]] +// CHECK: omp_loop.exit198: +// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM210]]) +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM211:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM211]]) +// CHECK-NEXT: br label [[OMP_LOOP_AFTER199:%.*]] +// CHECK: omp_loop.after199: // CHECK-NEXT: ret void // // CHECK-DEBUG-LABEL: @_Z14parallel_for_2Pfid( @@ -194,21 +194,21 @@ void parallel_for_1(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8 // CHECK-DEBUG-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 -// CHECK-DEBUG-NEXT: [[I185:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[AGG_CAPTURED186:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 8 
-// CHECK-DEBUG-NEXT: [[AGG_CAPTURED187:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 -// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR188:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LASTITER203:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LOWERBOUND204:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_UPPERBOUND205:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_STRIDE206:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[I188:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[AGG_CAPTURED189:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 8 +// CHECK-DEBUG-NEXT: [[AGG_CAPTURED190:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 +// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR191:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_LASTITER206:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_LOWERBOUND207:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_UPPERBOUND208:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_STRIDE209:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store float* [[R:%.*]], float** [[R_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata float** [[R_ADDR]], metadata [[META133:![0-9]+]], metadata !DIExpression()), !dbg [[DBG134:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata float** [[R_ADDR]], metadata [[META134:![0-9]+]], metadata !DIExpression()), !dbg [[DBG135:![0-9]+]] // CHECK-DEBUG-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META135:![0-9]+]], metadata !DIExpression()), !dbg [[DBG136:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META136:![0-9]+]], metadata !DIExpression()), !dbg [[DBG137:![0-9]+]] // CHECK-DEBUG-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata [[META137:![0-9]+]], metadata !DIExpression()), !dbg [[DBG138:![0-9]+]] -// 
CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB13:[0-9]+]]), !dbg [[DBG139:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata [[META138:![0-9]+]], metadata !DIExpression()), !dbg [[DBG139:![0-9]+]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB13:[0-9]+]]), !dbg [[DBG140:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK-DEBUG: omp_parallel: // CHECK-DEBUG-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG]], i32 0, i32 0 @@ -217,60 +217,60 @@ void parallel_for_1(float *r, int a, double b) { // CHECK-DEBUG-NEXT: store double* [[B_ADDR]], double** [[GEP_B_ADDR]], align 8 // CHECK-DEBUG-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG]], i32 0, i32 2 // CHECK-DEBUG-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB13]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float** }*)* @_Z14parallel_for_2Pfid..omp_par.23 to void (i32*, i32*, ...)*), { i32*, double*, float** }* [[STRUCTARG]]), !dbg [[DBG140:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT184:%.*]] -// CHECK-DEBUG: omp.par.outlined.exit184: +// CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB13]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float** }*)* @_Z14parallel_for_2Pfid..omp_par.23 to void (i32*, i32*, ...)*), { i32*, double*, float** }* [[STRUCTARG]]), !dbg [[DBG141:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT187:%.*]] +// CHECK-DEBUG: omp.par.outlined.exit187: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] // CHECK-DEBUG: omp.par.exit.split: -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[I185]], metadata [[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 0, i32* [[I185]], align 4, !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[AGG_CAPTURED186]], i32 0, i32 0, !dbg [[DBG148:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32* [[I185]], i32** [[TMP0]], align 8, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[AGG_CAPTURED187]], i32 0, i32 0, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, i32* [[I185]], align 4, !dbg [[DBG149:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.19(i32* [[DOTCOUNT_ADDR188]], %struct.anon.17* [[AGG_CAPTURED186]]), !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[DOTCOUNT189:%.*]] = load i32, i32* [[DOTCOUNT_ADDR188]], align 4, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER190:%.*]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.preheader190: -// CHECK-DEBUG-NEXT: store i32 0, i32* [[P_LOWERBOUND204]], align 4, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = sub i32 [[DOTCOUNT189]], 1, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: store i32 [[TMP3]], i32* [[P_UPPERBOUND205]], align 4, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: store i32 1, i32* [[P_STRIDE206]], align 4, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: 
[[OMP_GLOBAL_THREAD_NUM207:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB42:[0-9]+]]), !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]], i32 34, i32* [[P_LASTITER203]], i32* [[P_LOWERBOUND204]], i32* [[P_UPPERBOUND205]], i32* [[P_STRIDE206]], i32 1, i32 0), !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, i32* [[P_LOWERBOUND204]], align 4, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, i32* [[P_UPPERBOUND205]], align 4, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]], !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 1, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191:%.*]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.header191: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV197:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER190]] ], [ [[OMP_LOOP_NEXT199:%.*]], [[OMP_LOOP_INC194:%.*]] ], !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND192:%.*]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.cond192: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP198:%.*]] = icmp ult i32 [[OMP_LOOP_IV197]], [[TMP7]], !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP198]], label [[OMP_LOOP_BODY193:%.*]], label [[OMP_LOOP_EXIT195:%.*]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.body193: -// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV197]], [[TMP4]], !dbg [[DBG150:![0-9]+]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.20(i32* [[I185]], i32 [[TMP8]], %struct.anon.18* [[AGG_CAPTURED187]]), !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG151:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV200:%.*]] = sitofp i32 [[TMP9]] to double, !dbg [[DBG151]] -// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load double, double* [[B_ADDR]], align 8, !dbg [[DBG150]] -// CHECK-DEBUG-NEXT: [[ADD201:%.*]] = fadd double [[CONV200]], [[TMP10]], !dbg [[DBG152:![0-9]+]] 
-// CHECK-DEBUG-NEXT: [[CONV202:%.*]] = fptrunc double [[ADD201]] to float, !dbg [[DBG151]] -// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load float*, float** [[R_ADDR]], align 8, !dbg [[DBG153:![0-9]+]] -// CHECK-DEBUG-NEXT: store float [[CONV202]], float* [[TMP11]], align 4, !dbg [[DBG154:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC194]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.inc194: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT199]] = add nuw i32 [[OMP_LOOP_IV197]], 1, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.exit195: -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]]), !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM208:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB42]]), !dbg [[DBG150]] -// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB43:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM208]]), !dbg [[DBG150]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER196:%.*]], !dbg [[DBG148]] -// CHECK-DEBUG: omp_loop.after196: -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG155:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[I188]], metadata [[META145:![0-9]+]], metadata !DIExpression()), !dbg [[DBG148:![0-9]+]] +// CHECK-DEBUG-NEXT: store i32 0, i32* [[I188]], align 4, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[AGG_CAPTURED189]], i32 0, i32 0, !dbg [[DBG149:![0-9]+]] +// CHECK-DEBUG-NEXT: store i32* [[I188]], i32** [[TMP0]], align 8, !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[AGG_CAPTURED190]], i32 0, i32 0, !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, i32* [[I188]], align 4, !dbg [[DBG150:![0-9]+]] +// CHECK-DEBUG-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4, !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: call 
void @__captured_stmt.19(i32* [[DOTCOUNT_ADDR191]], %struct.anon.17* [[AGG_CAPTURED189]]), !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: [[DOTCOUNT192:%.*]] = load i32, i32* [[DOTCOUNT_ADDR191]], align 4, !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER193:%.*]], !dbg [[DBG149]] +// CHECK-DEBUG: omp_loop.preheader193: +// CHECK-DEBUG-NEXT: store i32 0, i32* [[P_LOWERBOUND207]], align 4, !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = sub i32 [[DOTCOUNT192]], 1, !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: store i32 [[TMP3]], i32* [[P_UPPERBOUND208]], align 4, !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: store i32 1, i32* [[P_STRIDE209]], align 4, !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM210:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB42:[0-9]+]]), !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM210]], i32 34, i32* [[P_LASTITER206]], i32* [[P_LOWERBOUND207]], i32* [[P_UPPERBOUND208]], i32* [[P_STRIDE209]], i32 1, i32 0), !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, i32* [[P_LOWERBOUND207]], align 4, !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, i32* [[P_UPPERBOUND208]], align 4, !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]], !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 1, !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER194:%.*]], !dbg [[DBG149]] +// CHECK-DEBUG: omp_loop.header194: +// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV200:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER193]] ], [ [[OMP_LOOP_NEXT202:%.*]], [[OMP_LOOP_INC197:%.*]] ], !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND195:%.*]], !dbg [[DBG149]] +// CHECK-DEBUG: omp_loop.cond195: +// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP201:%.*]] = icmp ult i32 [[OMP_LOOP_IV200]], [[TMP7]], !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP201]], label [[OMP_LOOP_BODY196:%.*]], 
label [[OMP_LOOP_EXIT198:%.*]], !dbg [[DBG149]] +// CHECK-DEBUG: omp_loop.body196: +// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV200]], [[TMP4]], !dbg [[DBG151:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.20(i32* [[I188]], i32 [[TMP8]], %struct.anon.18* [[AGG_CAPTURED190]]), !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG152:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV203:%.*]] = sitofp i32 [[TMP9]] to double, !dbg [[DBG152]] +// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load double, double* [[B_ADDR]], align 8, !dbg [[DBG151]] +// CHECK-DEBUG-NEXT: [[ADD204:%.*]] = fadd double [[CONV203]], [[TMP10]], !dbg [[DBG153:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV205:%.*]] = fptrunc double [[ADD204]] to float, !dbg [[DBG152]] +// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load float*, float** [[R_ADDR]], align 8, !dbg [[DBG154:![0-9]+]] +// CHECK-DEBUG-NEXT: store float [[CONV205]], float* [[TMP11]], align 4, !dbg [[DBG155:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC197]], !dbg [[DBG149]] +// CHECK-DEBUG: omp_loop.inc197: +// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT202]] = add nuw i32 [[OMP_LOOP_IV200]], 1, !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER194]], !dbg [[DBG149]] +// CHECK-DEBUG: omp_loop.exit198: +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM210]]), !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM211:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB42]]), !dbg [[DBG151]] +// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB43:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM211]]), !dbg [[DBG151]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER199:%.*]], !dbg [[DBG149]] +// CHECK-DEBUG: omp_loop.after199: +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG156:![0-9]+]] // void parallel_for_2(float *r, int a, double b) { #pragma omp parallel diff --git 
a/clang/test/OpenMP/irbuilder_unroll_partial_factor_for_collapse.c b/clang/test/OpenMP/irbuilder_unroll_partial_factor_for_collapse.c index 0bfed911077bf..bc63ff5a04362 100644 --- a/clang/test/OpenMP/irbuilder_unroll_partial_factor_for_collapse.c +++ b/clang/test/OpenMP/irbuilder_unroll_partial_factor_for_collapse.c @@ -54,8 +54,8 @@ // CHECK-NEXT: store i64 %[[TMP3]], i64* %[[DOTOMP_UB]], align 8 // CHECK-NEXT: store i64 1, i64* %[[DOTOMP_STRIDE]], align 8 // CHECK-NEXT: store i32 0, i32* %[[DOTOMP_IS_LAST]], align 4 -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @3) -// CHECK-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[DOTOMP_IS_LAST]], i64* %[[DOTOMP_LB]], i64* %[[DOTOMP_UB]], i64* %[[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:.+]]) +// CHECK-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1:.+]], i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[DOTOMP_IS_LAST]], i64* %[[DOTOMP_LB]], i64* %[[DOTOMP_UB]], i64* %[[DOTOMP_STRIDE]], i64 1, i64 1) // CHECK-NEXT: %[[TMP4:.+]] = load i64, i64* %[[DOTOMP_UB]], align 8 // CHECK-NEXT: %[[TMP5:.+]] = load i64, i64* %[[DOTCAPTURE_EXPR_2]], align 8 // CHECK-NEXT: %[[CMP8:.+]] = icmp sgt i64 %[[TMP4]], %[[TMP5]] @@ -171,13 +171,13 @@ // CHECK-NEXT: br label %[[OMP_LOOP_EXIT:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[OMP_LOOP_EXIT]]: -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM38:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @5) -// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM38]]) +// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM38:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB5:.+]]) +// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 %[[OMP_GLOBAL_THREAD_NUM38]]) // CHECK-NEXT: br label 
%[[OMP_PRECOND_END]] // CHECK-EMPTY: // CHECK-NEXT: [[OMP_PRECOND_END]]: -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM39:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @7) -// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @6, i32 %[[OMP_GLOBAL_THREAD_NUM39]]) +// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM39:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB6:.+]], i32 %[[OMP_GLOBAL_THREAD_NUM39]]) // CHECK-NEXT: ret void // CHECK-NEXT: } void unroll_partial_factor_for_collapse(int m, float *a, float *b, float *c, float *d) { diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_for_collapse.c b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_for_collapse.c index 1a2bd117bf98c..2819eaf1f02cf 100644 --- a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_for_collapse.c +++ b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_for_collapse.c @@ -62,8 +62,8 @@ double sind(double); // CHECK-NEXT: store i64 %[[TMP3]], i64* %[[DOTOMP_UB]], align 8 // CHECK-NEXT: store i64 1, i64* %[[DOTOMP_STRIDE]], align 8 // CHECK-NEXT: store i32 0, i32* %[[DOTOMP_IS_LAST]], align 4 -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @3) -// CHECK-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[DOTOMP_IS_LAST]], i64* %[[DOTOMP_LB]], i64* %[[DOTOMP_UB]], i64* %[[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:.+]]) +// CHECK-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1:.+]], i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[DOTOMP_IS_LAST]], i64* %[[DOTOMP_LB]], i64* %[[DOTOMP_UB]], i64* %[[DOTOMP_STRIDE]], i64 1, i64 1) // CHECK-NEXT: %[[TMP4:.+]] = load i64, i64* %[[DOTOMP_UB]], align 8 // CHECK-NEXT: %[[TMP5:.+]] = load i64, i64* 
%[[DOTCAPTURE_EXPR_2]], align 8 // CHECK-NEXT: %[[CMP8:.+]] = icmp sgt i64 %[[TMP4]], %[[TMP5]] @@ -195,13 +195,13 @@ double sind(double); // CHECK-NEXT: br label %[[OMP_LOOP_EXIT:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[OMP_LOOP_EXIT]]: -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM49:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @5) -// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM49]]) +// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM49:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB5:.+]]) +// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 %[[OMP_GLOBAL_THREAD_NUM49]]) // CHECK-NEXT: br label %[[OMP_PRECOND_END]] // CHECK-EMPTY: // CHECK-NEXT: [[OMP_PRECOND_END]]: -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM50:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @7) -// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @6, i32 %[[OMP_GLOBAL_THREAD_NUM50]]) +// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM50:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB6:.+]], i32 %[[OMP_GLOBAL_THREAD_NUM50]]) // CHECK-NEXT: ret void // CHECK-NEXT: } From 5f5340346dc52b3b43127016b4bd5880ad60a3e1 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 29 Apr 2022 01:58:41 -0500 Subject: [PATCH 16/50] fix other tests --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 24 +- clang/test/OpenMP/cancel_codegen.cpp | 1304 +++++++++-------- ...ilder_unroll_partial_factor_for_collapse.c | 12 +- ...er_unroll_partial_heuristic_for_collapse.c | 12 +- clang/test/OpenMP/parallel_codegen.cpp | 24 +- 5 files changed, 719 insertions(+), 657 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 8332ef774d29f..82dc518b22cfb 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -2644,32 +2644,14 @@ void 
CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks, bool ForceSimpleCall) { // Check if we should use the OMPBuilder - - // FIXME: The OpenMPIRBuilder finalization stack does not necessarily - // correspond the scope structure expected by CGOpenMPRuntime because until - // OpenMPIRBuilder implementation is complete, some directives will still be - // emitted by OpenMPIRBuilder itself. Note that - // isLastFinalizationInfoCancellable may also be wrong and match the wrong - // level which happen to be the same OpenMPDirectiveKind. - // CGOpenMPRegionInfo* OMPRegionInfo = - // dyn_cast_or_null(CGF.CapturedStmtInfo); - if (auto *IRBuilderRegion = - dyn_cast_or_null(CGF.CapturedStmtInfo)) { - // if (OMPBuilder.isLastFinalizationInfoCancellable(Kind)) { + auto *OMPRegionInfo = + dyn_cast_or_null(CGF.CapturedStmtInfo); + if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { CGF.Builder.restoreIP(OMPBuilder.createBarrier( CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); return; - // } - // FIXME: CGF.CapturedStmtInfo is unreliable when using OpenMPIRBuilder. 
- // OMPRegionInfo = nullptr; - //} else { - // OMPRegionInfo = - // dyn_cast_or_null(CGF.CapturedStmtInfo); } - auto *OMPRegionInfo = - dyn_cast_or_null(CGF.CapturedStmtInfo); - if (!CGF.HaveInsertPoint()) return; // Build call __kmpc_cancel_barrier(loc, thread_id); diff --git a/clang/test/OpenMP/cancel_codegen.cpp b/clang/test/OpenMP/cancel_codegen.cpp index 7cfcd563cf576..1d1bdfd534929 100644 --- a/clang/test/OpenMP/cancel_codegen.cpp +++ b/clang/test/OpenMP/cancel_codegen.cpp @@ -1336,13 +1336,13 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_34:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_37:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I36:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I39:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK3-NEXT: [[R:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 0, i32* [[RETVAL]], align 4 @@ -1389,8 +1389,12 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 3) // CHECK3-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK3-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] -// CHECK3: omp_section_loop.body.case.split: +// CHECK3-NEXT: br i1 [[TMP8]], label 
[[OMP_SECTION_LOOP_BODY_CASE_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL_FINI:%.*]] +// CHECK3: omp_section_loop.body.case.cncl.fini: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] +// CHECK3: omp_section_loop.body.case.cncl: +// CHECK3-NEXT: br label [[SECTION_FINI:%.*]] +// CHECK3: omp_section_loop.body.case.cont: // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_SECTION_AFTER:%.*]] // CHECK3: omp_section_loop.body.case.section.after: // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_SECTIONS_AFTER]] @@ -1405,10 +1409,14 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTER:%.*]] // CHECK3: omp_section_loop.after: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTERSECTIONS_FINI:%.*]] -// CHECK3: omp_section_loop.aftersections.fini: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]] -// CHECK3: omp_section_loop.preheader13: +// CHECK3-NEXT: br label [[SECTION_FINISH:%.*]] +// CHECK3: section_finish: +// CHECK3-NEXT: br label [[DOTOMPFINALIZE13:%.*]] +// CHECK3: .ompfinalize13: +// CHECK3-NEXT: br label [[SECTION_FINI]] +// CHECK3: section_fini: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER14:%.*]] +// CHECK3: omp_section_loop.preheader14: // CHECK3-NEXT: store i32 0, i32* [[P_LOWERBOUND29]], align 4 // CHECK3-NEXT: store i32 1, i32* [[P_UPPERBOUND30]], align 4 // CHECK3-NEXT: store i32 1, i32* [[P_STRIDE31]], align 4 @@ -1418,79 +1426,91 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[P_UPPERBOUND30]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], [[TMP9]] // CHECK3-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 1 -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_HEADER14:%.*]] -// CHECK3: omp_section_loop.header14: -// CHECK3-NEXT: [[OMP_SECTION_LOOP_IV20:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER13]] ], [ 
[[OMP_SECTION_LOOP_NEXT22:%.*]], [[OMP_SECTION_LOOP_INC17:%.*]] ] -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_COND15:%.*]] -// CHECK3: omp_section_loop.cond15: -// CHECK3-NEXT: [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP12]] -// CHECK3-NEXT: br i1 [[OMP_SECTION_LOOP_CMP21]], label [[OMP_SECTION_LOOP_BODY16:%.*]], label [[OMP_SECTION_LOOP_EXIT18:%.*]] -// CHECK3: omp_section_loop.body16: -// CHECK3-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP9]] +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_HEADER15:%.*]] +// CHECK3: omp_section_loop.header15: +// CHECK3-NEXT: [[OMP_SECTION_LOOP_IV21:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER14]] ], [ [[OMP_SECTION_LOOP_NEXT23:%.*]], [[OMP_SECTION_LOOP_INC18:%.*]] ] +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_COND16:%.*]] +// CHECK3: omp_section_loop.cond16: +// CHECK3-NEXT: [[OMP_SECTION_LOOP_CMP22:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV21]], [[TMP12]] +// CHECK3-NEXT: br i1 [[OMP_SECTION_LOOP_CMP22]], label [[OMP_SECTION_LOOP_BODY17:%.*]], label [[OMP_SECTION_LOOP_EXIT19:%.*]] +// CHECK3: omp_section_loop.body17: +// CHECK3-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV21]], [[TMP9]] // CHECK3-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 1 // CHECK3-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], 0 -// CHECK3-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER:%.*]] [ -// CHECK3-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE23:%.*]] -// CHECK3-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE25:%.*]] +// CHECK3-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER:%.*]] [ +// CHECK3-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE24:%.*]] +// CHECK3-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE26:%.*]] // CHECK3-NEXT: ] -// CHECK3: omp_section_loop.body.case23: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 
@__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM24]], i32 3) +// CHECK3: omp_section_loop.body.case24: +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM25:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM25]], i32 3) // CHECK3-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE23_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE23_CNCL:%.*]] -// CHECK3: omp_section_loop.body.case23.split: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE23_SECTION_AFTER:%.*]] -// CHECK3: omp_section_loop.body.case23.section.after: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] -// CHECK3: omp_section_loop.body.case25: +// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE24_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE24_CNCL_FINI:%.*]] +// CHECK3: omp_section_loop.body.case24.cncl.fini: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE24_CNCL:%.*]] +// CHECK3: omp_section_loop.body.case24.cncl: +// CHECK3-NEXT: br label [[SECTION_FINI35:%.*]] +// CHECK3: omp_section_loop.body.case24.cont: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE24_SECTION_AFTER:%.*]] +// CHECK3: omp_section_loop.body.case24.section.after: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER]] +// CHECK3: omp_section_loop.body.case26: // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM27:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM27]], i32 3) // CHECK3-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]] -// CHECK3: omp_section_loop.body.case25.split: -// CHECK3-NEXT: br label 
[[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER26:%.*]] -// CHECK3: omp_section_loop.body.case25.section.after26: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER:%.*]] -// CHECK3: omp_section_loop.body.case25.section.after: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] -// CHECK3: omp_section_loop.body16.sections.after: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_INC17]] -// CHECK3: omp_section_loop.inc17: -// CHECK3-NEXT: [[OMP_SECTION_LOOP_NEXT22]] = add nuw i32 [[OMP_SECTION_LOOP_IV20]], 1 -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_HEADER14]] -// CHECK3: omp_section_loop.exit18: +// CHECK3-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE26_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL_FINI:%.*]] +// CHECK3: omp_section_loop.body.case26.cncl.fini: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL:%.*]] +// CHECK3: omp_section_loop.body.case26.cncl: +// CHECK3-NEXT: br label [[SECTION_FINI35]] +// CHECK3: omp_section_loop.body.case26.cont: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTIONFINI:%.*]] +// CHECK3: omp_section_loop.body.case26.sectionfini: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTION_AFTER:%.*]] +// CHECK3: omp_section_loop.body.case26.section.after: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER]] +// CHECK3: omp_section_loop.body17.sections.after: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_INC18]] +// CHECK3: omp_section_loop.inc18: +// CHECK3-NEXT: [[OMP_SECTION_LOOP_NEXT23]] = add nuw i32 [[OMP_SECTION_LOOP_IV21]], 1 +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_HEADER15]] +// CHECK3: omp_section_loop.exit19: // CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]]) // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM33:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 
[[OMP_GLOBAL_THREAD_NUM33]]) -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTER19:%.*]] -// CHECK3: omp_section_loop.after19: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTER19SECTIONS_FINI:%.*]] -// CHECK3: omp_section_loop.after19sections.fini: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTER20:%.*]] +// CHECK3: omp_section_loop.after20: +// CHECK3-NEXT: br label [[SECTION_FINISH34:%.*]] +// CHECK3: section_finish34: +// CHECK3-NEXT: br label [[DOTOMPFINALIZE36:%.*]] +// CHECK3: .ompfinalize36: +// CHECK3-NEXT: br label [[SECTION_FINI35]] +// CHECK3: section_fini35: // CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP21]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK3-NEXT: [[SUB35:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK3-NEXT: store i32 [[SUB35]], i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK3-NEXT: [[SUB38:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK3-NEXT: store i32 [[SUB38]], i32* [[DOTCAPTURE_EXPR_37]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 // CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP22]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 // CHECK3-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM37:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) -// 
CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM37]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM40:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM40]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 -// CHECK3-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] -// CHECK3-NEXT: br i1 [[CMP38]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 +// CHECK3-NEXT: [[CMP41:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: br i1 [[CMP41]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: // CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 @@ -1504,29 +1524,23 @@ for (int i = 0; i < argc; ++i) { // CHECK3: omp.inner.for.cond: // CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP39:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] -// CHECK3-NEXT: br i1 [[CMP39]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[CMP42:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK3-NEXT: br i1 [[CMP42]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // 
CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 -// CHECK3-NEXT: [[ADD40:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD40]], i32* [[I36]], align 4 +// CHECK3-NEXT: [[ADD43:%.*]] = add nsw i32 0, [[MUL]] +// CHECK3-NEXT: store i32 [[ADD43]], i32* [[I39]], align 4 // CHECK3-NEXT: [[TMP32:%.*]] = load float, float* @flag, align 4 -// CHECK3-NEXT: [[TOBOOL41:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 -// CHECK3-NEXT: br i1 [[TOBOOL41]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK3-NEXT: [[TOBOOL44:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 +// CHECK3-NEXT: br i1 [[TOBOOL44]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) -// CHECK3-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 2) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) +// CHECK3-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM45]], i32 2) // CHECK3-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 // CHECK3-NEXT: br i1 [[TMP34]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK3: .cancel.exit: // CHECK3-NEXT: br label [[CANCEL_EXIT:%.*]] -// CHECK3: omp_section_loop.body.case.cncl: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_EXIT]] -// CHECK3: omp_section_loop.body.case23.cncl: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] -// CHECK3: omp_section_loop.body.case25.cncl: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] // CHECK3: .cancel.continue: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: @@ -1537,30 +1551,30 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: br 
label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: // CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK3-NEXT: store i32 [[ADD43]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD46:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK3-NEXT: store i32 [[ADD46]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM45]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM48]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: cancel.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM44:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM44]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM47]]) // CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: omp.precond.end: // CHECK3-NEXT: br label [[CANCEL_CONT]] // CHECK3: cancel.cont: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM46:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM46]]) -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* 
@[[GLOB14:[0-9]+]]) -// CHECK3-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM49:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM49]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM50:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) +// CHECK3-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM50]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) // CHECK3-NEXT: [[TMP37:%.*]] = bitcast i8* [[TMP36]] to %struct.kmp_task_t_with_privates* // CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 0 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) -// CHECK3-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM48]], i8* [[TMP36]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM51:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) +// CHECK3-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM51]], i8* [[TMP36]]) // CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) // CHECK3-NEXT: store i32 0, i32* [[R]], align 4 @@ -1584,50 +1598,54 @@ for (int i = 0; i < argc; ++i) { // CHECK3: omp.par.region: // CHECK3-NEXT: [[TMP2:%.*]] = load float, float* @flag, align 4 // CHECK3-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00 -// CHECK3-NEXT: br i1 [[TOBOOL]], label [[TMP14:%.*]], label [[TMP3:%.*]] -// CHECK3: 3: -// CHECK3-NEXT: br label [[TMP4:%.*]] -// CHECK3: 4: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8 -// CHECK3-NEXT: [[TMP6:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP6]], i64 0 -// CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 0 -// CHECK3-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) -// CHECK3-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 -// CHECK3-NEXT: br i1 [[TMP9]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]] -// CHECK3: .cncl5: +// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_PAR_REGION_IF:%.*]], label [[OMP_PAR_REGION1:%.*]] +// CHECK3: omp.par.region1: +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i32 [[TMP3]] to i8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP4]], i64 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK3-NEXT: 
[[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i64 0 +// CHECK3-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX4]], align 1 +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM5]]) +// CHECK3-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0 +// CHECK3-NEXT: br i1 [[TMP7]], label [[OMP_PAR_REGION1_CONT:%.*]], label [[OMP_PAR_REGION1_CNCL_FINI:%.*]] +// CHECK3: omp.par.region1.cncl.fini: +// CHECK3-NEXT: br label [[OMP_PAR_REGION1_CNCL:%.*]] +// CHECK3: omp.par.region1.cncl: +// CHECK3-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +// CHECK3: omp.par.pre_finalize: +// CHECK3-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK3: .ompfinalize: // CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] -// CHECK3: .cont: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0 -// CHECK3-NEXT: [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1 -// CHECK3-NEXT: [[CONV8:%.*]] = sext i8 [[TMP13]] to i32 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP10]] +// CHECK3: omp.par.region1.cont: +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP9]], i64 0 +// CHECK3-NEXT: [[TMP10:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP10]], i64 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i8, i8* 
[[ARRAYIDX7]], align 1 +// CHECK3-NEXT: [[CONV8:%.*]] = sext i8 [[TMP11]] to i32 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP8]] // CHECK3-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD]] to i8 // CHECK3-NEXT: store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1 // CHECK3-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] // CHECK3: omp.par.region.parallel.after: -// CHECK3-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] -// CHECK3: omp.par.pre_finalize: -// CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] -// CHECK3: 14: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1) -// CHECK3-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0 -// CHECK3-NEXT: br i1 [[TMP16]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]] -// CHECK3: .cncl: +// CHECK3-NEXT: br label [[OMP_PAR_PRE_FINALIZE]] +// CHECK3: omp.par.region.if: // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -// CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] -// CHECK3: .split: -// CHECK3-NEXT: br label [[TMP4]] +// CHECK3-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]], i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 0 +// CHECK3-NEXT: br i1 [[TMP13]], label [[OMP_PAR_REGION_IF_CONT:%.*]], label [[OMP_PAR_REGION_IF_CNCL_FINI:%.*]] +// CHECK3: omp.par.region.if.cncl.fini: +// CHECK3-NEXT: br label [[OMP_PAR_REGION_IF_CNCL:%.*]] +// CHECK3: omp.par.region.if.cncl: +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* 
@[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +// CHECK3-NEXT: br label [[OMP_PAR_PRE_FINALIZE]] +// CHECK3: omp.par.region.if.cont: +// CHECK3-NEXT: br label [[OMP_PAR_REGION1]] // CHECK3: omp.par.outlined.exit.exitStub: // CHECK3-NEXT: ret void // @@ -1716,14 +1734,14 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.sections.case: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) // CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK3-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK3-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] -// CHECK3: .omp.sections.case.split: +// CHECK3-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK3-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK3: .cancel.exit: +// CHECK3-NEXT: br label [[CANCEL_EXIT:%.*]] +// CHECK3: .cancel.continue: // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK3: .omp.sections.case.cncl: -// CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: .omp.sections.exit: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: @@ -1732,13 +1750,13 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* 
@[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) -// CHECK3-NEXT: br label [[CANCEL_CONT]] +// CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: cancel.cont: // CHECK3-NEXT: ret void // CHECK3: cancel.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) // CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK3-NEXT: br label [[CANCEL_CONT]] // @@ -1759,7 +1777,7 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 1 @@ -1780,25 +1798,23 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.sections.case: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) // CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], 
i32 3) -// CHECK3-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK3-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] -// CHECK3: .omp.sections.case.split: +// CHECK3-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK3-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK3: .cancel.exit: +// CHECK3-NEXT: br label [[CANCEL_EXIT:%.*]] +// CHECK3: .cancel.continue: // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK3: .omp.sections.case.cncl: -// CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: .omp.sections.case2: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) // CHECK3-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 3) -// CHECK3-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0 -// CHECK3-NEXT: br i1 [[TMP10]], label [[DOTOMP_SECTIONS_CASE2_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE2_CNCL:%.*]] -// CHECK3: .omp.sections.case2.split: -// CHECK3-NEXT: br label [[DOTOMP_SECTIONS_CASE2_SECTION_AFTER:%.*]] -// CHECK3: .omp.sections.case2.section.after: +// CHECK3-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK3-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT4:%.*]], label [[DOTCANCEL_CONTINUE5:%.*]] +// CHECK3: .cancel.exit4: +// CHECK3-NEXT: br label [[CANCEL_EXIT]] +// CHECK3: .cancel.continue5: // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK3: .omp.sections.case2.cncl: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_END]] // CHECK3: .omp.sections.exit: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: @@ -1807,14 +1823,14 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label 
[[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM5]]) -// CHECK3-NEXT: br label [[CANCEL_CONT]] +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM7]]) +// CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: cancel.cont: // CHECK3-NEXT: ret void // CHECK3: cancel.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23]]) -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) // CHECK3-NEXT: br label [[CANCEL_CONT]] // // @@ -1861,7 +1877,7 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: store i32 0, i32* [[R3]], align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB31:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* 
[[DOTCAPTURE_EXPR_1]], align 4 @@ -1889,7 +1905,7 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB33:[0-9]+]]) // CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM7]], i32 2) // CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 // CHECK3-NEXT: br i1 [[TMP15]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] @@ -1911,14 +1927,14 @@ for (int i = 0; i < argc; ++i) { // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK3-NEXT: [[TMP20:%.*]] = bitcast i32* [[R3]] to i8* // CHECK3-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) // CHECK3-NEXT: [[TMP21:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB30:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, 
i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB36:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] @@ -1928,10 +1944,10 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[R3]], align 4 // CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: store i32 [[ADD13]], i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB30]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: cancel.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) // CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) // CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: .omp.reduction.case2: @@ -1988,13 +2004,13 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTCAPTURE_EXPR_34:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_37:%.*]] = alloca i32, align 
4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[I36:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[I39:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK4-NEXT: [[R:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store i32 0, i32* [[RETVAL]], align 4 @@ -2041,8 +2057,12 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 3) // CHECK4-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK4-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] -// CHECK4: omp_section_loop.body.case.split: +// CHECK4-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL_FINI:%.*]] +// CHECK4: omp_section_loop.body.case.cncl.fini: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] +// CHECK4: omp_section_loop.body.case.cncl: +// CHECK4-NEXT: br label [[SECTION_FINI:%.*]] +// CHECK4: omp_section_loop.body.case.cont: // CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_SECTION_AFTER:%.*]] // CHECK4: omp_section_loop.body.case.section.after: // CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_SECTIONS_AFTER]] @@ -2057,10 +2077,14 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) // CHECK4-NEXT: br label [[OMP_SECTION_LOOP_AFTER:%.*]] // CHECK4: omp_section_loop.after: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_AFTERSECTIONS_FINI:%.*]] -// 
CHECK4: omp_section_loop.aftersections.fini: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]] -// CHECK4: omp_section_loop.preheader13: +// CHECK4-NEXT: br label [[SECTION_FINISH:%.*]] +// CHECK4: section_finish: +// CHECK4-NEXT: br label [[DOTOMPFINALIZE13:%.*]] +// CHECK4: .ompfinalize13: +// CHECK4-NEXT: br label [[SECTION_FINI]] +// CHECK4: section_fini: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER14:%.*]] +// CHECK4: omp_section_loop.preheader14: // CHECK4-NEXT: store i32 0, i32* [[P_LOWERBOUND29]], align 4 // CHECK4-NEXT: store i32 1, i32* [[P_UPPERBOUND30]], align 4 // CHECK4-NEXT: store i32 1, i32* [[P_STRIDE31]], align 4 @@ -2070,79 +2094,91 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[P_UPPERBOUND30]], align 4 // CHECK4-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], [[TMP9]] // CHECK4-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 1 -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_HEADER14:%.*]] -// CHECK4: omp_section_loop.header14: -// CHECK4-NEXT: [[OMP_SECTION_LOOP_IV20:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER13]] ], [ [[OMP_SECTION_LOOP_NEXT22:%.*]], [[OMP_SECTION_LOOP_INC17:%.*]] ] -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_COND15:%.*]] -// CHECK4: omp_section_loop.cond15: -// CHECK4-NEXT: [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP12]] -// CHECK4-NEXT: br i1 [[OMP_SECTION_LOOP_CMP21]], label [[OMP_SECTION_LOOP_BODY16:%.*]], label [[OMP_SECTION_LOOP_EXIT18:%.*]] -// CHECK4: omp_section_loop.body16: -// CHECK4-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP9]] +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_HEADER15:%.*]] +// CHECK4: omp_section_loop.header15: +// CHECK4-NEXT: [[OMP_SECTION_LOOP_IV21:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER14]] ], [ [[OMP_SECTION_LOOP_NEXT23:%.*]], [[OMP_SECTION_LOOP_INC18:%.*]] ] +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_COND16:%.*]] +// CHECK4: omp_section_loop.cond16: +// CHECK4-NEXT: 
[[OMP_SECTION_LOOP_CMP22:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV21]], [[TMP12]] +// CHECK4-NEXT: br i1 [[OMP_SECTION_LOOP_CMP22]], label [[OMP_SECTION_LOOP_BODY17:%.*]], label [[OMP_SECTION_LOOP_EXIT19:%.*]] +// CHECK4: omp_section_loop.body17: +// CHECK4-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV21]], [[TMP9]] // CHECK4-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 1 // CHECK4-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], 0 -// CHECK4-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER:%.*]] [ -// CHECK4-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE23:%.*]] -// CHECK4-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE25:%.*]] +// CHECK4-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER:%.*]] [ +// CHECK4-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE24:%.*]] +// CHECK4-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE26:%.*]] // CHECK4-NEXT: ] -// CHECK4: omp_section_loop.body.case23: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM24]], i32 3) +// CHECK4: omp_section_loop.body.case24: +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM25:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM25]], i32 3) // CHECK4-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0 -// CHECK4-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE23_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE23_CNCL:%.*]] -// CHECK4: omp_section_loop.body.case23.split: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE23_SECTION_AFTER:%.*]] -// CHECK4: omp_section_loop.body.case23.section.after: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] -// CHECK4: omp_section_loop.body.case25: +// CHECK4-NEXT: br i1 [[TMP17]], label 
[[OMP_SECTION_LOOP_BODY_CASE24_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE24_CNCL_FINI:%.*]] +// CHECK4: omp_section_loop.body.case24.cncl.fini: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE24_CNCL:%.*]] +// CHECK4: omp_section_loop.body.case24.cncl: +// CHECK4-NEXT: br label [[SECTION_FINI35:%.*]] +// CHECK4: omp_section_loop.body.case24.cont: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE24_SECTION_AFTER:%.*]] +// CHECK4: omp_section_loop.body.case24.section.after: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER]] +// CHECK4: omp_section_loop.body.case26: // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM27:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM27]], i32 3) // CHECK4-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 0 -// CHECK4-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]] -// CHECK4: omp_section_loop.body.case25.split: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER26:%.*]] -// CHECK4: omp_section_loop.body.case25.section.after26: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER:%.*]] -// CHECK4: omp_section_loop.body.case25.section.after: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] -// CHECK4: omp_section_loop.body16.sections.after: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_INC17]] -// CHECK4: omp_section_loop.inc17: -// CHECK4-NEXT: [[OMP_SECTION_LOOP_NEXT22]] = add nuw i32 [[OMP_SECTION_LOOP_IV20]], 1 -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_HEADER14]] -// CHECK4: omp_section_loop.exit18: +// CHECK4-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE26_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL_FINI:%.*]] +// CHECK4: omp_section_loop.body.case26.cncl.fini: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL:%.*]] +// 
CHECK4: omp_section_loop.body.case26.cncl: +// CHECK4-NEXT: br label [[SECTION_FINI35]] +// CHECK4: omp_section_loop.body.case26.cont: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTIONFINI:%.*]] +// CHECK4: omp_section_loop.body.case26.sectionfini: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTION_AFTER:%.*]] +// CHECK4: omp_section_loop.body.case26.section.after: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER]] +// CHECK4: omp_section_loop.body17.sections.after: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_INC18]] +// CHECK4: omp_section_loop.inc18: +// CHECK4-NEXT: [[OMP_SECTION_LOOP_NEXT23]] = add nuw i32 [[OMP_SECTION_LOOP_IV21]], 1 +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_HEADER15]] +// CHECK4: omp_section_loop.exit19: // CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]]) // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM33:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM33]]) -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_AFTER19:%.*]] -// CHECK4: omp_section_loop.after19: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_AFTER19SECTIONS_FINI:%.*]] -// CHECK4: omp_section_loop.after19sections.fini: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_AFTER20:%.*]] +// CHECK4: omp_section_loop.after20: +// CHECK4-NEXT: br label [[SECTION_FINISH34:%.*]] +// CHECK4: section_finish34: +// CHECK4-NEXT: br label [[DOTOMPFINALIZE36:%.*]] +// CHECK4: .ompfinalize36: +// CHECK4-NEXT: br label [[SECTION_FINI35]] +// CHECK4: section_fini35: // CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP21]], 0 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// 
CHECK4-NEXT: [[SUB35:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK4-NEXT: store i32 [[SUB35]], i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK4-NEXT: [[SUB38:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK4-NEXT: store i32 [[SUB38]], i32* [[DOTCAPTURE_EXPR_37]], align 4 // CHECK4-NEXT: store i32 0, i32* [[I]], align 4 // CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP22]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK4: omp.precond.then: // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 // CHECK4-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM37:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM37]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM40:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM40]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 -// CHECK4-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] -// CHECK4-NEXT: br i1 [[CMP38]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK4-NEXT: 
[[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 +// CHECK4-NEXT: [[CMP41:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +// CHECK4-NEXT: br i1 [[CMP41]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: -// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: // CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 @@ -2156,29 +2192,23 @@ for (int i = 0; i < argc; ++i) { // CHECK4: omp.inner.for.cond: // CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP39:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] -// CHECK4-NEXT: br i1 [[CMP39]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4-NEXT: [[CMP42:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK4-NEXT: br i1 [[CMP42]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: // CHECK4-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 -// CHECK4-NEXT: [[ADD40:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD40]], i32* [[I36]], align 4 +// CHECK4-NEXT: [[ADD43:%.*]] = add nsw i32 0, [[MUL]] +// CHECK4-NEXT: store i32 [[ADD43]], i32* [[I39]], align 4 // CHECK4-NEXT: [[TMP32:%.*]] = load float, float* @flag, align 4 -// CHECK4-NEXT: [[TOBOOL41:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 -// CHECK4-NEXT: br i1 [[TOBOOL41]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK4-NEXT: [[TOBOOL44:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 +// CHECK4-NEXT: br i1 [[TOBOOL44]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) -// CHECK4-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 2) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) +// CHECK4-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM45]], i32 2) // CHECK4-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 // CHECK4-NEXT: br i1 [[TMP34]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK4: .cancel.exit: // CHECK4-NEXT: br label [[CANCEL_EXIT:%.*]] -// CHECK4: omp_section_loop.body.case.cncl: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_EXIT]] -// CHECK4: omp_section_loop.body.case23.cncl: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] -// CHECK4: omp_section_loop.body.case25.cncl: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] // CHECK4: .cancel.continue: // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] // CHECK4: omp_if.else: @@ -2189,30 +2219,30 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: // CHECK4-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK4-NEXT: store i32 [[ADD43]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD46:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK4-NEXT: store i32 [[ADD46]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM45]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM48]]) // CHECK4-NEXT: br label [[OMP_PRECOND_END]] // CHECK4: cancel.exit: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM44:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM44]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM47]]) // CHECK4-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK4: omp.precond.end: // CHECK4-NEXT: br label [[CANCEL_CONT]] // CHECK4: cancel.cont: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM46:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM46]]) -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) -// CHECK4-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM49:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM49]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM50:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) +// CHECK4-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM50]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) // CHECK4-NEXT: [[TMP37:%.*]] = bitcast i8* [[TMP36]] to %struct.kmp_task_t_with_privates* // CHECK4-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 0 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) -// CHECK4-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM48]], i8* [[TMP36]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM51:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) +// CHECK4-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM51]], i8* [[TMP36]]) // CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) // CHECK4-NEXT: store i32 0, i32* [[R]], align 4 @@ -2236,50 +2266,54 @@ for (int i = 0; i < argc; ++i) { // CHECK4: omp.par.region: // CHECK4-NEXT: [[TMP2:%.*]] = load float, float* @flag, align 4 // CHECK4-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00 -// CHECK4-NEXT: br i1 [[TOBOOL]], label [[TMP14:%.*]], label [[TMP3:%.*]] -// CHECK4: 3: -// CHECK4-NEXT: br label [[TMP4:%.*]] -// CHECK4: 4: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK4-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8 -// CHECK4-NEXT: [[TMP6:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP6]], i64 0 -// CHECK4-NEXT: [[TMP7:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK4-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 0 -// CHECK4-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) -// CHECK4-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 -// CHECK4-NEXT: br i1 [[TMP9]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]] -// CHECK4: .cncl5: +// CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_PAR_REGION_IF:%.*]], label [[OMP_PAR_REGION1:%.*]] +// CHECK4: omp.par.region1: +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK4-NEXT: [[CONV:%.*]] = trunc i32 [[TMP3]] to i8 +// CHECK4-NEXT: [[TMP4:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP4]], i64 0 +// CHECK4-NEXT: [[TMP5:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK4-NEXT: 
[[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i64 0 +// CHECK4-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX4]], align 1 +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM5]]) +// CHECK4-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0 +// CHECK4-NEXT: br i1 [[TMP7]], label [[OMP_PAR_REGION1_CONT:%.*]], label [[OMP_PAR_REGION1_CNCL_FINI:%.*]] +// CHECK4: omp.par.region1.cncl.fini: +// CHECK4-NEXT: br label [[OMP_PAR_REGION1_CNCL:%.*]] +// CHECK4: omp.par.region1.cncl: +// CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +// CHECK4: omp.par.pre_finalize: +// CHECK4-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK4: .ompfinalize: // CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] -// CHECK4: .cont: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK4-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0 -// CHECK4-NEXT: [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 -// CHECK4-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0 -// CHECK4-NEXT: [[TMP13:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1 -// CHECK4-NEXT: [[CONV8:%.*]] = sext i8 [[TMP13]] to i32 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP10]] +// CHECK4: omp.par.region1.cont: +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK4-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP9]], i64 0 +// CHECK4-NEXT: [[TMP10:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 +// CHECK4-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP10]], i64 0 +// CHECK4-NEXT: [[TMP11:%.*]] = load i8, i8* 
[[ARRAYIDX7]], align 1 +// CHECK4-NEXT: [[CONV8:%.*]] = sext i8 [[TMP11]] to i32 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP8]] // CHECK4-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD]] to i8 // CHECK4-NEXT: store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1 // CHECK4-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] // CHECK4: omp.par.region.parallel.after: -// CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] -// CHECK4: omp.par.pre_finalize: -// CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] -// CHECK4: 14: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1) -// CHECK4-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0 -// CHECK4-NEXT: br i1 [[TMP16]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]] -// CHECK4: .cncl: +// CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE]] +// CHECK4: omp.par.region.if: // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -// CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] -// CHECK4: .split: -// CHECK4-NEXT: br label [[TMP4]] +// CHECK4-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]], i32 1) +// CHECK4-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 0 +// CHECK4-NEXT: br i1 [[TMP13]], label [[OMP_PAR_REGION_IF_CONT:%.*]], label [[OMP_PAR_REGION_IF_CNCL_FINI:%.*]] +// CHECK4: omp.par.region.if.cncl.fini: +// CHECK4-NEXT: br label [[OMP_PAR_REGION_IF_CNCL:%.*]] +// CHECK4: omp.par.region.if.cncl: +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* 
@[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +// CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE]] +// CHECK4: omp.par.region.if.cont: +// CHECK4-NEXT: br label [[OMP_PAR_REGION1]] // CHECK4: omp.par.outlined.exit.exitStub: // CHECK4-NEXT: ret void // @@ -2368,14 +2402,14 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK4-NEXT: ] // CHECK4: .omp.sections.case: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) // CHECK4-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK4-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK4-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] -// CHECK4: .omp.sections.case.split: +// CHECK4-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK4-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK4: .cancel.exit: +// CHECK4-NEXT: br label [[CANCEL_EXIT:%.*]] +// CHECK4: .cancel.continue: // CHECK4-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK4: .omp.sections.case.cncl: -// CHECK4-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK4: .omp.sections.exit: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: @@ -2384,13 +2418,13 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) // CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* 
@[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) -// CHECK4-NEXT: br label [[CANCEL_CONT]] +// CHECK4-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK4: cancel.cont: // CHECK4-NEXT: ret void // CHECK4: cancel.exit: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) // CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK4-NEXT: br label [[CANCEL_CONT]] // @@ -2411,7 +2445,7 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) // CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) // CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK4-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 1 @@ -2432,25 +2466,23 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]] // CHECK4-NEXT: ] // CHECK4: .omp.sections.case: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) // CHECK4-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], 
i32 3) -// CHECK4-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK4-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] -// CHECK4: .omp.sections.case.split: +// CHECK4-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK4-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK4: .cancel.exit: +// CHECK4-NEXT: br label [[CANCEL_EXIT:%.*]] +// CHECK4: .cancel.continue: // CHECK4-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK4: .omp.sections.case.cncl: -// CHECK4-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK4: .omp.sections.case2: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) // CHECK4-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 3) -// CHECK4-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0 -// CHECK4-NEXT: br i1 [[TMP10]], label [[DOTOMP_SECTIONS_CASE2_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE2_CNCL:%.*]] -// CHECK4: .omp.sections.case2.split: -// CHECK4-NEXT: br label [[DOTOMP_SECTIONS_CASE2_SECTION_AFTER:%.*]] -// CHECK4: .omp.sections.case2.section.after: +// CHECK4-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK4-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT4:%.*]], label [[DOTCANCEL_CONTINUE5:%.*]] +// CHECK4: .cancel.exit4: +// CHECK4-NEXT: br label [[CANCEL_EXIT]] +// CHECK4: .cancel.continue5: // CHECK4-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK4: .omp.sections.case2.cncl: -// CHECK4-NEXT: br label [[OMP_INNER_FOR_END]] // CHECK4: .omp.sections.exit: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: @@ -2459,14 +2491,14 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK4-NEXT: br label 
[[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM5]]) -// CHECK4-NEXT: br label [[CANCEL_CONT]] +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM7]]) +// CHECK4-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK4: cancel.cont: // CHECK4-NEXT: ret void // CHECK4: cancel.exit: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23]]) -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) // CHECK4-NEXT: br label [[CANCEL_CONT]] // // @@ -2513,7 +2545,7 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK4-NEXT: store i32 0, i32* [[R3]], align 4 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB31:[0-9]+]]) // CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* 
[[DOTCAPTURE_EXPR_1]], align 4 @@ -2541,7 +2573,7 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB33:[0-9]+]]) // CHECK4-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM7]], i32 2) // CHECK4-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 // CHECK4-NEXT: br i1 [[TMP15]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] @@ -2563,14 +2595,14 @@ for (int i = 0; i < argc; ++i) { // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35:[0-9]+]]) // CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) // CHECK4-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK4-NEXT: [[TMP20:%.*]] = bitcast i32* [[R3]] to i8* // CHECK4-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) // CHECK4-NEXT: [[TMP21:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK4-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB30:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, 
i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB36:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK4-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK4-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK4-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] @@ -2580,10 +2612,10 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[R3]], align 4 // CHECK4-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK4-NEXT: store i32 [[ADD13]], i32* [[TMP1]], align 4 -// CHECK4-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB30]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK4: cancel.exit: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) // CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) // CHECK4-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK4: .omp.reduction.case2: @@ -3880,13 +3912,13 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_34:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_37:%.*]] = alloca i32, align 
4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I36:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I39:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK9-NEXT: [[R:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32 0, i32* [[RETVAL]], align 4 @@ -3933,8 +3965,12 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 3) // CHECK9-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK9-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] -// CHECK9: omp_section_loop.body.case.split: +// CHECK9-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL_FINI:%.*]] +// CHECK9: omp_section_loop.body.case.cncl.fini: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] +// CHECK9: omp_section_loop.body.case.cncl: +// CHECK9-NEXT: br label [[SECTION_FINI:%.*]] +// CHECK9: omp_section_loop.body.case.cont: // CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_SECTION_AFTER:%.*]] // CHECK9: omp_section_loop.body.case.section.after: // CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_SECTIONS_AFTER]] @@ -3949,10 +3985,14 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) // CHECK9-NEXT: br label [[OMP_SECTION_LOOP_AFTER:%.*]] // CHECK9: omp_section_loop.after: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_AFTERSECTIONS_FINI:%.*]] -// 
CHECK9: omp_section_loop.aftersections.fini: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]] -// CHECK9: omp_section_loop.preheader13: +// CHECK9-NEXT: br label [[SECTION_FINISH:%.*]] +// CHECK9: section_finish: +// CHECK9-NEXT: br label [[DOTOMPFINALIZE13:%.*]] +// CHECK9: .ompfinalize13: +// CHECK9-NEXT: br label [[SECTION_FINI]] +// CHECK9: section_fini: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER14:%.*]] +// CHECK9: omp_section_loop.preheader14: // CHECK9-NEXT: store i32 0, i32* [[P_LOWERBOUND29]], align 4 // CHECK9-NEXT: store i32 1, i32* [[P_UPPERBOUND30]], align 4 // CHECK9-NEXT: store i32 1, i32* [[P_STRIDE31]], align 4 @@ -3962,79 +4002,91 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[P_UPPERBOUND30]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], [[TMP9]] // CHECK9-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 1 -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_HEADER14:%.*]] -// CHECK9: omp_section_loop.header14: -// CHECK9-NEXT: [[OMP_SECTION_LOOP_IV20:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER13]] ], [ [[OMP_SECTION_LOOP_NEXT22:%.*]], [[OMP_SECTION_LOOP_INC17:%.*]] ] -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_COND15:%.*]] -// CHECK9: omp_section_loop.cond15: -// CHECK9-NEXT: [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP12]] -// CHECK9-NEXT: br i1 [[OMP_SECTION_LOOP_CMP21]], label [[OMP_SECTION_LOOP_BODY16:%.*]], label [[OMP_SECTION_LOOP_EXIT18:%.*]] -// CHECK9: omp_section_loop.body16: -// CHECK9-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP9]] +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_HEADER15:%.*]] +// CHECK9: omp_section_loop.header15: +// CHECK9-NEXT: [[OMP_SECTION_LOOP_IV21:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER14]] ], [ [[OMP_SECTION_LOOP_NEXT23:%.*]], [[OMP_SECTION_LOOP_INC18:%.*]] ] +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_COND16:%.*]] +// CHECK9: omp_section_loop.cond16: +// CHECK9-NEXT: 
[[OMP_SECTION_LOOP_CMP22:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV21]], [[TMP12]] +// CHECK9-NEXT: br i1 [[OMP_SECTION_LOOP_CMP22]], label [[OMP_SECTION_LOOP_BODY17:%.*]], label [[OMP_SECTION_LOOP_EXIT19:%.*]] +// CHECK9: omp_section_loop.body17: +// CHECK9-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV21]], [[TMP9]] // CHECK9-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 1 // CHECK9-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], 0 -// CHECK9-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER:%.*]] [ -// CHECK9-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE23:%.*]] -// CHECK9-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE25:%.*]] +// CHECK9-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER:%.*]] [ +// CHECK9-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE24:%.*]] +// CHECK9-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE26:%.*]] // CHECK9-NEXT: ] -// CHECK9: omp_section_loop.body.case23: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM24]], i32 3) +// CHECK9: omp_section_loop.body.case24: +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM25:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM25]], i32 3) // CHECK9-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0 -// CHECK9-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE23_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE23_CNCL:%.*]] -// CHECK9: omp_section_loop.body.case23.split: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE23_SECTION_AFTER:%.*]] -// CHECK9: omp_section_loop.body.case23.section.after: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] -// CHECK9: omp_section_loop.body.case25: +// CHECK9-NEXT: br i1 [[TMP17]], label 
[[OMP_SECTION_LOOP_BODY_CASE24_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE24_CNCL_FINI:%.*]] +// CHECK9: omp_section_loop.body.case24.cncl.fini: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE24_CNCL:%.*]] +// CHECK9: omp_section_loop.body.case24.cncl: +// CHECK9-NEXT: br label [[SECTION_FINI35:%.*]] +// CHECK9: omp_section_loop.body.case24.cont: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE24_SECTION_AFTER:%.*]] +// CHECK9: omp_section_loop.body.case24.section.after: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER]] +// CHECK9: omp_section_loop.body.case26: // CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM27:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM27]], i32 3) // CHECK9-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 0 -// CHECK9-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]] -// CHECK9: omp_section_loop.body.case25.split: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER26:%.*]] -// CHECK9: omp_section_loop.body.case25.section.after26: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER:%.*]] -// CHECK9: omp_section_loop.body.case25.section.after: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] -// CHECK9: omp_section_loop.body16.sections.after: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_INC17]] -// CHECK9: omp_section_loop.inc17: -// CHECK9-NEXT: [[OMP_SECTION_LOOP_NEXT22]] = add nuw i32 [[OMP_SECTION_LOOP_IV20]], 1 -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_HEADER14]] -// CHECK9: omp_section_loop.exit18: +// CHECK9-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE26_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL_FINI:%.*]] +// CHECK9: omp_section_loop.body.case26.cncl.fini: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL:%.*]] +// 
CHECK9: omp_section_loop.body.case26.cncl: +// CHECK9-NEXT: br label [[SECTION_FINI35]] +// CHECK9: omp_section_loop.body.case26.cont: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTIONFINI:%.*]] +// CHECK9: omp_section_loop.body.case26.sectionfini: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTION_AFTER:%.*]] +// CHECK9: omp_section_loop.body.case26.section.after: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER]] +// CHECK9: omp_section_loop.body17.sections.after: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_INC18]] +// CHECK9: omp_section_loop.inc18: +// CHECK9-NEXT: [[OMP_SECTION_LOOP_NEXT23]] = add nuw i32 [[OMP_SECTION_LOOP_IV21]], 1 +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_HEADER15]] +// CHECK9: omp_section_loop.exit19: // CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]]) // CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM33:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM33]]) -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_AFTER19:%.*]] -// CHECK9: omp_section_loop.after19: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_AFTER19SECTIONS_FINI:%.*]] -// CHECK9: omp_section_loop.after19sections.fini: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_AFTER20:%.*]] +// CHECK9: omp_section_loop.after20: +// CHECK9-NEXT: br label [[SECTION_FINISH34:%.*]] +// CHECK9: section_finish34: +// CHECK9-NEXT: br label [[DOTOMPFINALIZE36:%.*]] +// CHECK9: .ompfinalize36: +// CHECK9-NEXT: br label [[SECTION_FINI35]] +// CHECK9: section_fini35: // CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP21]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// 
CHECK9-NEXT: [[SUB35:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK9-NEXT: store i32 [[SUB35]], i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK9-NEXT: [[SUB38:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK9-NEXT: store i32 [[SUB38]], i32* [[DOTCAPTURE_EXPR_37]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 // CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP22]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 // CHECK9-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM37:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM37]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM40:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM40]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 -// CHECK9-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] -// CHECK9-NEXT: br i1 [[CMP38]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: 
[[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 +// CHECK9-NEXT: [[CMP41:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +// CHECK9-NEXT: br i1 [[CMP41]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: // CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 @@ -4048,29 +4100,23 @@ for (int i = 0; i < argc; ++i) { // CHECK9: omp.inner.for.cond: // CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP39:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] -// CHECK9-NEXT: br i1 [[CMP39]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[CMP42:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK9-NEXT: br i1 [[CMP42]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: // CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 -// CHECK9-NEXT: [[ADD40:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD40]], i32* [[I36]], align 4 +// CHECK9-NEXT: [[ADD43:%.*]] = add nsw i32 0, [[MUL]] +// CHECK9-NEXT: store i32 [[ADD43]], i32* [[I39]], align 4 // CHECK9-NEXT: [[TMP32:%.*]] = load float, float* @flag, align 4 -// CHECK9-NEXT: [[TOBOOL41:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 -// CHECK9-NEXT: br i1 [[TOBOOL41]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK9-NEXT: [[TOBOOL44:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 +// CHECK9-NEXT: br i1 [[TOBOOL44]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) -// CHECK9-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 2) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) +// CHECK9-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM45]], i32 2) // CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 // CHECK9-NEXT: br i1 [[TMP34]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK9: .cancel.exit: // CHECK9-NEXT: br label [[CANCEL_EXIT:%.*]] -// CHECK9: omp_section_loop.body.case.cncl: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_EXIT]] -// CHECK9: omp_section_loop.body.case23.cncl: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] -// CHECK9: omp_section_loop.body.case25.cncl: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] // CHECK9: .cancel.continue: // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: @@ -4081,30 +4127,30 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: // CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK9-NEXT: store i32 [[ADD43]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD46:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK9-NEXT: store i32 [[ADD46]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM45]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM48]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: cancel.exit: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM44:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM44]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM47]]) // CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK9: omp.precond.end: // CHECK9-NEXT: br label [[CANCEL_CONT]] // CHECK9: cancel.cont: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM46:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM46]]) -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) -// CHECK9-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM49:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM49]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM50:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) +// CHECK9-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM50]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) // CHECK9-NEXT: [[TMP37:%.*]] = bitcast i8* [[TMP36]] to %struct.kmp_task_t_with_privates* // CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 0 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) -// CHECK9-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM48]], i8* [[TMP36]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM51:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) +// CHECK9-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM51]], i8* [[TMP36]]) // CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) // CHECK9-NEXT: store i32 0, i32* [[R]], align 4 @@ -4128,50 +4174,54 @@ for (int i = 0; i < argc; ++i) { // CHECK9: omp.par.region: // CHECK9-NEXT: [[TMP2:%.*]] = load float, float* @flag, align 4 // CHECK9-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00 -// CHECK9-NEXT: br i1 [[TOBOOL]], label [[TMP14:%.*]], label [[TMP3:%.*]] -// CHECK9: 3: -// CHECK9-NEXT: br label [[TMP4:%.*]] -// CHECK9: 4: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP6]], i64 0 -// CHECK9-NEXT: [[TMP7:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK9-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 0 -// CHECK9-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) -// CHECK9-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 -// CHECK9-NEXT: br i1 [[TMP9]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]] -// CHECK9: .cncl5: +// CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_PAR_REGION_IF:%.*]], label [[OMP_PAR_REGION1:%.*]] +// CHECK9: omp.par.region1: +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i32 [[TMP3]] to i8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP4]], i64 0 +// CHECK9-NEXT: [[TMP5:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK9-NEXT: 
[[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i64 0 +// CHECK9-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX4]], align 1 +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM5]]) +// CHECK9-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0 +// CHECK9-NEXT: br i1 [[TMP7]], label [[OMP_PAR_REGION1_CONT:%.*]], label [[OMP_PAR_REGION1_CNCL_FINI:%.*]] +// CHECK9: omp.par.region1.cncl.fini: +// CHECK9-NEXT: br label [[OMP_PAR_REGION1_CNCL:%.*]] +// CHECK9: omp.par.region1.cncl: +// CHECK9-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +// CHECK9: omp.par.pre_finalize: +// CHECK9-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK9: .ompfinalize: // CHECK9-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] -// CHECK9: .cont: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0 -// CHECK9-NEXT: [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 -// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0 -// CHECK9-NEXT: [[TMP13:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i8 [[TMP13]] to i32 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP10]] +// CHECK9: omp.par.region1.cont: +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP9]], i64 0 +// CHECK9-NEXT: [[TMP10:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP10]], i64 0 +// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* 
[[ARRAYIDX7]], align 1 +// CHECK9-NEXT: [[CONV8:%.*]] = sext i8 [[TMP11]] to i32 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP8]] // CHECK9-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD]] to i8 // CHECK9-NEXT: store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1 // CHECK9-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] // CHECK9: omp.par.region.parallel.after: -// CHECK9-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] -// CHECK9: omp.par.pre_finalize: -// CHECK9-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] -// CHECK9: 14: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1) -// CHECK9-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0 -// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]] -// CHECK9: .cncl: +// CHECK9-NEXT: br label [[OMP_PAR_PRE_FINALIZE]] +// CHECK9: omp.par.region.if: // CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -// CHECK9-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] -// CHECK9: .split: -// CHECK9-NEXT: br label [[TMP4]] +// CHECK9-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]], i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 0 +// CHECK9-NEXT: br i1 [[TMP13]], label [[OMP_PAR_REGION_IF_CONT:%.*]], label [[OMP_PAR_REGION_IF_CNCL_FINI:%.*]] +// CHECK9: omp.par.region.if.cncl.fini: +// CHECK9-NEXT: br label [[OMP_PAR_REGION_IF_CNCL:%.*]] +// CHECK9: omp.par.region.if.cncl: +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* 
@[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +// CHECK9-NEXT: br label [[OMP_PAR_PRE_FINALIZE]] +// CHECK9: omp.par.region.if.cont: +// CHECK9-NEXT: br label [[OMP_PAR_REGION1]] // CHECK9: omp.par.outlined.exit.exitStub: // CHECK9-NEXT: ret void // @@ -4260,14 +4310,14 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.sections.case: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) // CHECK9-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK9-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK9-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] -// CHECK9: .omp.sections.case.split: +// CHECK9-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK9-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK9: .cancel.exit: +// CHECK9-NEXT: br label [[CANCEL_EXIT:%.*]] +// CHECK9: .cancel.continue: // CHECK9-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK9: .omp.sections.case.cncl: -// CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK9: .omp.sections.exit: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: @@ -4276,13 +4326,13 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) // CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* 
@[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) -// CHECK9-NEXT: br label [[CANCEL_CONT]] +// CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK9: cancel.cont: // CHECK9-NEXT: ret void // CHECK9: cancel.exit: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) // CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK9-NEXT: br label [[CANCEL_CONT]] // @@ -4303,7 +4353,7 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) // CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 1 @@ -4324,25 +4374,23 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.sections.case: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) // CHECK9-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], 
i32 3) -// CHECK9-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK9-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] -// CHECK9: .omp.sections.case.split: +// CHECK9-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK9-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK9: .cancel.exit: +// CHECK9-NEXT: br label [[CANCEL_EXIT:%.*]] +// CHECK9: .cancel.continue: // CHECK9-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK9: .omp.sections.case.cncl: -// CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK9: .omp.sections.case2: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) // CHECK9-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 3) -// CHECK9-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0 -// CHECK9-NEXT: br i1 [[TMP10]], label [[DOTOMP_SECTIONS_CASE2_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE2_CNCL:%.*]] -// CHECK9: .omp.sections.case2.split: -// CHECK9-NEXT: br label [[DOTOMP_SECTIONS_CASE2_SECTION_AFTER:%.*]] -// CHECK9: .omp.sections.case2.section.after: +// CHECK9-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK9-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT4:%.*]], label [[DOTCANCEL_CONTINUE5:%.*]] +// CHECK9: .cancel.exit4: +// CHECK9-NEXT: br label [[CANCEL_EXIT]] +// CHECK9: .cancel.continue5: // CHECK9-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK9: .omp.sections.case2.cncl: -// CHECK9-NEXT: br label [[OMP_INNER_FOR_END]] // CHECK9: .omp.sections.exit: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: @@ -4351,14 +4399,14 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK9-NEXT: br label 
[[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM5]]) -// CHECK9-NEXT: br label [[CANCEL_CONT]] +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM7]]) +// CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK9: cancel.cont: // CHECK9-NEXT: ret void // CHECK9: cancel.exit: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23]]) -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) // CHECK9-NEXT: br label [[CANCEL_CONT]] // // @@ -4405,7 +4453,7 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK9-NEXT: store i32 0, i32* [[R3]], align 4 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB31:[0-9]+]]) // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* 
[[DOTCAPTURE_EXPR_1]], align 4 @@ -4433,7 +4481,7 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB33:[0-9]+]]) // CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM7]], i32 2) // CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 // CHECK9-NEXT: br i1 [[TMP15]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] @@ -4455,14 +4503,14 @@ for (int i = 0; i < argc; ++i) { // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35:[0-9]+]]) // CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) // CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK9-NEXT: [[TMP20:%.*]] = bitcast i32* [[R3]] to i8* // CHECK9-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) // CHECK9-NEXT: [[TMP21:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK9-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB30:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, 
i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB36:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] @@ -4472,10 +4520,10 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[R3]], align 4 // CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: store i32 [[ADD13]], i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB30]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: cancel.exit: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) // CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) // CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK9: .omp.reduction.case2: @@ -4532,13 +4580,13 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[DOTCAPTURE_EXPR_34:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTCAPTURE_EXPR_37:%.*]] = alloca i32, 
align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[I36:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[I39:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK10-NEXT: [[R:%.*]] = alloca i32, align 4 // CHECK10-NEXT: store i32 0, i32* [[RETVAL]], align 4 @@ -4585,8 +4633,12 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK10-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 3) // CHECK10-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK10-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] -// CHECK10: omp_section_loop.body.case.split: +// CHECK10-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL_FINI:%.*]] +// CHECK10: omp_section_loop.body.case.cncl.fini: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] +// CHECK10: omp_section_loop.body.case.cncl: +// CHECK10-NEXT: br label [[SECTION_FINI:%.*]] +// CHECK10: omp_section_loop.body.case.cont: // CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_SECTION_AFTER:%.*]] // CHECK10: omp_section_loop.body.case.section.after: // CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_SECTIONS_AFTER]] @@ -4601,10 +4653,14 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) // CHECK10-NEXT: br label [[OMP_SECTION_LOOP_AFTER:%.*]] // CHECK10: omp_section_loop.after: -// CHECK10-NEXT: br label 
[[OMP_SECTION_LOOP_AFTERSECTIONS_FINI:%.*]] -// CHECK10: omp_section_loop.aftersections.fini: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]] -// CHECK10: omp_section_loop.preheader13: +// CHECK10-NEXT: br label [[SECTION_FINISH:%.*]] +// CHECK10: section_finish: +// CHECK10-NEXT: br label [[DOTOMPFINALIZE13:%.*]] +// CHECK10: .ompfinalize13: +// CHECK10-NEXT: br label [[SECTION_FINI]] +// CHECK10: section_fini: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER14:%.*]] +// CHECK10: omp_section_loop.preheader14: // CHECK10-NEXT: store i32 0, i32* [[P_LOWERBOUND29]], align 4 // CHECK10-NEXT: store i32 1, i32* [[P_UPPERBOUND30]], align 4 // CHECK10-NEXT: store i32 1, i32* [[P_STRIDE31]], align 4 @@ -4614,79 +4670,91 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[P_UPPERBOUND30]], align 4 // CHECK10-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], [[TMP9]] // CHECK10-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 1 -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_HEADER14:%.*]] -// CHECK10: omp_section_loop.header14: -// CHECK10-NEXT: [[OMP_SECTION_LOOP_IV20:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER13]] ], [ [[OMP_SECTION_LOOP_NEXT22:%.*]], [[OMP_SECTION_LOOP_INC17:%.*]] ] -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_COND15:%.*]] -// CHECK10: omp_section_loop.cond15: -// CHECK10-NEXT: [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP12]] -// CHECK10-NEXT: br i1 [[OMP_SECTION_LOOP_CMP21]], label [[OMP_SECTION_LOOP_BODY16:%.*]], label [[OMP_SECTION_LOOP_EXIT18:%.*]] -// CHECK10: omp_section_loop.body16: -// CHECK10-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP9]] +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_HEADER15:%.*]] +// CHECK10: omp_section_loop.header15: +// CHECK10-NEXT: [[OMP_SECTION_LOOP_IV21:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER14]] ], [ [[OMP_SECTION_LOOP_NEXT23:%.*]], [[OMP_SECTION_LOOP_INC18:%.*]] ] +// CHECK10-NEXT: br label 
[[OMP_SECTION_LOOP_COND16:%.*]] +// CHECK10: omp_section_loop.cond16: +// CHECK10-NEXT: [[OMP_SECTION_LOOP_CMP22:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV21]], [[TMP12]] +// CHECK10-NEXT: br i1 [[OMP_SECTION_LOOP_CMP22]], label [[OMP_SECTION_LOOP_BODY17:%.*]], label [[OMP_SECTION_LOOP_EXIT19:%.*]] +// CHECK10: omp_section_loop.body17: +// CHECK10-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV21]], [[TMP9]] // CHECK10-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 1 // CHECK10-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], 0 -// CHECK10-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER:%.*]] [ -// CHECK10-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE23:%.*]] -// CHECK10-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE25:%.*]] +// CHECK10-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER:%.*]] [ +// CHECK10-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE24:%.*]] +// CHECK10-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE26:%.*]] // CHECK10-NEXT: ] -// CHECK10: omp_section_loop.body.case23: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM24]], i32 3) +// CHECK10: omp_section_loop.body.case24: +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM25:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM25]], i32 3) // CHECK10-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0 -// CHECK10-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE23_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE23_CNCL:%.*]] -// CHECK10: omp_section_loop.body.case23.split: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE23_SECTION_AFTER:%.*]] -// CHECK10: omp_section_loop.body.case23.section.after: -// CHECK10-NEXT: br label 
[[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] -// CHECK10: omp_section_loop.body.case25: +// CHECK10-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE24_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE24_CNCL_FINI:%.*]] +// CHECK10: omp_section_loop.body.case24.cncl.fini: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE24_CNCL:%.*]] +// CHECK10: omp_section_loop.body.case24.cncl: +// CHECK10-NEXT: br label [[SECTION_FINI35:%.*]] +// CHECK10: omp_section_loop.body.case24.cont: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE24_SECTION_AFTER:%.*]] +// CHECK10: omp_section_loop.body.case24.section.after: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER]] +// CHECK10: omp_section_loop.body.case26: // CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM27:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK10-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM27]], i32 3) // CHECK10-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 0 -// CHECK10-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]] -// CHECK10: omp_section_loop.body.case25.split: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER26:%.*]] -// CHECK10: omp_section_loop.body.case25.section.after26: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER:%.*]] -// CHECK10: omp_section_loop.body.case25.section.after: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] -// CHECK10: omp_section_loop.body16.sections.after: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_INC17]] -// CHECK10: omp_section_loop.inc17: -// CHECK10-NEXT: [[OMP_SECTION_LOOP_NEXT22]] = add nuw i32 [[OMP_SECTION_LOOP_IV20]], 1 -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_HEADER14]] -// CHECK10: omp_section_loop.exit18: +// CHECK10-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE26_CONT:%.*]], label 
[[OMP_SECTION_LOOP_BODY_CASE26_CNCL_FINI:%.*]] +// CHECK10: omp_section_loop.body.case26.cncl.fini: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL:%.*]] +// CHECK10: omp_section_loop.body.case26.cncl: +// CHECK10-NEXT: br label [[SECTION_FINI35]] +// CHECK10: omp_section_loop.body.case26.cont: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTIONFINI:%.*]] +// CHECK10: omp_section_loop.body.case26.sectionfini: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTION_AFTER:%.*]] +// CHECK10: omp_section_loop.body.case26.section.after: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER]] +// CHECK10: omp_section_loop.body17.sections.after: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_INC18]] +// CHECK10: omp_section_loop.inc18: +// CHECK10-NEXT: [[OMP_SECTION_LOOP_NEXT23]] = add nuw i32 [[OMP_SECTION_LOOP_IV21]], 1 +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_HEADER15]] +// CHECK10: omp_section_loop.exit19: // CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]]) // CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM33:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK10-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM33]]) -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_AFTER19:%.*]] -// CHECK10: omp_section_loop.after19: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_AFTER19SECTIONS_FINI:%.*]] -// CHECK10: omp_section_loop.after19sections.fini: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_AFTER20:%.*]] +// CHECK10: omp_section_loop.after20: +// CHECK10-NEXT: br label [[SECTION_FINISH34:%.*]] +// CHECK10: section_finish34: +// CHECK10-NEXT: br label [[DOTOMPFINALIZE36:%.*]] +// CHECK10: .ompfinalize36: +// CHECK10-NEXT: br label [[SECTION_FINI35]] +// CHECK10: section_fini35: // CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK10-NEXT: store i32 [[TMP20]], i32* 
[[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP21]], 0 // CHECK10-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK10-NEXT: [[SUB35:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK10-NEXT: store i32 [[SUB35]], i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK10-NEXT: [[SUB38:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK10-NEXT: store i32 [[SUB38]], i32* [[DOTCAPTURE_EXPR_37]], align 4 // CHECK10-NEXT: store i32 0, i32* [[I]], align 4 // CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP22]] // CHECK10-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK10: omp.precond.then: // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 // CHECK10-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM37:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) -// CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM37]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM40:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM40]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// 
CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 -// CHECK10-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] -// CHECK10-NEXT: br i1 [[CMP38]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 +// CHECK10-NEXT: [[CMP41:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +// CHECK10-NEXT: br i1 [[CMP41]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: -// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: // CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 @@ -4700,29 +4768,23 @@ for (int i = 0; i < argc; ++i) { // CHECK10: omp.inner.for.cond: // CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP39:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] -// CHECK10-NEXT: br i1 [[CMP39]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK10-NEXT: [[CMP42:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK10-NEXT: br i1 [[CMP42]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: // CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 -// CHECK10-NEXT: [[ADD40:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD40]], i32* [[I36]], align 4 +// CHECK10-NEXT: [[ADD43:%.*]] = add nsw i32 0, [[MUL]] +// CHECK10-NEXT: store i32 [[ADD43]], i32* [[I39]], align 4 // CHECK10-NEXT: [[TMP32:%.*]] = load float, float* @flag, align 4 -// CHECK10-NEXT: [[TOBOOL41:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 -// CHECK10-NEXT: br i1 [[TOBOOL41]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// 
CHECK10-NEXT: [[TOBOOL44:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 +// CHECK10-NEXT: br i1 [[TOBOOL44]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) -// CHECK10-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 2) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) +// CHECK10-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM45]], i32 2) // CHECK10-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 // CHECK10-NEXT: br i1 [[TMP34]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK10: .cancel.exit: // CHECK10-NEXT: br label [[CANCEL_EXIT:%.*]] -// CHECK10: omp_section_loop.body.case.cncl: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_EXIT]] -// CHECK10: omp_section_loop.body.case23.cncl: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] -// CHECK10: omp_section_loop.body.case25.cncl: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] // CHECK10: .cancel.continue: // CHECK10-NEXT: br label [[OMP_IF_END:%.*]] // CHECK10: omp_if.else: @@ -4733,30 +4795,30 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: // CHECK10-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK10-NEXT: store i32 [[ADD43]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[ADD46:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK10-NEXT: store i32 [[ADD46]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM45]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM48]]) // CHECK10-NEXT: br label [[OMP_PRECOND_END]] // CHECK10: cancel.exit: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM44:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM44]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM47]]) // CHECK10-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK10: omp.precond.end: // CHECK10-NEXT: br label [[CANCEL_CONT]] // CHECK10: cancel.cont: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM46:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM46]]) -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) -// CHECK10-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM49:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM49]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM50:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) +// CHECK10-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM50]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) // CHECK10-NEXT: [[TMP37:%.*]] = bitcast i8* [[TMP36]] to %struct.kmp_task_t_with_privates* // CHECK10-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 0 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) -// CHECK10-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM48]], i8* [[TMP36]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM51:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) +// CHECK10-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM51]], i8* [[TMP36]]) // CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) // CHECK10-NEXT: store i32 0, i32* [[R]], align 4 @@ -4780,50 +4842,54 @@ for (int i = 0; i < argc; ++i) { // CHECK10: omp.par.region: // CHECK10-NEXT: [[TMP2:%.*]] = load float, float* @flag, align 4 // CHECK10-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00 -// CHECK10-NEXT: br i1 [[TOBOOL]], label [[TMP14:%.*]], label [[TMP3:%.*]] -// CHECK10: 3: -// CHECK10-NEXT: br label [[TMP4:%.*]] -// CHECK10: 4: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK10-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8 -// CHECK10-NEXT: [[TMP6:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP6]], i64 0 -// CHECK10-NEXT: [[TMP7:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK10-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 0 -// CHECK10-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) -// CHECK10-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 -// CHECK10-NEXT: br i1 [[TMP9]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]] -// CHECK10: .cncl5: +// CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_PAR_REGION_IF:%.*]], label [[OMP_PAR_REGION1:%.*]] +// CHECK10: omp.par.region1: +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK10-NEXT: [[CONV:%.*]] = trunc i32 [[TMP3]] to i8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP4]], i64 0 +// CHECK10-NEXT: [[TMP5:%.*]] = load i8*, i8** [[ARRAYIDX]], 
align 8 +// CHECK10-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i64 0 +// CHECK10-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX4]], align 1 +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM5]]) +// CHECK10-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0 +// CHECK10-NEXT: br i1 [[TMP7]], label [[OMP_PAR_REGION1_CONT:%.*]], label [[OMP_PAR_REGION1_CNCL_FINI:%.*]] +// CHECK10: omp.par.region1.cncl.fini: +// CHECK10-NEXT: br label [[OMP_PAR_REGION1_CNCL:%.*]] +// CHECK10: omp.par.region1.cncl: +// CHECK10-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +// CHECK10: omp.par.pre_finalize: +// CHECK10-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK10: .ompfinalize: // CHECK10-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] -// CHECK10: .cont: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK10-NEXT: [[TMP11:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK10-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0 -// CHECK10-NEXT: [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 -// CHECK10-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0 -// CHECK10-NEXT: [[TMP13:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1 -// CHECK10-NEXT: [[CONV8:%.*]] = sext i8 [[TMP13]] to i32 -// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP10]] +// CHECK10: omp.par.region1.cont: +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK10-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP9]], i64 0 +// CHECK10-NEXT: [[TMP10:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 +// CHECK10-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP10]], i64 0 
+// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1 +// CHECK10-NEXT: [[CONV8:%.*]] = sext i8 [[TMP11]] to i32 +// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP8]] // CHECK10-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD]] to i8 // CHECK10-NEXT: store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1 // CHECK10-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] // CHECK10: omp.par.region.parallel.after: -// CHECK10-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] -// CHECK10: omp.par.pre_finalize: -// CHECK10-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] -// CHECK10: 14: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1) -// CHECK10-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0 -// CHECK10-NEXT: br i1 [[TMP16]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]] -// CHECK10: .cncl: +// CHECK10-NEXT: br label [[OMP_PAR_PRE_FINALIZE]] +// CHECK10: omp.par.region.if: // CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -// CHECK10-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] -// CHECK10: .split: -// CHECK10-NEXT: br label [[TMP4]] +// CHECK10-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]], i32 1) +// CHECK10-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 0 +// CHECK10-NEXT: br i1 [[TMP13]], label [[OMP_PAR_REGION_IF_CONT:%.*]], label [[OMP_PAR_REGION_IF_CNCL_FINI:%.*]] +// CHECK10: omp.par.region.if.cncl.fini: +// CHECK10-NEXT: br label [[OMP_PAR_REGION_IF_CNCL:%.*]] +// CHECK10: omp.par.region.if.cncl: +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// 
CHECK10-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +// CHECK10-NEXT: br label [[OMP_PAR_PRE_FINALIZE]] +// CHECK10: omp.par.region.if.cont: +// CHECK10-NEXT: br label [[OMP_PAR_REGION1]] // CHECK10: omp.par.outlined.exit.exitStub: // CHECK10-NEXT: ret void // @@ -4912,14 +4978,14 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK10-NEXT: ] // CHECK10: .omp.sections.case: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) // CHECK10-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK10-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK10-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] -// CHECK10: .omp.sections.case.split: +// CHECK10-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK10-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK10: .cancel.exit: +// CHECK10-NEXT: br label [[CANCEL_EXIT:%.*]] +// CHECK10: .cancel.continue: // CHECK10-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK10: .omp.sections.case.cncl: -// CHECK10-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK10: .omp.sections.exit: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: @@ -4928,13 +4994,13 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) // CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) -// CHECK10-NEXT: br label [[CANCEL_CONT]] +// CHECK10-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK10: cancel.cont: // CHECK10-NEXT: ret void // CHECK10: cancel.exit: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) // CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK10-NEXT: br label [[CANCEL_CONT]] // @@ -4955,7 +5021,7 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) // CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 1 @@ -4976,25 +5042,23 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]] // CHECK10-NEXT: ] // CHECK10: .omp.sections.case: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) // CHECK10-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK10-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK10-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] -// CHECK10: .omp.sections.case.split: +// CHECK10-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK10-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK10: .cancel.exit: +// CHECK10-NEXT: br label [[CANCEL_EXIT:%.*]] +// CHECK10: .cancel.continue: // CHECK10-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK10: .omp.sections.case.cncl: -// CHECK10-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK10: .omp.sections.case2: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) // CHECK10-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 3) -// CHECK10-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0 -// CHECK10-NEXT: br i1 [[TMP10]], label [[DOTOMP_SECTIONS_CASE2_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE2_CNCL:%.*]] -// CHECK10: .omp.sections.case2.split: -// CHECK10-NEXT: br label [[DOTOMP_SECTIONS_CASE2_SECTION_AFTER:%.*]] -// CHECK10: .omp.sections.case2.section.after: +// CHECK10-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK10-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT4:%.*]], label [[DOTCANCEL_CONTINUE5:%.*]] +// CHECK10: .cancel.exit4: +// CHECK10-NEXT: br label [[CANCEL_EXIT]] +// CHECK10: .cancel.continue5: // CHECK10-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK10: .omp.sections.case2.cncl: -// CHECK10-NEXT: br label [[OMP_INNER_FOR_END]] // CHECK10: .omp.sections.exit: // CHECK10-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: @@ -5003,14 +5067,14 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM5]]) -// CHECK10-NEXT: br label [[CANCEL_CONT]] +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM7]]) +// CHECK10-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK10: cancel.cont: // CHECK10-NEXT: ret void // CHECK10: cancel.exit: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23]]) -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) // CHECK10-NEXT: br label [[CANCEL_CONT]] // // @@ -5057,7 +5121,7 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK10-NEXT: store i32 0, i32* [[R3]], align 4 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB31:[0-9]+]]) // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 
[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 @@ -5085,7 +5149,7 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB33:[0-9]+]]) // CHECK10-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM7]], i32 2) // CHECK10-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 // CHECK10-NEXT: br i1 [[TMP15]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] @@ -5107,14 +5171,14 @@ for (int i = 0; i < argc; ++i) { // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35:[0-9]+]]) // CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) // CHECK10-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK10-NEXT: [[TMP20:%.*]] = bitcast i32* [[R3]] to i8* // CHECK10-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) // CHECK10-NEXT: [[TMP21:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK10-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB30:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK10-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB36:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK10-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK10-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK10-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] @@ -5124,10 +5188,10 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[R3]], align 4 // CHECK10-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK10-NEXT: store i32 [[ADD13]], i32* [[TMP1]], align 4 -// CHECK10-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB30]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK10-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK10-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK10: cancel.exit: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) // CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) // CHECK10-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK10: .omp.reduction.case2: diff --git 
a/clang/test/OpenMP/irbuilder_unroll_partial_factor_for_collapse.c b/clang/test/OpenMP/irbuilder_unroll_partial_factor_for_collapse.c index bc63ff5a04362..0bfed911077bf 100644 --- a/clang/test/OpenMP/irbuilder_unroll_partial_factor_for_collapse.c +++ b/clang/test/OpenMP/irbuilder_unroll_partial_factor_for_collapse.c @@ -54,8 +54,8 @@ // CHECK-NEXT: store i64 %[[TMP3]], i64* %[[DOTOMP_UB]], align 8 // CHECK-NEXT: store i64 1, i64* %[[DOTOMP_STRIDE]], align 8 // CHECK-NEXT: store i32 0, i32* %[[DOTOMP_IS_LAST]], align 4 -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:.+]]) -// CHECK-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1:.+]], i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[DOTOMP_IS_LAST]], i64* %[[DOTOMP_LB]], i64* %[[DOTOMP_UB]], i64* %[[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @3) +// CHECK-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[DOTOMP_IS_LAST]], i64* %[[DOTOMP_LB]], i64* %[[DOTOMP_UB]], i64* %[[DOTOMP_STRIDE]], i64 1, i64 1) // CHECK-NEXT: %[[TMP4:.+]] = load i64, i64* %[[DOTOMP_UB]], align 8 // CHECK-NEXT: %[[TMP5:.+]] = load i64, i64* %[[DOTCAPTURE_EXPR_2]], align 8 // CHECK-NEXT: %[[CMP8:.+]] = icmp sgt i64 %[[TMP4]], %[[TMP5]] @@ -171,13 +171,13 @@ // CHECK-NEXT: br label %[[OMP_LOOP_EXIT:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[OMP_LOOP_EXIT]]: -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM38:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB5:.+]]) -// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 %[[OMP_GLOBAL_THREAD_NUM38]]) +// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM38:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @5) +// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM38]]) // CHECK-NEXT: br label 
%[[OMP_PRECOND_END]] // CHECK-EMPTY: // CHECK-NEXT: [[OMP_PRECOND_END]]: -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM39:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB6:.+]], i32 %[[OMP_GLOBAL_THREAD_NUM39]]) +// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM39:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @7) +// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @6, i32 %[[OMP_GLOBAL_THREAD_NUM39]]) // CHECK-NEXT: ret void // CHECK-NEXT: } void unroll_partial_factor_for_collapse(int m, float *a, float *b, float *c, float *d) { diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_for_collapse.c b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_for_collapse.c index 2819eaf1f02cf..1a2bd117bf98c 100644 --- a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_for_collapse.c +++ b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_for_collapse.c @@ -62,8 +62,8 @@ double sind(double); // CHECK-NEXT: store i64 %[[TMP3]], i64* %[[DOTOMP_UB]], align 8 // CHECK-NEXT: store i64 1, i64* %[[DOTOMP_STRIDE]], align 8 // CHECK-NEXT: store i32 0, i32* %[[DOTOMP_IS_LAST]], align 4 -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:.+]]) -// CHECK-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1:.+]], i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[DOTOMP_IS_LAST]], i64* %[[DOTOMP_LB]], i64* %[[DOTOMP_UB]], i64* %[[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @3) +// CHECK-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[DOTOMP_IS_LAST]], i64* %[[DOTOMP_LB]], i64* %[[DOTOMP_UB]], i64* %[[DOTOMP_STRIDE]], i64 1, i64 1) // CHECK-NEXT: %[[TMP4:.+]] = load i64, i64* %[[DOTOMP_UB]], align 8 // CHECK-NEXT: %[[TMP5:.+]] = load i64, i64* 
%[[DOTCAPTURE_EXPR_2]], align 8 // CHECK-NEXT: %[[CMP8:.+]] = icmp sgt i64 %[[TMP4]], %[[TMP5]] @@ -195,13 +195,13 @@ double sind(double); // CHECK-NEXT: br label %[[OMP_LOOP_EXIT:.+]] // CHECK-EMPTY: // CHECK-NEXT: [[OMP_LOOP_EXIT]]: -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM49:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB5:.+]]) -// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 %[[OMP_GLOBAL_THREAD_NUM49]]) +// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM49:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @5) +// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM49]]) // CHECK-NEXT: br label %[[OMP_PRECOND_END]] // CHECK-EMPTY: // CHECK-NEXT: [[OMP_PRECOND_END]]: -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM50:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB6:.+]], i32 %[[OMP_GLOBAL_THREAD_NUM50]]) +// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM50:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @7) +// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @6, i32 %[[OMP_GLOBAL_THREAD_NUM50]]) // CHECK-NEXT: ret void // CHECK-NEXT: } diff --git a/clang/test/OpenMP/parallel_codegen.cpp b/clang/test/OpenMP/parallel_codegen.cpp index 9e934fceddf93..4f451fa3e46ca 100644 --- a/clang/test/OpenMP/parallel_codegen.cpp +++ b/clang/test/OpenMP/parallel_codegen.cpp @@ -765,8 +765,12 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* @global, align 4 // CHECK3-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[LOADGEP_VLA]], i64 1 // CHECK3-NEXT: store i32 [[TMP3]], i32* [[ARRAYIDX1]], align 4 +// CHECK3-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] +// CHECK3: omp.par.region.parallel.after: // CHECK3-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK3: omp.par.pre_finalize: +// CHECK3-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK3: 
.ompfinalize: // CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] // CHECK3: omp.par.outlined.exit.exitStub: // CHECK3-NEXT: ret void @@ -830,8 +834,12 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP5:%.*]] = mul nsw i64 0, [[TMP2]] // CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP4]], i64 [[TMP5]] // CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX2]], i64 0 +// CHECK3-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] +// CHECK3: omp.par.region.parallel.after: // CHECK3-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK3: omp.par.pre_finalize: +// CHECK3-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK3: .ompfinalize: // CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] // CHECK3: omp.par.outlined.exit.exitStub: // CHECK3-NEXT: ret void @@ -903,9 +911,13 @@ int main (int argc, char **argv) { // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* @global, align 4, !dbg [[DBG35]] // CHECK4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[LOADGEP_VLA]], i64 1, !dbg [[DBG35]] // CHECK4-NEXT: store i32 [[TMP3]], i32* [[ARRAYIDX1]], align 4, !dbg [[DBG35]] -// CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]], !dbg [[DBG35]] +// CHECK4-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG35]] +// CHECK4: omp.par.region.parallel.after: +// CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK4: omp.par.pre_finalize: -// CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG35]] +// CHECK4-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK4: .ompfinalize: +// CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] // CHECK4: omp.par.outlined.exit.exitStub: // CHECK4-NEXT: ret void // @@ -971,9 +983,13 @@ int main (int argc, char **argv) { // CHECK4-NEXT: [[TMP5:%.*]] = mul nsw i64 0, [[TMP2]], !dbg [[DBG66]] // CHECK4-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP4]], i64 [[TMP5]], !dbg [[DBG66]] // 
CHECK4-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX2]], i64 0, !dbg [[DBG66]] -// CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]], !dbg [[DBG67:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG67:![0-9]+]] +// CHECK4: omp.par.region.parallel.after: +// CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK4: omp.par.pre_finalize: -// CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG67]] +// CHECK4-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK4: .ompfinalize: +// CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] // CHECK4: omp.par.outlined.exit.exitStub: // CHECK4-NEXT: ret void // From 45d91a2dace2e24001eb2bf3c84f6caaf7a5112c Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 29 Apr 2022 02:00:20 -0500 Subject: [PATCH 17/50] clang-format --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 12 +- clang/lib/CodeGen/CGStmtOpenMP.cpp | 11 +- llvm/include/llvm/Analysis/CFGPrinter.h | 10 +- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 53 +++++---- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 109 +++++++++--------- 5 files changed, 98 insertions(+), 97 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 82dc518b22cfb..d05a39297ba15 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1208,7 +1208,7 @@ namespace { // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR // Builder if one is present. 
struct PushAndPopStackRAII { - CodeGenFunction:: CGNonOpenMPIRBuilderRegion NonOMPBuilderScope; + CodeGenFunction::CGNonOpenMPIRBuilderRegion NonOMPBuilderScope; PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF, bool HasCancel, llvm::omp::Directive Kind) : OMPBuilder(OMPBuilder), NonOMPBuilderScope(CGF) { @@ -1240,11 +1240,11 @@ struct PushAndPopStackRAII { CGF.EmitBranchThroughCleanup(Dest); }; - //llvm_unreachable("TODO: set UserManaged=true"); - // TODO: Remove this once we emit parallel regions through the - // OpenMPIRBuilder as it can do this setup internally. - // llvm::OpenMPIRBuilder::FinalizationInfo FI{{}, Kind, HasCancel, - // /*UserManaged*/ true}; OMPBuilder->pushFinalizationCB(std::move(FI)); + // llvm_unreachable("TODO: set UserManaged=true"); + // TODO: Remove this once we emit parallel regions through the + // OpenMPIRBuilder as it can do this setup internally. + // llvm::OpenMPIRBuilder::FinalizationInfo FI{{}, Kind, HasCancel, + // /*UserManaged*/ true}; OMPBuilder->pushFinalizationCB(std::move(FI)); } ~PushAndPopStackRAII() { // if (OMPBuilder) diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 2302fd4db81c4..f87efc8d369ae 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -4476,7 +4476,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion, const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen, OMPTaskDataTy &Data) { - CGNonOpenMPIRBuilderRegion NonIrBuilderScope(*this); + CGNonOpenMPIRBuilderRegion NonIrBuilderScope(*this); // Emit outlined function for task construct. 
const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion); @@ -5020,7 +5020,7 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective( } void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { - CGNonOpenMPIRBuilderRegion Scope(*this); + CGNonOpenMPIRBuilderRegion Scope(*this); // Emit outlined function for task construct. const CapturedStmt *CS = S.getCapturedStmt(OMPD_task); @@ -5039,7 +5039,7 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { // Check if we should emit tied or untied task. Data.Tied = !S.getSingleClause(); auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { - CGNonOpenMPIRBuilderRegion Scope(CGF); + CGNonOpenMPIRBuilderRegion Scope(CGF); CGF.EmitStmt(CS->getCapturedStmt()); }; auto &&TaskGen = [&S, SharedsTy, CapturedStruct, @@ -6898,8 +6898,6 @@ void CodeGenFunction::EmitOMPCancellationPointDirective( S.getCancelRegion()); } - - void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { const Expr *IfCond = nullptr; for (const auto *C : S.getClausesOfKind()) { @@ -6909,7 +6907,8 @@ void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { break; } } - if (CGM.getLangOpts().OpenMPIRBuilder && !IsInsideNonOpenMPIRBuilderHandledRegion) { + if (CGM.getLangOpts().OpenMPIRBuilder && + !IsInsideNonOpenMPIRBuilderHandledRegion) { llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); llvm::Value *IfCondition = nullptr; if (IfCond) diff --git a/llvm/include/llvm/Analysis/CFGPrinter.h b/llvm/include/llvm/Analysis/CFGPrinter.h index 7afd2c67d8a20..0e2fa81b7bd8e 100644 --- a/llvm/include/llvm/Analysis/CFGPrinter.h +++ b/llvm/include/llvm/Analysis/CFGPrinter.h @@ -212,16 +212,16 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { // Process string output to make it nicer... 
unsigned ColNum = 0; - //unsigned LastSpace = 0; + // unsigned LastSpace = 0; for (unsigned i = 0; i != OutStr.length(); ++i) { if (OutStr[i] == '\n') { // Left justify OutStr[i] = '\\'; OutStr.insert(OutStr.begin() + i + 1, 'l'); ColNum = 0; - // LastSpace = 0; + // LastSpace = 0; } else if (OutStr[i] == ';') { // Delete comments! unsigned Idx = OutStr.find('\n', i + 1); // Find end of line - HandleComment(OutStr, i, Idx); + HandleComment(OutStr, i, Idx); #if 0 } else if (ColNum == MaxColumns) { // Wrap lines. // Wrap very long names even though we can't find a space. @@ -235,8 +235,8 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { } else ++ColNum; LongCol = std::max(LongCol, ColNum); - // if (OutStr[i] == ' ') - // LastSpace = i; + // if (OutStr[i] == ' ') + // LastSpace = i; } if (!HandleBasicBlock && CFGInfo && CFGInfo->HighlightBB && !LongestCol) { diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 1469f778f7ff0..2e4e827cb0d8d 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -106,8 +106,8 @@ class OpenMPIRBuilder { /// destruction, when the scope of the currently generated construct is left /// at the time, and location, the callback is invoked. 
using LeaveRegionCallbackTy = - std::function; // TODO: make simpler again + std::function; // TODO: make simpler again enum class RegionKind { /// Sentinel object so we don't always have to check whether the stack is @@ -123,42 +123,47 @@ class OpenMPIRBuilder { }; struct OMPRegionBreak { - BasicBlock *BB; - omp::Directive Reason; - omp::Directive Target; + BasicBlock *BB; + omp::Directive Reason; + omp::Directive Target; - OMPRegionBreak(BasicBlock *BB, omp::Directive Reason, omp::Directive Target) : BB(BB), Reason(Reason),Target(Target) { } + OMPRegionBreak(BasicBlock *BB, omp::Directive Reason, omp::Directive Target) + : BB(BB), Reason(Reason), Target(Target) {} }; struct OMPRegionInfo { RegionKind Kind; omp::Directive DK; - // LeaveRegionCallbackTy FiniCB; + // LeaveRegionCallbackTy FiniCB; - /// Inside a parallel region, determines whether a barrier must check whether cancellation has occured. - // TODO: remove; determine ourselves whether there was a cancelling construct inside. - bool IsCancellable; + /// Inside a parallel region, determines whether a barrier must check + /// whether cancellation has occured. + // TODO: remove; determine ourselves whether there was a cancelling + // construct inside. 
+ bool IsCancellable; - SmallVectorBreaks; + SmallVector Breaks; OMPRegionInfo(RegionKind Kind, omp::Directive DK, bool IsCancellable - //, LeaveRegionCallbackTy FiniCB - ) : Kind(Kind), DK(DK), IsCancellable(IsCancellable) - // , FiniCB(std::move(FiniCB)) + //, LeaveRegionCallbackTy FiniCB + ) + : Kind(Kind), DK(DK), IsCancellable(IsCancellable) + // , FiniCB(std::move(FiniCB)) { assertOK(); } #ifndef NDEBUG - ~OMPRegionInfo() { - assertOK(); - assert(Breaks.empty()); + ~OMPRegionInfo() { + assertOK(); + assert(Breaks.empty()); } #endif - void addBreak(BasicBlock *BB, omp::Directive Reason, omp::Directive Target) { - assert(!BB->getTerminator()); - Breaks.emplace_back(BB,Reason,Target); + void addBreak(BasicBlock *BB, omp::Directive Reason, + omp::Directive Target) { + assert(!BB->getTerminator()); + Breaks.emplace_back(BB, Reason, Target); } /// Consistency self-check. @@ -174,12 +179,14 @@ class OpenMPIRBuilder { OMPRegionInfo *getInnermostDirectionRegion(omp::Directive DK); OMPRegionInfo *pushRegion(omp::Directive DK, bool IsCancellable - //, LeaveRegionCallbackTy FiniCB = {} + //, LeaveRegionCallbackTy FiniCB = {} ); - // void emitRegionExit(InsertPointTy ExitingIP, OMPRegionInfo *RegionToLeave, omp::Directive LeaveReason = omp::OMPD_unknown); + // void emitRegionExit(InsertPointTy ExitingIP, OMPRegionInfo *RegionToLeave, + // omp::Directive LeaveReason = omp::OMPD_unknown); - void popRegion(OMPRegionInfo*R, BasicBlock *ContinueBB, LeaveRegionCallbackTy &LeaveCb); + void popRegion(OMPRegionInfo *R, BasicBlock *ContinueBB, + LeaveRegionCallbackTy &LeaveCb); /// Return true if the last entry in the finalization stack is of kind \p DK /// and cancellable. 
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 57ef8be86f921..53e28636123f7 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -525,15 +525,13 @@ void OpenMPIRBuilder::finalize(Function *Fn) { } OpenMPIRBuilder::OpenMPIRBuilder(Module &M) : M(M), Builder(M.getContext()) { - RegionStack.emplace_back( - new OMPRegionInfo( + RegionStack.emplace_back(new OMPRegionInfo( RegionKind::Toplevel, omp::OMPD_unknown, /*IsCancellable*/ false - // , [](InsertPointTy ExitingIP, omp::Directive LeaveReason, - // OMPRegionInfo *Region) { - // llvm_unreachable("top-level is not finialized"); - // } - ) - ); + // , [](InsertPointTy ExitingIP, omp::Directive LeaveReason, + // OMPRegionInfo *Region) { + // llvm_unreachable("top-level is not finialized"); + // } + )); assert(RegionStack.size() == 1); } @@ -682,11 +680,11 @@ OpenMPIRBuilder::getInnermostDirectionRegion(omp::Directive DK) { OpenMPIRBuilder::OMPRegionInfo * OpenMPIRBuilder::pushRegion(omp::Directive DK, bool IsCancellable - //, LeaveRegionCallbackTy FiniCB + //, LeaveRegionCallbackTy FiniCB ) { RegionStack.emplace_back(new OMPRegionInfo(RegionKind::Directive, DK, - IsCancellable//, std::move(FiniCB) - )); + IsCancellable //, std::move(FiniCB) + )); return RegionStack.back().get(); } @@ -721,41 +719,40 @@ void OpenMPIRBuilder::emitRegionExit(InsertPointTy ExitingIP, } #endif +void OpenMPIRBuilder::popRegion(OMPRegionInfo *R, BasicBlock *ContinueBB, + LeaveRegionCallbackTy &LeaveCb) { + auto DK = R->DK; + assert(RegionStack.back().get() == R && "balanced region push/pop required"); + R->assertOK(); -void OpenMPIRBuilder::popRegion(OMPRegionInfo *R, BasicBlock *ContinueBB, LeaveRegionCallbackTy &LeaveCb) { - auto DK = R->DK; - assert(RegionStack.back().get() == R && "balanced region push/pop required"); - R->assertOK(); - - // Trickly down no yet handled breaks. 
- OMPRegionInfo* Innermost = RegionStack.back().get(); - OMPRegionInfo* NewInnermost = RegionStack.rbegin()[1].get(); - - - for (auto& B : reverse(Innermost->Breaks)) { - assert(!B.BB->getTerminator()); - Builder.SetInsertPoint(B.BB); - - if (B.Target == DK) { - // Join common finialization block - Builder.SetInsertPoint(B.BB); - BranchInst * TI = Builder.CreateBr(ContinueBB); - // if (LeaveCb) - // LeaveCb( InsertPointTy(TI->getParent(), TI->getIterator()), B.Reason, Innermost); - B.BB = nullptr; - } else if (LeaveCb) { - B.BB = splitBB(Builder, true, ".fini"); - LeaveCb(Builder.saveIP(), B.Reason, Innermost); - Builder.SetInsertPoint( B.BB); - } - } + // Trickly down no yet handled breaks. + OMPRegionInfo *Innermost = RegionStack.back().get(); + OMPRegionInfo *NewInnermost = RegionStack.rbegin()[1].get(); + for (auto &B : reverse(Innermost->Breaks)) { + assert(!B.BB->getTerminator()); + Builder.SetInsertPoint(B.BB); - for (OMPRegionBreak &B : Innermost->Breaks) { - if (B.BB) - NewInnermost->addBreak(B.BB, B.Reason, B.Target); + if (B.Target == DK) { + // Join common finialization block + Builder.SetInsertPoint(B.BB); + BranchInst *TI = Builder.CreateBr(ContinueBB); + // if (LeaveCb) + // LeaveCb( InsertPointTy(TI->getParent(), TI->getIterator()), + // B.Reason, Innermost); + B.BB = nullptr; + } else if (LeaveCb) { + B.BB = splitBB(Builder, true, ".fini"); + LeaveCb(Builder.saveIP(), B.Reason, Innermost); + Builder.SetInsertPoint(B.BB); } - Innermost->Breaks.clear(); + } + + for (OMPRegionBreak &B : Innermost->Breaks) { + if (B.BB) + NewInnermost->addBreak(B.BB, B.Reason, B.Target); + } + Innermost->Breaks.clear(); RegionStack.pop_back(); } @@ -999,8 +996,10 @@ void OpenMPIRBuilder::emitCancelationCheckImpl(LocationDescription Loc, auto &FI = FinalizationStack.back(); FI.FiniCB(Builder.saveIP()); #endif - RegionStack.back()->addBreak(CancellationBlock, CancelledBy, CanceledDirective); - //emitRegionExit({CancellationBlock, CancellationBlock->begin()}, 
getInnermostDirectionRegion(CanceledDirective), CancelledBy); + RegionStack.back()->addBreak(CancellationBlock, CancelledBy, + CanceledDirective); + // emitRegionExit({CancellationBlock, CancellationBlock->begin()}, + // getInnermostDirectionRegion(CanceledDirective), CancelledBy); // Builder.SetInsertPoint(CancellationBlock); // Builder.CreateBr( CancellationBlock); @@ -1124,7 +1123,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( // FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable}); OMPRegionInfo *ParallelRegion = pushRegion(OMPD_parallel, IsCancellable - //, FiniCBWrapper + //, FiniCBWrapper ); #endif @@ -1267,13 +1266,11 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( "Unexpected finalization stack state!"); #endif - Instruction *PRegPreFiniTI = PRegPreFiniBB->getTerminator(); - if (FiniCB) { - InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator()); - FiniCB(PreFiniIP, OMPD_unknown, ParallelRegion); + InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator()); + FiniCB(PreFiniIP, OMPD_unknown, ParallelRegion); } #if 0 for (auto& B : reverse(ParallelRegion->Breaks)) { @@ -1294,7 +1291,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( return !B.BB; }), ParallelRegion->Breaks.end() ); //emitRegionExit(PreFiniIP, ParallelRegion); -#endif +#endif popRegion(ParallelRegion, PRegPreFiniBB, FiniCB); OI.OuterAllocaBB = OuterAllocaBlock; @@ -1580,17 +1577,15 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections( #endif // Instruction *I = Builder.CreateBr(ExitBB); - - Builder.restoreIP(AfterIP); auto Finish = splitBB(Builder, true, "section_finish"); if (FiniCB) { - Builder.SetInsertPoint(Finish); - Finish = splitBB(Builder, true, "section_fini"); - FiniCB(Builder.saveAndClearIP(), OMPD_unknown, SectionsRegion); + Builder.SetInsertPoint(Finish); + Finish = splitBB(Builder, true, "section_fini"); + FiniCB(Builder.saveAndClearIP(), OMPD_unknown, SectionsRegion); } - - 
//emitRegionExit(Builder.saveIP(), SectionsRegion); + + // emitRegionExit(Builder.saveIP(), SectionsRegion); popRegion(SectionsRegion, Finish, FiniCB); return {Finish, Finish->begin()}; From 3f36680df7ba46dc4c6df37c1b85ef48b0565c3c Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 29 Apr 2022 02:33:19 -0500 Subject: [PATCH 18/50] simplify callback --- clang/lib/CodeGen/CGStmtOpenMP.cpp | 22 +++------- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 7 +-- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 10 ++--- .../Frontend/OpenMPIRBuilderTest.cpp | 43 +++++++------------ 4 files changed, 31 insertions(+), 51 deletions(-) diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index f87efc8d369ae..c60c3fd5ff32e 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -1735,8 +1735,7 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { // The cleanup callback that finalizes all variabels at the given location, // thus calls destructors etc. 
- auto FiniCB = [this](InsertPointTy IP, llvm::omp::Directive LeaveReason, - llvm::OpenMPIRBuilder::OMPRegionInfo *Region) { + auto FiniCB = [this](InsertPointTy IP) { OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); }; @@ -4035,8 +4034,7 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy; - auto FiniCB = [this](InsertPointTy IP, llvm::omp::Directive LeaveReason, - llvm::OpenMPIRBuilder::OMPRegionInfo *Region) { + auto FiniCB = [this](InsertPointTy IP) { OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); }; @@ -4106,9 +4104,7 @@ void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt(); - auto FiniCB = [this](InsertPointTy ExitingIP, - llvm::omp::Directive LeaveReason, - llvm::OpenMPIRBuilder::OMPRegionInfo *Region) { + auto FiniCB = [this](InsertPointTy ExitingIP) { OMPBuilderCBHelpers::FinalizeOMPRegion(*this, ExitingIP); }; @@ -4199,8 +4195,7 @@ void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt(); - auto FiniCB = [this](InsertPointTy IP, llvm::omp::Directive LeaveReason, - llvm::OpenMPIRBuilder::OMPRegionInfo *Region) { + auto FiniCB = [this](InsertPointTy IP) { OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); }; @@ -4246,8 +4241,7 @@ void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) { ? 
EmitScalarExpr(Filter, CGM.Int32Ty) : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0); - auto FiniCB = [this](InsertPointTy IP, llvm::omp::Directive LeaveReason, - llvm::OpenMPIRBuilder::OMPRegionInfo *Region) { + auto FiniCB = [this](InsertPointTy IP) { OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); }; @@ -4287,8 +4281,7 @@ void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { HintInst = Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false); - auto FiniCB = [this](InsertPointTy IP, llvm::omp::Directive LeaveReason, - llvm::OpenMPIRBuilder::OMPRegionInfo *Region) { + auto FiniCB = [this](InsertPointTy IP) { OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); }; @@ -5631,8 +5624,7 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { // Without clause, it behaves as if the threads clause is specified. const auto *C = S.getSingleClause(); - auto FiniCB = [this](InsertPointTy IP, llvm::omp::Directive LeaveReason, - llvm::OpenMPIRBuilder::OMPRegionInfo *Region) { + auto FiniCB = [this](InsertPointTy IP) { OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); }; diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 2e4e827cb0d8d..446964a985222 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -106,8 +106,7 @@ class OpenMPIRBuilder { /// destruction, when the scope of the currently generated construct is left /// at the time, and location, the callback is invoked. 
using LeaveRegionCallbackTy = - std::function; // TODO: make simpler again + std::function; enum class RegionKind { /// Sentinel object so we don't always have to check whether the stack is @@ -178,6 +177,7 @@ class OpenMPIRBuilder { OMPRegionInfo *getInnermostDirectionRegion(omp::Directive DK); +private: OMPRegionInfo *pushRegion(omp::Directive DK, bool IsCancellable //, LeaveRegionCallbackTy FiniCB = {} ); @@ -186,8 +186,9 @@ class OpenMPIRBuilder { // omp::Directive LeaveReason = omp::OMPD_unknown); void popRegion(OMPRegionInfo *R, BasicBlock *ContinueBB, - LeaveRegionCallbackTy &LeaveCb); + function_ref LeaveCb); +private: /// Return true if the last entry in the finalization stack is of kind \p DK /// and cancellable. bool isLastFinalizationInfoCancellable(omp::Directive DK) { diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 53e28636123f7..e4495d89188b7 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -720,7 +720,7 @@ void OpenMPIRBuilder::emitRegionExit(InsertPointTy ExitingIP, #endif void OpenMPIRBuilder::popRegion(OMPRegionInfo *R, BasicBlock *ContinueBB, - LeaveRegionCallbackTy &LeaveCb) { + function_ref LeaveCb) { auto DK = R->DK; assert(RegionStack.back().get() == R && "balanced region push/pop required"); R->assertOK(); @@ -743,7 +743,7 @@ void OpenMPIRBuilder::popRegion(OMPRegionInfo *R, BasicBlock *ContinueBB, B.BB = nullptr; } else if (LeaveCb) { B.BB = splitBB(Builder, true, ".fini"); - LeaveCb(Builder.saveIP(), B.Reason, Innermost); + LeaveCb(Builder.saveIP()); Builder.SetInsertPoint(B.BB); } } @@ -1118,7 +1118,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( #endif if (FiniCB) - FiniCB(IP, LeaveReason, Region); // Needed? + FiniCB(IP); // Needed? 
}; // FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable}); @@ -1270,7 +1270,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( if (FiniCB) { InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator()); - FiniCB(PreFiniIP, OMPD_unknown, ParallelRegion); + FiniCB(PreFiniIP ); } #if 0 for (auto& B : reverse(ParallelRegion->Breaks)) { @@ -1582,7 +1582,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections( if (FiniCB) { Builder.SetInsertPoint(Finish); Finish = splitBB(Builder, true, "section_fini"); - FiniCB(Builder.saveAndClearIP(), OMPD_unknown, SectionsRegion); + FiniCB(Builder.saveAndClearIP() ); } // emitRegionExit(Builder.saveIP(), SectionsRegion); diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index 446264f84b5d4..309203578626e 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -355,6 +355,7 @@ TEST_F(OpenMPIRBuilderTest, CreateBarrier) { EXPECT_FALSE(verifyModule(*M, &errs())); } + TEST_F(OpenMPIRBuilderTest, DbgLoc) { OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.initialize(); @@ -449,8 +450,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimple) { return CodeGenIP; }; - auto FiniCB = [&](InsertPointTy CodeGenIP, omp::Directive LeaveReason, - OpenMPIRBuilder::OMPRegionInfo *Region) { + auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; }; @@ -531,8 +531,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested) { return CodeGenIP; }; - auto FiniCB = [&](InsertPointTy CodeGenIP, omp::Directive LeaveReason, - OpenMPIRBuilder::OMPRegionInfo *Region) { + auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; }; @@ -628,8 +627,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) { return CodeGenIP; }; - auto FiniCB = [&](InsertPointTy CodeGenIP, omp::Directive LeaveReason, - OpenMPIRBuilder::OMPRegionInfo *Region) { + auto FiniCB = [&](InsertPointTy CodeGenIP) { 
++NumFinalizationPoints; }; @@ -766,8 +764,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelIfCond) { return CodeGenIP; }; - auto FiniCB = [&](InsertPointTy CodeGenIP, omp::Directive LeaveReason, - OpenMPIRBuilder::OMPRegionInfo *Region) { + auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; // No destructors. }; @@ -900,8 +897,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) { auto *FakeDestructor = Function::Create( FakeDestructorTy, Function::ExternalLinkage, "fakeDestructor", M.get()); - auto FiniCB = [&](InsertPointTy IP, omp::Directive LeaveReason, - OpenMPIRBuilder::OMPRegionInfo *Region) { + auto FiniCB = [&](InsertPointTy IP) { ++NumFinalizationPoints; Builder.restoreIP(IP); Builder.CreateCall(FakeDestructor, @@ -2116,8 +2112,7 @@ TEST_F(OpenMPIRBuilderTest, MasterDirective) { Builder.CreateICmpNE(F->arg_begin(), PrivLoad); }; - auto FiniCB = [&](InsertPointTy IP, omp::Directive LeaveReason, - OpenMPIRBuilder::OMPRegionInfo *Region) { + auto FiniCB = [&](InsertPointTy IP) { BasicBlock *IPBB = IP.getBlock(); EXPECT_NE(IPBB->end(), IP.getPoint()); }; @@ -2194,8 +2189,7 @@ TEST_F(OpenMPIRBuilderTest, MaskedDirective) { Builder.CreateICmpNE(F->arg_begin(), PrivLoad); }; - auto FiniCB = [&](InsertPointTy IP, omp::Directive LeaveReason, - OpenMPIRBuilder::OMPRegionInfo *Region) { + auto FiniCB = [&](InsertPointTy IP) { BasicBlock *IPBB = IP.getBlock(); EXPECT_NE(IPBB->end(), IP.getPoint()); }; @@ -2261,8 +2255,7 @@ TEST_F(OpenMPIRBuilderTest, CriticalDirective) { Builder.CreateICmpNE(F->arg_begin(), PrivLoad); }; - auto FiniCB = [&](InsertPointTy IP, omp::Directive LeaveReason, - OpenMPIRBuilder::OMPRegionInfo *Region) { + auto FiniCB = [&](InsertPointTy IP) { BasicBlock *IPBB = IP.getBlock(); EXPECT_NE(IPBB->end(), IP.getPoint()); }; @@ -2501,8 +2494,7 @@ TEST_F(OpenMPIRBuilderTest, OrderedDirectiveThreads) { Builder.CreateICmpNE(F->arg_begin(), PrivLoad); }; - auto FiniCB = [&](InsertPointTy IP, omp::Directive LeaveReason, - 
OpenMPIRBuilder::OMPRegionInfo *Region) { + auto FiniCB = [&](InsertPointTy IP) { BasicBlock *IPBB = IP.getBlock(); EXPECT_NE(IPBB->end(), IP.getPoint()); }; @@ -2573,8 +2565,7 @@ TEST_F(OpenMPIRBuilderTest, OrderedDirectiveSimd) { Builder.CreateICmpNE(F->arg_begin(), PrivLoad); }; - auto FiniCB = [&](InsertPointTy IP, omp::Directive LeaveReason, - OpenMPIRBuilder::OMPRegionInfo *Region) { + auto FiniCB = [&](InsertPointTy IP) { BasicBlock *IPBB = IP.getBlock(); EXPECT_NE(IPBB->end(), IP.getPoint()); }; @@ -2690,8 +2681,7 @@ TEST_F(OpenMPIRBuilderTest, SingleDirective) { Builder.CreateICmpNE(F->arg_begin(), PrivLoad); }; - auto FiniCB = [&](InsertPointTy IP, omp::Directive LeaveReason, - OpenMPIRBuilder::OMPRegionInfo *Region) { + auto FiniCB = [&](InsertPointTy IP) { BasicBlock *IPBB = IP.getBlock(); EXPECT_NE(IPBB->end(), IP.getPoint()); }; @@ -2781,8 +2771,7 @@ TEST_F(OpenMPIRBuilderTest, SingleDirectiveNowait) { Builder.CreateICmpNE(F->arg_begin(), PrivLoad); }; - auto FiniCB = [&](InsertPointTy IP, omp::Directive LeaveReason, - OpenMPIRBuilder::OMPRegionInfo *Region) { + auto FiniCB = [&](InsertPointTy IP) { BasicBlock *IPBB = IP.getBlock(); EXPECT_NE(IPBB->end(), IP.getPoint()); }; @@ -3526,8 +3515,7 @@ TEST_F(OpenMPIRBuilderTest, CreateReductions) { }; // Do nothing in finalization. 
- auto FiniCB = [&](InsertPointTy CodeGenIP, omp::Directive LeaveReason, - OpenMPIRBuilder::OMPRegionInfo *Region) { + auto FiniCB = [&](InsertPointTy CodeGenIP) { return CodeGenIP; }; @@ -3913,8 +3901,7 @@ TEST_F(OpenMPIRBuilderTest, CreateSections) { unsigned NumFiniCBCalls = 0; PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); - auto FiniCB = [&](InsertPointTy IP, omp::Directive LeaveReason, - OpenMPIRBuilder::OMPRegionInfo *Region) { + auto FiniCB = [&](InsertPointTy IP) { ++NumFiniCBCalls; BasicBlock *IPBB = IP.getBlock(); EXPECT_NE(IPBB->end(), IP.getPoint()); From 87ef1ec420026681b1a4c225e95369568333b9d9 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 29 Apr 2022 02:38:40 -0500 Subject: [PATCH 19/50] undo callback rename --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 2 +- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 20 +++++++++---------- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 18 ++++++++--------- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index d05a39297ba15..80b332f8cece0 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -2182,7 +2182,7 @@ void CGOpenMPRuntime::emitIRBuilderParallel( CodeGenFunction &CGF, const CapturedStmt *CS, llvm::OpenMPIRBuilder::BodyGenCallbackTy BodyGenCB, llvm::OpenMPIRBuilder::PrivatizeCallbackTy PrivCB, - llvm::OpenMPIRBuilder::LeaveRegionCallbackTy FiniCB, + llvm::OpenMPIRBuilder::FinalizeCallbackTy FiniCB, // llvm:: OpenMPIRBuilder:: CancellationCallbackTy CancelCB, llvm::Value *IfCond, llvm::Value *NumThreads, llvm::omp::ProcBindKind ProcBind, bool IsCancellable) { diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 446964a985222..5ca8605cf8c35 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -105,7 +105,7 @@ class OpenMPIRBuilder { 
/// A finalize callback knows about all objects that need finalization, e.g. /// destruction, when the scope of the currently generated construct is left /// at the time, and location, the callback is invoked. - using LeaveRegionCallbackTy = + using FinalizeCallbackTy = std::function; enum class RegionKind { @@ -321,7 +321,7 @@ class OpenMPIRBuilder { IRBuilder<>::InsertPoint createParallel(const LocationDescription &Loc, InsertPointTy OuterAllocaIP, BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, - LeaveRegionCallbackTy FiniCB, Value *IfCondition, + FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable); @@ -997,7 +997,7 @@ class OpenMPIRBuilder { /// \returns The insertion position *after* the single call. InsertPointTy createSingle(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - LeaveRegionCallbackTy FiniCB, bool IsNowait, + FinalizeCallbackTy FiniCB, bool IsNowait, llvm::Value *DidIt); /// Generator for '#omp master' @@ -1009,7 +1009,7 @@ class OpenMPIRBuilder { /// \returns The insertion position *after* the master. InsertPointTy createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - LeaveRegionCallbackTy FiniCB); + FinalizeCallbackTy FiniCB); /// Generator for '#omp masked' /// @@ -1020,7 +1020,7 @@ class OpenMPIRBuilder { /// \returns The insertion position *after* the masked. InsertPointTy createMasked(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - LeaveRegionCallbackTy FiniCB, Value *Filter); + FinalizeCallbackTy FiniCB, Value *Filter); /// Generator for '#omp critical' /// @@ -1033,7 +1033,7 @@ class OpenMPIRBuilder { /// \returns The insertion position *after* the critical. 
InsertPointTy createCritical(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - LeaveRegionCallbackTy FiniCB, + FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst); /// Generator for '#omp ordered depend (source | sink)' @@ -1062,7 +1062,7 @@ class OpenMPIRBuilder { /// \returns The insertion position *after* the ordered. InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - LeaveRegionCallbackTy FiniCB, + FinalizeCallbackTy FiniCB, bool IsThreads); /// Generator for '#omp sections' @@ -1080,7 +1080,7 @@ class OpenMPIRBuilder { InsertPointTy AllocaIP, ArrayRef SectionCBs, PrivatizeCallbackTy PrivCB, - LeaveRegionCallbackTy FiniCB, bool IsCancellable, + FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait); /// Generator for '#omp section' @@ -1091,7 +1091,7 @@ class OpenMPIRBuilder { /// \returns The insertion position *after* the section. InsertPointTy createSection(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - LeaveRegionCallbackTy FiniCB); + FinalizeCallbackTy FiniCB); /// Generate conditional branch and relevant BasicBlocks through which private /// threads copy the 'copyin' variables from Master copy to threadprivate @@ -1295,7 +1295,7 @@ class OpenMPIRBuilder { InsertPointTy EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall, Instruction *ExitCall, BodyGenCallbackTy BodyGenCB, - LeaveRegionCallbackTy FiniCB, bool Conditional = false, + FinalizeCallbackTy FiniCB, bool Conditional = false, bool HasFinalize = true, bool IsCancellable = false); /// Get the platform-specific name separator. 
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index e4495d89188b7..41c2d9cdd3dad 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -1014,7 +1014,7 @@ void OpenMPIRBuilder::emitCancelationCheckImpl(LocationDescription Loc, IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( const LocationDescription &Loc, InsertPointTy OuterAllocaIP, BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, - LeaveRegionCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, + FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable) { assert(!isConflictIP(Loc.IP, OuterAllocaIP) && "IPs must not be ambiguous"); @@ -1482,7 +1482,7 @@ void OpenMPIRBuilder::createTaskyield(const LocationDescription &Loc) { OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections( const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef SectionCBs, PrivatizeCallbackTy PrivCB, - LeaveRegionCallbackTy FiniCB, bool IsCancellable, bool IsNowait) { + FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait) { assert(!isConflictIP(AllocaIP, Loc.IP) && "Dedicated IP allocas required"); if (!updateToLocation(Loc)) @@ -1594,7 +1594,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections( OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSection(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - LeaveRegionCallbackTy FiniCB) { + FinalizeCallbackTy FiniCB) { if (!updateToLocation(Loc)) return Loc.IP; @@ -1822,7 +1822,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductions( OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - LeaveRegionCallbackTy FiniCB) { + FinalizeCallbackTy FiniCB) { if (!updateToLocation(Loc)) return Loc.IP; @@ -1847,7 +1847,7 @@ OpenMPIRBuilder::createMaster(const LocationDescription &Loc, 
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createMasked(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - LeaveRegionCallbackTy FiniCB, Value *Filter) { + FinalizeCallbackTy FiniCB, Value *Filter) { if (!updateToLocation(Loc)) return Loc.IP; @@ -3232,7 +3232,7 @@ OpenMPIRBuilder::createCopyPrivate(const LocationDescription &Loc, OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSingle( const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - LeaveRegionCallbackTy FiniCB, bool IsNowait, llvm::Value *DidIt) { + FinalizeCallbackTy FiniCB, bool IsNowait, llvm::Value *DidIt) { if (!updateToLocation(Loc)) return Loc.IP; @@ -3274,7 +3274,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSingle( OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCritical( const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - LeaveRegionCallbackTy FiniCB, StringRef CriticalName, Value *HintInst) { + FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst) { if (!updateToLocation(Loc)) return Loc.IP; @@ -3354,7 +3354,7 @@ OpenMPIRBuilder::createOrderedDepend(const LocationDescription &Loc, OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createOrderedThreadsSimd( const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - LeaveRegionCallbackTy FiniCB, bool IsThreads) { + FinalizeCallbackTy FiniCB, bool IsThreads) { if (!updateToLocation(Loc)) return Loc.IP; @@ -3383,7 +3383,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createOrderedThreadsSimd( OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion( Directive OMPD, Instruction *EntryCall, Instruction *ExitCall, - BodyGenCallbackTy BodyGenCB, LeaveRegionCallbackTy FiniCB, bool Conditional, + BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional, bool HasFinalize, bool IsCancellable) { #if 0 From 3bfa8b4ecf1a6a68eb040225dbe0f3a05cf9d0d5 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 29 Apr 2022 03:02:20 -0500 Subject: [PATCH 20/50] 
some comments --- clang/lib/CodeGen/CGOpenMPRuntime.h | 2 +- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 17 ++++++++----- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 24 +++++++++++++++++++ 3 files changed, 36 insertions(+), 7 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index 97d639b1cfb72..b90d1eb4559ef 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -1038,7 +1038,7 @@ class CGOpenMPRuntime { CodeGenFunction &CGF, const CapturedStmt *CS, llvm::OpenMPIRBuilder::BodyGenCallbackTy BodyGenCB, llvm::OpenMPIRBuilder::PrivatizeCallbackTy PrivCB, - llvm::OpenMPIRBuilder::LeaveRegionCallbackTy FiniCB, + llvm::OpenMPIRBuilder::FinalizeCallbackTy FiniCB, // llvm:: OpenMPIRBuilder:: CancellationCallbackTy CancelCB, llvm::Value *IfCondition, llvm::Value *NumThreads, llvm::omp::ProcBindKind ProcBind, bool IsCancellable); diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 5ca8605cf8c35..99159002df59f 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -95,18 +95,15 @@ class OpenMPIRBuilder { /// Type used throughout for insertion points. using InsertPointTy = IRBuilder<>::InsertPoint; - struct OMPRegionInfo; - /// Callback type for variable finalization (think destructors). /// - /// \param ExitingIP is the insertion point at which the finalization code + /// \param CodeGenIP is the insertion point at which the finalization code /// should be placed. /// /// A finalize callback knows about all objects that need finalization, e.g. /// destruction, when the scope of the currently generated construct is left /// at the time, and location, the callback is invoked. 
- using FinalizeCallbackTy = - std::function; + using FinalizeCallbackTy = std::function; enum class RegionKind { /// Sentinel object so we don't always have to check whether the stack is @@ -121,19 +118,27 @@ class OpenMPIRBuilder { Directive }; +/// An irregular exit out of a region, such as by cancellation. struct OMPRegionBreak { +/// The end of this basic block is current end of the path for breaking out of the region. Must have no terminator so finializations (eg. destructors) can be appended until rejoining at the end of the target region. BasicBlock *BB; + + /// What triggered the break out of a region, such as a canecellation point. omp::Directive Reason; + + /// The kind of region that is being exited. Control flow will rejoin after the innermost region of this kind. omp::Directive Target; OMPRegionBreak(BasicBlock *BB, omp::Directive Reason, omp::Directive Target) : BB(BB), Reason(Reason), Target(Target) {} + + /// Consistency self-check. + void assertOK() const; }; struct OMPRegionInfo { RegionKind Kind; omp::Directive DK; - // LeaveRegionCallbackTy FiniCB; /// Inside a parallel region, determines whether a barrier must check /// whether cancellation has occured. 
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 41c2d9cdd3dad..a3c3d55bd2a31 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -757,6 +757,26 @@ void OpenMPIRBuilder::popRegion(OMPRegionInfo *R, BasicBlock *ContinueBB, RegionStack.pop_back(); } +void OpenMPIRBuilder ::OMPRegionBreak:: assertOK() const { +#ifndef NDEBUG + assert(!BB || !BB->getTerminator()); + switch (Reason) { + case OMPD_cancellation_point: + case OMPD_barrier: + break; + default: + llvm_unreachable("unexpected region break reason"); + } + switch (Target) { + case OMPD_parallel: + case OMPD_sections: + break; + default: + llvm_unreachable("unexpected region break target"); + } +#endif +} + void OpenMPIRBuilder::OMPRegionInfo::assertOK() const { #ifndef NDEBUG switch (Kind) { @@ -776,6 +796,10 @@ void OpenMPIRBuilder::OMPRegionInfo::assertOK() const { } break; } + + for (auto &B:Breaks) { + B.assertOK(); + } #endif } From feccee75e74fe4e3420af15da3add82419c31093 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 29 Apr 2022 16:03:10 -0500 Subject: [PATCH 21/50] push/pop all regions --- clang/test/OpenMP/cancel_codegen.cpp | 16 ++++-- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 15 +++--- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 53 +++++++++++++------ 3 files changed, 59 insertions(+), 25 deletions(-) diff --git a/clang/test/OpenMP/cancel_codegen.cpp b/clang/test/OpenMP/cancel_codegen.cpp index 1d1bdfd534929..c47e667e65de7 100644 --- a/clang/test/OpenMP/cancel_codegen.cpp +++ b/clang/test/OpenMP/cancel_codegen.cpp @@ -1462,10 +1462,12 @@ for (int i = 0; i < argc; ++i) { // CHECK3: omp_section_loop.body.case26.cncl.fini: // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL:%.*]] // CHECK3: omp_section_loop.body.case26.cncl: -// CHECK3-NEXT: br label [[SECTION_FINI35]] +// CHECK3-NEXT: br label [[OMP_REGION_FINALIZE:%.*]] // CHECK3: omp_section_loop.body.case26.cont: // 
CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTIONFINI:%.*]] // CHECK3: omp_section_loop.body.case26.sectionfini: +// CHECK3-NEXT: br label [[OMP_REGION_FINALIZE]] +// CHECK3: omp_region.finalize: // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTION_AFTER:%.*]] // CHECK3: omp_section_loop.body.case26.section.after: // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER]] @@ -2130,10 +2132,12 @@ for (int i = 0; i < argc; ++i) { // CHECK4: omp_section_loop.body.case26.cncl.fini: // CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL:%.*]] // CHECK4: omp_section_loop.body.case26.cncl: -// CHECK4-NEXT: br label [[SECTION_FINI35]] +// CHECK4-NEXT: br label [[OMP_REGION_FINALIZE:%.*]] // CHECK4: omp_section_loop.body.case26.cont: // CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTIONFINI:%.*]] // CHECK4: omp_section_loop.body.case26.sectionfini: +// CHECK4-NEXT: br label [[OMP_REGION_FINALIZE]] +// CHECK4: omp_region.finalize: // CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTION_AFTER:%.*]] // CHECK4: omp_section_loop.body.case26.section.after: // CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER]] @@ -4038,10 +4042,12 @@ for (int i = 0; i < argc; ++i) { // CHECK9: omp_section_loop.body.case26.cncl.fini: // CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL:%.*]] // CHECK9: omp_section_loop.body.case26.cncl: -// CHECK9-NEXT: br label [[SECTION_FINI35]] +// CHECK9-NEXT: br label [[OMP_REGION_FINALIZE:%.*]] // CHECK9: omp_section_loop.body.case26.cont: // CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTIONFINI:%.*]] // CHECK9: omp_section_loop.body.case26.sectionfini: +// CHECK9-NEXT: br label [[OMP_REGION_FINALIZE]] +// CHECK9: omp_region.finalize: // CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTION_AFTER:%.*]] // CHECK9: omp_section_loop.body.case26.section.after: // CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER]] @@ -4706,10 +4712,12 @@ for (int i = 0; i < 
argc; ++i) { // CHECK10: omp_section_loop.body.case26.cncl.fini: // CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL:%.*]] // CHECK10: omp_section_loop.body.case26.cncl: -// CHECK10-NEXT: br label [[SECTION_FINI35]] +// CHECK10-NEXT: br label [[OMP_REGION_FINALIZE:%.*]] // CHECK10: omp_section_loop.body.case26.cont: // CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTIONFINI:%.*]] // CHECK10: omp_section_loop.body.case26.sectionfini: +// CHECK10-NEXT: br label [[OMP_REGION_FINALIZE]] +// CHECK10: omp_region.finalize: // CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTION_AFTER:%.*]] // CHECK10: omp_section_loop.body.case26.section.after: // CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER]] diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 99159002df59f..afb4f28f2b250 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -118,6 +118,8 @@ class OpenMPIRBuilder { Directive }; + struct OMPRegionInfo; + /// An irregular exit out of a region, such as by cancellation. struct OMPRegionBreak { /// The end of this basic block is current end of the path for breaking out of the region. Must have no terminator so finializations (eg. destructors) can be appended until rejoining at the end of the target region. @@ -127,9 +129,9 @@ class OpenMPIRBuilder { omp::Directive Reason; /// The kind of region that is being exited. Control flow will rejoin after the innermost region of this kind. - omp::Directive Target; + OMPRegionInfo* Target; - OMPRegionBreak(BasicBlock *BB, omp::Directive Reason, omp::Directive Target) + OMPRegionBreak(BasicBlock *BB, omp::Directive Reason, OMPRegionInfo* Target) : BB(BB), Reason(Reason), Target(Target) {} /// Consistency self-check. 
@@ -165,7 +167,7 @@ class OpenMPIRBuilder { #endif void addBreak(BasicBlock *BB, omp::Directive Reason, - omp::Directive Target) { + OMPRegionInfo* Target) { assert(!BB->getTerminator()); Breaks.emplace_back(BB, Reason, Target); } @@ -183,9 +185,10 @@ class OpenMPIRBuilder { OMPRegionInfo *getInnermostDirectionRegion(omp::Directive DK); private: - OMPRegionInfo *pushRegion(omp::Directive DK, bool IsCancellable - //, LeaveRegionCallbackTy FiniCB = {} - ); + OMPRegionInfo *pushRegion(RegionKind Kind, omp::Directive DK, bool IsCancellable); + OMPRegionInfo *pushRegion(omp::Directive DK, bool IsCancellable) { + return pushRegion(RegionKind::Directive, DK, IsCancellable); + } // void emitRegionExit(InsertPointTy ExitingIP, OMPRegionInfo *RegionToLeave, // omp::Directive LeaveReason = omp::OMPD_unknown); diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index a3c3d55bd2a31..b398503fbac18 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -679,10 +679,10 @@ OpenMPIRBuilder::getInnermostDirectionRegion(omp::Directive DK) { } OpenMPIRBuilder::OMPRegionInfo * -OpenMPIRBuilder::pushRegion(omp::Directive DK, bool IsCancellable +OpenMPIRBuilder::pushRegion(RegionKind Kind, omp::Directive DK, bool IsCancellable //, LeaveRegionCallbackTy FiniCB ) { - RegionStack.emplace_back(new OMPRegionInfo(RegionKind::Directive, DK, + RegionStack.emplace_back(new OMPRegionInfo(Kind, DK, IsCancellable //, std::move(FiniCB) )); return RegionStack.back().get(); @@ -733,7 +733,7 @@ void OpenMPIRBuilder::popRegion(OMPRegionInfo *R, BasicBlock *ContinueBB, assert(!B.BB->getTerminator()); Builder.SetInsertPoint(B.BB); - if (B.Target == DK) { + if (B.Target == Innermost) { // Join common finialization block Builder.SetInsertPoint(B.BB); BranchInst *TI = Builder.CreateBr(ContinueBB); @@ -759,15 +759,19 @@ void OpenMPIRBuilder::popRegion(OMPRegionInfo *R, BasicBlock *ContinueBB, void OpenMPIRBuilder 
::OMPRegionBreak:: assertOK() const { #ifndef NDEBUG - assert(!BB || !BB->getTerminator()); + assert(!BB->getTerminator()); + switch (Reason) { case OMPD_cancellation_point: + case OMPD_cancel: case OMPD_barrier: break; default: llvm_unreachable("unexpected region break reason"); } - switch (Target) { + + assert(Target); + switch (Target->DK) { case OMPD_parallel: case OMPD_sections: break; @@ -789,7 +793,11 @@ void OpenMPIRBuilder::OMPRegionInfo::assertOK() const { case RegionKind::Directive: switch (DK) { case OMPD_parallel: - case OMPD_sections: + case OMPD_sections: case OMPD_single: + case OMPD_master: + case OMPD_masked: + case OMPD_critical: + case OMPD_ordered: break; default: llvm_unreachable("Not a recognized OpenMP region"); @@ -946,9 +954,9 @@ OpenMPIRBuilder::createCancel(const LocationDescription &Loc, void OpenMPIRBuilder::emitCancelationCheckImpl(LocationDescription Loc, Value *CancelFlag, - omp::Directive CanceledDirective, + omp::Directive CancelledDirective, omp::Directive CancelledBy) { - assert(isLastFinalizationInfoCancellable(CanceledDirective) && + assert(isLastFinalizationInfoCancellable(CancelledDirective) && "Unexpected cancellation!"); // For a cancel barrier we create two new blocks. @@ -1000,7 +1008,7 @@ void OpenMPIRBuilder::emitCancelationCheckImpl(LocationDescription Loc, // Unless cancellation has been detected by a barrier itself, need to // synchronize between threads (after finalization). 
Builder.SetInsertPoint(CancellationBlock); - if (CanceledDirective == OMPD_parallel && CancelledBy != OMPD_barrier) + if (CancelledDirective == OMPD_parallel && CancelledBy != OMPD_barrier) emitBarrierImpl(Loc, CancelledBy, false, false); auto CancellationIP = Builder.saveIP(); @@ -1020,8 +1028,7 @@ void OpenMPIRBuilder::emitCancelationCheckImpl(LocationDescription Loc, auto &FI = FinalizationStack.back(); FI.FiniCB(Builder.saveIP()); #endif - RegionStack.back()->addBreak(CancellationBlock, CancelledBy, - CanceledDirective); + RegionStack.back()->addBreak(CancellationBlock, CancelledBy, getInnermostDirectionRegion(CancelledDirective) ); // emitRegionExit({CancellationBlock, CancellationBlock->begin()}, // getInnermostDirectionRegion(CanceledDirective), CancelledBy); @@ -1979,10 +1986,14 @@ OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc, Builder.CreateBr(CL->getPreheader()); } + auto LoopRegion = pushRegion(RegionKind::CanonicalLoop, OMPD_unknown, false); + // Emit the body content. We do it after connecting the loop to the CFG to // avoid that the callback encounters degenerate BBs. 
BodyGenCB(CL->getBodyIP(), CL->getIndVar()); + popRegion(LoopRegion, nullptr, {}); + #ifndef NDEBUG CL->assertOK(); #endif @@ -3286,6 +3297,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSingle( // } // __kmpc_barrier + + EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB, /*Conditional*/ true, /*hasFinalize*/ true); @@ -3414,6 +3427,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion( if (HasFinalize) FinalizationStack.push_back({FiniCB, OMPD, IsCancellable}); #endif + // Create inlined region's entry and body blocks, in preparation // for conditional creation @@ -3425,13 +3439,19 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion( BasicBlock *FiniBB = EntryBB->splitBasicBlock(EntryBB->getTerminator(), "omp_region.finalize"); + auto Region = pushRegion(OMPD, IsCancellable); Builder.SetInsertPoint(EntryBB->getTerminator()); emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional); + + // generate body BodyGenCB(/* AllocaIP */ InsertPointTy(), /* CodeGenIP */ Builder.saveIP()); + + + // emit exit call and do any needed finalization. auto FinIP = InsertPointTy(FiniBB, FiniBB->getFirstInsertionPt()); assert(FiniBB->getTerminator()->getNumSuccessors() == 1 && @@ -3440,19 +3460,22 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion( emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize); assert(FiniBB->getUniquePredecessor()->getUniqueSuccessor() == FiniBB && "Unexpected Control Flow State!"); - MergeBlockIntoPredecessor(FiniBB); + popRegion(Region, FiniBB, FiniCB); + MergeBlockIntoPredecessor(FiniBB); // stop doing that // If we are skipping the region of a non conditional, remove the exit // block, and clear the builder's insertion point. 
assert(SplitPos->getParent() == ExitBB && "Unexpected Insertion point location!"); - auto merged = MergeBlockIntoPredecessor(ExitBB); + auto merged = MergeBlockIntoPredecessor(ExitBB); // stop doing that BasicBlock *ExitPredBB = SplitPos->getParent(); auto InsertBB = merged ? ExitPredBB : ExitBB; if (!isa_and_nonnull(SplitPos)) SplitPos->eraseFromParent(); Builder.SetInsertPoint(InsertBB); + + return Builder.saveIP(); } @@ -3465,7 +3488,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry( BasicBlock *EntryBB = Builder.GetInsertBlock(); Value *CallBool = Builder.CreateIsNotNull(EntryCall); auto *ThenBB = BasicBlock::Create(M.getContext(), "omp_region.body"); - auto *UI = new UnreachableInst(Builder.getContext(), ThenBB); + auto *UI = new UnreachableInst(Builder.getContext(), ThenBB); // stop doing that // Emit thenBB and set the Builder's insertion point there for // body generation next. Place the block after the current block. @@ -3741,7 +3764,7 @@ OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD, // else // return; - auto *UI = Builder.CreateUnreachable(); + auto *UI = Builder.CreateUnreachable(); // Don't do that BasicBlock *CheckBB = UI->getParent(); BasicBlock *UserCodeEntryBB = CheckBB->splitBasicBlock(UI, "user_code.entry"); From 2e77b0029f7f637543aa052fb163b806afe248d4 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sun, 1 May 2022 02:29:57 -0500 Subject: [PATCH 22/50] yerstay's work --- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 77 +++--- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 228 ++++++++++-------- 2 files changed, 157 insertions(+), 148 deletions(-) diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index afb4f28f2b250..223163f0c359f 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -131,43 +131,36 @@ class OpenMPIRBuilder { /// The kind of region that is being 
exited. Control flow will rejoin after the innermost region of this kind. OMPRegionInfo* Target; - OMPRegionBreak(BasicBlock *BB, omp::Directive Reason, OMPRegionInfo* Target) - : BB(BB), Reason(Reason), Target(Target) {} + OMPRegionBreak(BasicBlock *BB, omp::Directive Reason, OMPRegionInfo* Target); /// Consistency self-check. void assertOK() const; }; + + /// An OpenMP region with a single entry and single exit (unless containing a irregular exit) that may be associated with a construct. struct OMPRegionInfo { + /// The kind of region: topmost sentinel, loop, or directive. RegionKind Kind; + + /// If this region represents a directive-associated region, the kind of directive. omp::Directive DK; /// Inside a parallel region, determines whether a barrier must check /// whether cancellation has occured. - // TODO: remove; determine ourselves whether there was a cancelling - // construct inside. + // TODO: Do not rely on the frontend to know whether a region contains a cancellation construct, but determine within OpenMPIRBuilder itself. bool IsCancellable; + /// Irregular exits (such as cancellation points) out of this region. SmallVector Breaks; - OMPRegionInfo(RegionKind Kind, omp::Directive DK, bool IsCancellable - //, LeaveRegionCallbackTy FiniCB - ) - : Kind(Kind), DK(DK), IsCancellable(IsCancellable) - // , FiniCB(std::move(FiniCB)) - { - assertOK(); - } + OMPRegionInfo(RegionKind Kind, omp::Directive DK, bool IsCancellable ); -#ifndef NDEBUG - ~OMPRegionInfo() { - assertOK(); - assert(Breaks.empty()); - } -#endif + /// Register an irregular exit to this region. 
void addBreak(BasicBlock *BB, omp::Directive Reason, OMPRegionInfo* Target) { + assert(IsCancellable && "Only cancellable region may have irregular exits"); assert(!BB->getTerminator()); Breaks.emplace_back(BB, Reason, Target); } @@ -177,32 +170,36 @@ class OpenMPIRBuilder { }; private: - /// The finalization stack made up of finalize callbacks currently in-flight, - /// wrapped into FinalizationInfo objects that reference also the finalization - /// target block and the kind of cancellable directive. + /// The stack of regions surrounding the current in-progress code generation location. Regions are pushed and popped when entering/leaving a region. Constructs/directives that are sensitive to surrounding regions (such as cancellation) must be emitted inside the BodyGenCallbackTy of the surrounding constructs. SmallVector, 8> RegionStack; - OMPRegionInfo *getInnermostDirectionRegion(omp::Directive DK); + /// Return the innermost surrounding region of a specific directive kind, or the toplevel region if not present. + OMPRegionInfo *getInnermostRegion(omp::Directive DK); -private: - OMPRegionInfo *pushRegion(RegionKind Kind, omp::Directive DK, bool IsCancellable); - OMPRegionInfo *pushRegion(omp::Directive DK, bool IsCancellable) { - return pushRegion(RegionKind::Directive, DK, IsCancellable); + /// Return true if the last entry in the finalization stack is of kind \p DK + /// and cancellable. + bool isLastFinalizationInfoCancellable(omp::Directive DK); + + + /// @{ + /// Push a new region to the region stack. Must eventually be popped again using exitRegion. + OMPRegionInfo *enterRegion(RegionKind Kind, omp::Directive DK, bool IsCancellable); + OMPRegionInfo *enterRegion(omp::Directive DK, bool IsCancellable) { + return enterRegion(RegionKind::Directive, DK, IsCancellable); } + /// @} - // void emitRegionExit(InsertPointTy ExitingIP, OMPRegionInfo *RegionToLeave, - // omp::Directive LeaveReason = omp::OMPD_unknown); + /// Pop a region from the region stack. 
Exits are handled the following way: + /// + /// 1. For the regular region exit, \p FinCB is used by the caller to emit finalization code somehwere on the control path exiting the region. exitRegion itself does nothing. + /// + /// 2. For irregular region exits that rejoing with the control flow after this region, exitRegion emits a branch to \p FinalizationBB containing the finalization code. This is typically that same code as for case 1 avoiding emitting the same finialization code multiple times. + /// + /// 3. For irregular region exits that rejoin a surrounding region, exitRegion calls \p FinCB to insert the finalization code into the exiting control path. The irregular exit is then added as an irregular exit of the sourrounding loop that, opon its exit, can add its own finialization code and/or rejoin the control flow there. + void exitRegion(OMPRegionInfo *R, BasicBlock *FinalizationBB, + function_ref FinCB); - void popRegion(OMPRegionInfo *R, BasicBlock *ContinueBB, - function_ref LeaveCb); -private: - /// Return true if the last entry in the finalization stack is of kind \p DK - /// and cancellable. - bool isLastFinalizationInfoCancellable(omp::Directive DK) { - // FIXME: Don't all the regions in-between also need to be cancellable? - return getInnermostDirectionRegion(DK)->IsCancellable; - } public: /// Callback type for body (=inner region) code generation @@ -410,8 +407,7 @@ class OpenMPIRBuilder { Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, InsertPointTy ComputeIP = {}, - const Twine &Name = "loop", - omp::Directive DK = omp::OMPD_unknown); + const Twine &Name = "loop"); /// Collapse a loop nest into a single loop. /// @@ -497,8 +493,7 @@ class OpenMPIRBuilder { /// /// \returns Point where to insert code after the workshare construct. 
InsertPointTy applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, - InsertPointTy AllocaIP, - bool NeedsBarrier); + InsertPointTy AllocaIP, bool NeedsBarrier); /// Modifies the canonical loop a statically-scheduled workshare loop with a /// user-specified chunk size. diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index b398503fbac18..6194efbffcc6e 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -661,66 +661,133 @@ Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc, Loc.IP.getBlock()->getParent()); } + + Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) { return Builder.CreateCall( getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num), Ident, "omp_global_thread_num"); } + + +OpenMPIRBuilder::OMPRegionBreak::OMPRegionBreak(BasicBlock *BB, omp::Directive Reason, OMPRegionInfo* Target) + : BB(BB), Reason(Reason), Target(Target) { +#ifndef NDEBUG + assertOK(); +#endif +} + + + +void OpenMPIRBuilder ::OMPRegionBreak:: assertOK() const { +#ifndef NDEBUG + assert(!BB->getTerminator()); + + switch (Reason) { + case OMPD_cancellation_point: + case OMPD_cancel: + case OMPD_barrier: + break; + default: + llvm_unreachable("unexpected region break reason"); + } + + assert(Target); + switch (Target->DK) { + case OMPD_parallel: + case OMPD_sections: + break; + default: + llvm_unreachable("unexpected region break target"); + } +#endif +} + + + + +OpenMPIRBuilder::OMPRegionInfo::OMPRegionInfo(RegionKind Kind, omp::Directive DK, bool IsCancellable ) + : Kind(Kind), DK(DK), IsCancellable(IsCancellable) { +#ifndef NDEBUF + assertOK(); +#endif +} + + +void OpenMPIRBuilder::OMPRegionInfo::assertOK() const { +#ifndef NDEBUG + switch (Kind) { + case RegionKind::Toplevel: + assert(DK == OMPD_unknown && "toplevel region is not a specific kind"); + assert(!IsCancellable && "top-level is not cancellable"); + break; + case 
RegionKind::CanonicalLoop: + break; + case RegionKind::Directive: + switch (DK) { + case OMPD_parallel: + case OMPD_sections: case OMPD_single: + case OMPD_master: + case OMPD_masked: + case OMPD_critical: + case OMPD_ordered: + break; + default: + llvm_unreachable("Not a recognized OpenMP region"); + } + break; + } + + for (auto &B:Breaks) { + B.assertOK(); + } +#endif +} + + + OpenMPIRBuilder::OMPRegionInfo * -OpenMPIRBuilder::getInnermostDirectionRegion(omp::Directive DK) { +OpenMPIRBuilder::getInnermostRegion(omp::Directive DK) { for (auto &R : reverse(RegionStack)) { - if (R->Kind == RegionKind::Toplevel) - return R.get(); if (R->Kind == RegionKind::Directive && R->DK == DK) return R.get(); } - llvm_unreachable("expected toplevel region"); + return RegionStack.front().get(); } -OpenMPIRBuilder::OMPRegionInfo * -OpenMPIRBuilder::pushRegion(RegionKind Kind, omp::Directive DK, bool IsCancellable - //, LeaveRegionCallbackTy FiniCB -) { - RegionStack.emplace_back(new OMPRegionInfo(Kind, DK, - IsCancellable //, std::move(FiniCB) - )); - return RegionStack.back().get(); -} -#if 0 -void OpenMPIRBuilder::emitRegionExit(InsertPointTy ExitingIP, - OMPRegionInfo *RegionToLeave, - omp::Directive LeaveReason) { + +bool OpenMPIRBuilder::isLastFinalizationInfoCancellable(omp::Directive DK) { + auto MatchingRegion = getInnermostRegion(DK); #ifndef NDEBUG - switch (LeaveReason) { - case OMPD_unknown: - // Regular region exit - break; - case OMPD_cancellation_point: - case OMPD_barrier: - case OMPD_cancel: - // Cancellation // TODO: Also need need to know whether #pragma omp cancel - // for/#pragma omp cancel parallel/?? 
- break; - default: - llvm_unreachable("unrecognized reason to leave region"); - } + if (MatchingRegion->Kind != RegionKind::Toplevel) { + for (auto &R : reverse(RegionStack)) { + if (R->Kind == RegionKind::Directive && R->DK == DK) + break; + + assert(R->IsCancellable && "Every region in-between must be cancellable as well"); + } + } #endif + return MatchingRegion->IsCancellable; +} - for (auto &R : reverse(RegionStack)) { - // if (R->FiniCB) - // R->FiniCB(ExitingIP, LeaveReason, R.get()); - if (R.get() == RegionToLeave) - return; - } - llvm_unreachable("region to exit not on stack?"); + +OpenMPIRBuilder::OMPRegionInfo * +OpenMPIRBuilder::enterRegion( OpenMPIRBuilder::RegionKind Kind, omp::Directive DK, bool IsCancellable) { + RegionStack.emplace_back(new OMPRegionInfo(Kind, DK, IsCancellable )); + return RegionStack.back().get(); } -#endif -void OpenMPIRBuilder::popRegion(OMPRegionInfo *R, BasicBlock *ContinueBB, - function_ref LeaveCb) { + + + + + +void OpenMPIRBuilder::exitRegion(OMPRegionInfo *R, BasicBlock *FinalizationBB, + function_ref FinCB) { auto DK = R->DK; assert(RegionStack.back().get() == R && "balanced region push/pop required"); R->assertOK(); @@ -736,14 +803,14 @@ void OpenMPIRBuilder::popRegion(OMPRegionInfo *R, BasicBlock *ContinueBB, if (B.Target == Innermost) { // Join common finialization block Builder.SetInsertPoint(B.BB); - BranchInst *TI = Builder.CreateBr(ContinueBB); + BranchInst *TI = Builder.CreateBr(FinalizationBB); // if (LeaveCb) // LeaveCb( InsertPointTy(TI->getParent(), TI->getIterator()), // B.Reason, Innermost); B.BB = nullptr; - } else if (LeaveCb) { + } else if (FinCB) { B.BB = splitBB(Builder, true, ".fini"); - LeaveCb(Builder.saveIP()); + FinCB(Builder.saveIP()); Builder.SetInsertPoint(B.BB); } } @@ -757,59 +824,6 @@ void OpenMPIRBuilder::popRegion(OMPRegionInfo *R, BasicBlock *ContinueBB, RegionStack.pop_back(); } -void OpenMPIRBuilder ::OMPRegionBreak:: assertOK() const { -#ifndef NDEBUG - assert(!BB->getTerminator()); - 
- switch (Reason) { - case OMPD_cancellation_point: - case OMPD_cancel: - case OMPD_barrier: - break; - default: - llvm_unreachable("unexpected region break reason"); - } - - assert(Target); - switch (Target->DK) { - case OMPD_parallel: - case OMPD_sections: - break; - default: - llvm_unreachable("unexpected region break target"); - } -#endif -} - -void OpenMPIRBuilder::OMPRegionInfo::assertOK() const { -#ifndef NDEBUG - switch (Kind) { - case RegionKind::Toplevel: - assert(DK == OMPD_unknown && "toplevel region is not a specific kind"); - assert(!IsCancellable && "top-level is not cancellable"); - break; - case RegionKind::CanonicalLoop: - break; - case RegionKind::Directive: - switch (DK) { - case OMPD_parallel: - case OMPD_sections: case OMPD_single: - case OMPD_master: - case OMPD_masked: - case OMPD_critical: - case OMPD_ordered: - break; - default: - llvm_unreachable("Not a recognized OpenMP region"); - } - break; - } - - for (auto &B:Breaks) { - B.assertOK(); - } -#endif -} OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createBarrier(const LocationDescription &Loc, Directive DK, @@ -1028,7 +1042,7 @@ void OpenMPIRBuilder::emitCancelationCheckImpl(LocationDescription Loc, auto &FI = FinalizationStack.back(); FI.FiniCB(Builder.saveIP()); #endif - RegionStack.back()->addBreak(CancellationBlock, CancelledBy, getInnermostDirectionRegion(CancelledDirective) ); + RegionStack.back()->addBreak(CancellationBlock, CancelledBy, getInnermostRegion(CancelledDirective) ); // emitRegionExit({CancellationBlock, CancellationBlock->begin()}, // getInnermostDirectionRegion(CanceledDirective), CancelledBy); @@ -1153,7 +1167,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( }; // FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable}); - OMPRegionInfo *ParallelRegion = pushRegion(OMPD_parallel, IsCancellable + OMPRegionInfo *ParallelRegion = enterRegion(OMPD_parallel, IsCancellable //, FiniCBWrapper ); #endif @@ -1323,7 +1337,7 @@ 
IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( }), ParallelRegion->Breaks.end() ); //emitRegionExit(PreFiniIP, ParallelRegion); #endif - popRegion(ParallelRegion, PRegPreFiniBB, FiniCB); + exitRegion(ParallelRegion, PRegPreFiniBB, FiniCB); OI.OuterAllocaBB = OuterAllocaBlock; OI.EntryBB = PRegEntryBB; @@ -1543,7 +1557,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections( // TODO: Use CanonicalLoopInfo finalization. FinalizationStack.push_back({FiniCBWrapper, OMPD_sections, IsCancellable}); #endif - auto SectionsRegion = pushRegion(OMPD_sections, IsCancellable); + auto SectionsRegion = enterRegion(OMPD_sections, IsCancellable); // Each section is emitted as a switch case // Each finalization callback is handled from clang.EmitOMPSectionDirective() @@ -1617,7 +1631,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections( } // emitRegionExit(Builder.saveIP(), SectionsRegion); - popRegion(SectionsRegion, Finish, FiniCB); + exitRegion(SectionsRegion, Finish, FiniCB); return {Finish, Finish->begin()}; } @@ -1667,7 +1681,7 @@ OpenMPIRBuilder::createSection(const LocationDescription &Loc, }; #endif - Directive OMPD = Directive::OMPD_sections; + Directive OMPD = Directive::OMPD_section; // Since we are using Finalization Callback here, HasFinalize // and IsCancellable have to be true return EmitOMPInlinedRegion(OMPD, nullptr, nullptr, BodyGenCB, {}, @@ -1986,13 +2000,13 @@ OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc, Builder.CreateBr(CL->getPreheader()); } - auto LoopRegion = pushRegion(RegionKind::CanonicalLoop, OMPD_unknown, false); + auto LoopRegion = enterRegion(RegionKind::CanonicalLoop, OMPD_unknown, false); // Emit the body content. We do it after connecting the loop to the CFG to // avoid that the callback encounters degenerate BBs. 
BodyGenCB(CL->getBodyIP(), CL->getIndVar()); - popRegion(LoopRegion, nullptr, {}); + exitRegion(LoopRegion, nullptr, {}); #ifndef NDEBUG CL->assertOK(); @@ -2003,7 +2017,7 @@ OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc, CanonicalLoopInfo *OpenMPIRBuilder::createCanonicalLoop( const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, - InsertPointTy ComputeIP, const Twine &Name, omp::Directive DK) { + InsertPointTy ComputeIP, const Twine &Name) { // Consider the following difficulties (assuming 8-bit signed integers): // * Adding \p Step to the loop counter which passes \p Stop may overflow: @@ -3439,7 +3453,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion( BasicBlock *FiniBB = EntryBB->splitBasicBlock(EntryBB->getTerminator(), "omp_region.finalize"); - auto Region = pushRegion(OMPD, IsCancellable); + auto Region = enterRegion(OMPD, IsCancellable); Builder.SetInsertPoint(EntryBB->getTerminator()); emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional); @@ -3460,7 +3474,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion( emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize); assert(FiniBB->getUniquePredecessor()->getUniqueSuccessor() == FiniBB && "Unexpected Control Flow State!"); - popRegion(Region, FiniBB, FiniCB); + exitRegion(Region, FiniBB, FiniCB); MergeBlockIntoPredecessor(FiniBB); // stop doing that // If we are skipping the region of a non conditional, remove the exit From 47635fcb490ff1e55a25b524a59e62226c5d282e Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sun, 1 May 2022 03:19:41 -0500 Subject: [PATCH 23/50] fixin cancellation --- clang/test/OpenMP/cancel_codegen.cpp | 1312 ++++++++--------- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 32 +- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 166 +-- 3 files changed, 692 insertions(+), 818 deletions(-) diff --git 
a/clang/test/OpenMP/cancel_codegen.cpp b/clang/test/OpenMP/cancel_codegen.cpp index c47e667e65de7..7cfcd563cf576 100644 --- a/clang/test/OpenMP/cancel_codegen.cpp +++ b/clang/test/OpenMP/cancel_codegen.cpp @@ -1336,13 +1336,13 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_37:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_34:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I39:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I36:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK3-NEXT: [[R:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 0, i32* [[RETVAL]], align 4 @@ -1389,12 +1389,8 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 3) // CHECK3-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK3-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL_FINI:%.*]] -// CHECK3: omp_section_loop.body.case.cncl.fini: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] -// CHECK3: omp_section_loop.body.case.cncl: -// CHECK3-NEXT: br label [[SECTION_FINI:%.*]] -// CHECK3: omp_section_loop.body.case.cont: +// CHECK3-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] +// CHECK3: 
omp_section_loop.body.case.split: // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_SECTION_AFTER:%.*]] // CHECK3: omp_section_loop.body.case.section.after: // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_SECTIONS_AFTER]] @@ -1409,14 +1405,10 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTER:%.*]] // CHECK3: omp_section_loop.after: -// CHECK3-NEXT: br label [[SECTION_FINISH:%.*]] -// CHECK3: section_finish: -// CHECK3-NEXT: br label [[DOTOMPFINALIZE13:%.*]] -// CHECK3: .ompfinalize13: -// CHECK3-NEXT: br label [[SECTION_FINI]] -// CHECK3: section_fini: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER14:%.*]] -// CHECK3: omp_section_loop.preheader14: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTERSECTIONS_FINI:%.*]] +// CHECK3: omp_section_loop.aftersections.fini: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]] +// CHECK3: omp_section_loop.preheader13: // CHECK3-NEXT: store i32 0, i32* [[P_LOWERBOUND29]], align 4 // CHECK3-NEXT: store i32 1, i32* [[P_UPPERBOUND30]], align 4 // CHECK3-NEXT: store i32 1, i32* [[P_STRIDE31]], align 4 @@ -1426,93 +1418,79 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[P_UPPERBOUND30]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], [[TMP9]] // CHECK3-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 1 -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_HEADER15:%.*]] -// CHECK3: omp_section_loop.header15: -// CHECK3-NEXT: [[OMP_SECTION_LOOP_IV21:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER14]] ], [ [[OMP_SECTION_LOOP_NEXT23:%.*]], [[OMP_SECTION_LOOP_INC18:%.*]] ] -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_COND16:%.*]] -// CHECK3: omp_section_loop.cond16: -// CHECK3-NEXT: [[OMP_SECTION_LOOP_CMP22:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV21]], [[TMP12]] -// CHECK3-NEXT: br i1 [[OMP_SECTION_LOOP_CMP22]], label 
[[OMP_SECTION_LOOP_BODY17:%.*]], label [[OMP_SECTION_LOOP_EXIT19:%.*]] -// CHECK3: omp_section_loop.body17: -// CHECK3-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV21]], [[TMP9]] +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_HEADER14:%.*]] +// CHECK3: omp_section_loop.header14: +// CHECK3-NEXT: [[OMP_SECTION_LOOP_IV20:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER13]] ], [ [[OMP_SECTION_LOOP_NEXT22:%.*]], [[OMP_SECTION_LOOP_INC17:%.*]] ] +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_COND15:%.*]] +// CHECK3: omp_section_loop.cond15: +// CHECK3-NEXT: [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP12]] +// CHECK3-NEXT: br i1 [[OMP_SECTION_LOOP_CMP21]], label [[OMP_SECTION_LOOP_BODY16:%.*]], label [[OMP_SECTION_LOOP_EXIT18:%.*]] +// CHECK3: omp_section_loop.body16: +// CHECK3-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP9]] // CHECK3-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 1 // CHECK3-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], 0 -// CHECK3-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER:%.*]] [ -// CHECK3-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE24:%.*]] -// CHECK3-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE26:%.*]] +// CHECK3-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER:%.*]] [ +// CHECK3-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE23:%.*]] +// CHECK3-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE25:%.*]] // CHECK3-NEXT: ] -// CHECK3: omp_section_loop.body.case24: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM25:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM25]], i32 3) +// CHECK3: omp_section_loop.body.case23: +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 
[[OMP_GLOBAL_THREAD_NUM24]], i32 3) // CHECK3-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE24_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE24_CNCL_FINI:%.*]] -// CHECK3: omp_section_loop.body.case24.cncl.fini: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE24_CNCL:%.*]] -// CHECK3: omp_section_loop.body.case24.cncl: -// CHECK3-NEXT: br label [[SECTION_FINI35:%.*]] -// CHECK3: omp_section_loop.body.case24.cont: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE24_SECTION_AFTER:%.*]] -// CHECK3: omp_section_loop.body.case24.section.after: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER]] -// CHECK3: omp_section_loop.body.case26: +// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE23_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE23_CNCL:%.*]] +// CHECK3: omp_section_loop.body.case23.split: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE23_SECTION_AFTER:%.*]] +// CHECK3: omp_section_loop.body.case23.section.after: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] +// CHECK3: omp_section_loop.body.case25: // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM27:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM27]], i32 3) // CHECK3-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE26_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL_FINI:%.*]] -// CHECK3: omp_section_loop.body.case26.cncl.fini: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL:%.*]] -// CHECK3: omp_section_loop.body.case26.cncl: -// CHECK3-NEXT: br label [[OMP_REGION_FINALIZE:%.*]] -// CHECK3: omp_section_loop.body.case26.cont: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTIONFINI:%.*]] -// CHECK3: omp_section_loop.body.case26.sectionfini: -// 
CHECK3-NEXT: br label [[OMP_REGION_FINALIZE]] -// CHECK3: omp_region.finalize: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTION_AFTER:%.*]] -// CHECK3: omp_section_loop.body.case26.section.after: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER]] -// CHECK3: omp_section_loop.body17.sections.after: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_INC18]] -// CHECK3: omp_section_loop.inc18: -// CHECK3-NEXT: [[OMP_SECTION_LOOP_NEXT23]] = add nuw i32 [[OMP_SECTION_LOOP_IV21]], 1 -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_HEADER15]] -// CHECK3: omp_section_loop.exit19: +// CHECK3-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]] +// CHECK3: omp_section_loop.body.case25.split: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER26:%.*]] +// CHECK3: omp_section_loop.body.case25.section.after26: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER:%.*]] +// CHECK3: omp_section_loop.body.case25.section.after: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] +// CHECK3: omp_section_loop.body16.sections.after: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_INC17]] +// CHECK3: omp_section_loop.inc17: +// CHECK3-NEXT: [[OMP_SECTION_LOOP_NEXT22]] = add nuw i32 [[OMP_SECTION_LOOP_IV20]], 1 +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_HEADER14]] +// CHECK3: omp_section_loop.exit18: // CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]]) // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM33:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM33]]) -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTER20:%.*]] -// CHECK3: omp_section_loop.after20: -// CHECK3-NEXT: br label [[SECTION_FINISH34:%.*]] -// CHECK3: section_finish34: -// CHECK3-NEXT: br label 
[[DOTOMPFINALIZE36:%.*]] -// CHECK3: .ompfinalize36: -// CHECK3-NEXT: br label [[SECTION_FINI35]] -// CHECK3: section_fini35: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTER19:%.*]] +// CHECK3: omp_section_loop.after19: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTER19SECTIONS_FINI:%.*]] +// CHECK3: omp_section_loop.after19sections.fini: // CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP21]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK3-NEXT: [[SUB38:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK3-NEXT: store i32 [[SUB38]], i32* [[DOTCAPTURE_EXPR_37]], align 4 +// CHECK3-NEXT: [[SUB35:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK3-NEXT: store i32 [[SUB35]], i32* [[DOTCAPTURE_EXPR_34]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 // CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP22]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 // CHECK3-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM40:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM40]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// 
CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM37:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM37]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 -// CHECK3-NEXT: [[CMP41:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] -// CHECK3-NEXT: br i1 [[CMP41]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK3-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: br i1 [[CMP38]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: // CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 @@ -1526,23 +1504,29 @@ for (int i = 0; i < argc; ++i) { // CHECK3: omp.inner.for.cond: // CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP42:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] -// CHECK3-NEXT: br i1 [[CMP42]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[CMP39:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK3-NEXT: br i1 [[CMP39]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 -// CHECK3-NEXT: [[ADD43:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store 
i32 [[ADD43]], i32* [[I39]], align 4 +// CHECK3-NEXT: [[ADD40:%.*]] = add nsw i32 0, [[MUL]] +// CHECK3-NEXT: store i32 [[ADD40]], i32* [[I36]], align 4 // CHECK3-NEXT: [[TMP32:%.*]] = load float, float* @flag, align 4 -// CHECK3-NEXT: [[TOBOOL44:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 -// CHECK3-NEXT: br i1 [[TOBOOL44]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK3-NEXT: [[TOBOOL41:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 +// CHECK3-NEXT: br i1 [[TOBOOL41]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) -// CHECK3-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM45]], i32 2) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) +// CHECK3-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 2) // CHECK3-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 // CHECK3-NEXT: br i1 [[TMP34]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK3: .cancel.exit: // CHECK3-NEXT: br label [[CANCEL_EXIT:%.*]] +// CHECK3: omp_section_loop.body.case.cncl: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_EXIT]] +// CHECK3: omp_section_loop.body.case23.cncl: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] +// CHECK3: omp_section_loop.body.case25.cncl: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] // CHECK3: .cancel.continue: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: @@ -1553,30 +1537,30 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: // CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD46:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK3-NEXT: store i32 [[ADD46]], i32* 
[[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK3-NEXT: store i32 [[ADD43]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM48]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM45]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: cancel.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM47]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM44:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM44]]) // CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: omp.precond.end: // CHECK3-NEXT: br label [[CANCEL_CONT]] // CHECK3: cancel.cont: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM49:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM49]]) -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM50:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) -// CHECK3-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM50]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* 
@.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM46:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM46]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) +// CHECK3-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) // CHECK3-NEXT: [[TMP37:%.*]] = bitcast i8* [[TMP36]] to %struct.kmp_task_t_with_privates* // CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 0 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM51:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) -// CHECK3-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM51]], i8* [[TMP36]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) +// CHECK3-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM48]], i8* [[TMP36]]) // CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) // CHECK3-NEXT: store i32 0, i32* [[R]], align 4 @@ -1600,54 +1584,50 @@ for (int i = 0; i < argc; ++i) { // CHECK3: omp.par.region: // CHECK3-NEXT: [[TMP2:%.*]] = load float, float* @flag, align 4 // CHECK3-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00 -// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_PAR_REGION_IF:%.*]], label [[OMP_PAR_REGION1:%.*]] -// CHECK3: omp.par.region1: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i32 [[TMP3]] to i8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP4]], i64 0 -// CHECK3-NEXT: [[TMP5:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i64 0 -// CHECK3-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX4]], align 1 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM5]]) -// CHECK3-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0 -// CHECK3-NEXT: br i1 [[TMP7]], label [[OMP_PAR_REGION1_CONT:%.*]], label [[OMP_PAR_REGION1_CNCL_FINI:%.*]] -// CHECK3: omp.par.region1.cncl.fini: -// CHECK3-NEXT: br label [[OMP_PAR_REGION1_CNCL:%.*]] -// CHECK3: omp.par.region1.cncl: -// CHECK3-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] -// CHECK3: omp.par.pre_finalize: -// CHECK3-NEXT: br label [[DOTOMPFINALIZE:%.*]] -// CHECK3: .ompfinalize: +// CHECK3-NEXT: br i1 [[TOBOOL]], label [[TMP14:%.*]], label [[TMP3:%.*]] +// CHECK3: 3: +// CHECK3-NEXT: br label [[TMP4:%.*]] +// CHECK3: 4: +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// 
CHECK3-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP6]], i64 0 +// CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 0 +// CHECK3-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1 +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +// CHECK3-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 +// CHECK3-NEXT: br i1 [[TMP9]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]] +// CHECK3: .cncl5: // CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] -// CHECK3: omp.par.region1.cont: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP9]], i64 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP10]], i64 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1 -// CHECK3-NEXT: [[CONV8:%.*]] = sext i8 [[TMP11]] to i32 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP8]] +// CHECK3: .cont: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0 +// CHECK3-NEXT: [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i8, i8* 
[[ARRAYIDX7]], align 1 +// CHECK3-NEXT: [[CONV8:%.*]] = sext i8 [[TMP13]] to i32 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP10]] // CHECK3-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD]] to i8 // CHECK3-NEXT: store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1 // CHECK3-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] // CHECK3: omp.par.region.parallel.after: -// CHECK3-NEXT: br label [[OMP_PAR_PRE_FINALIZE]] -// CHECK3: omp.par.region.if: +// CHECK3-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +// CHECK3: omp.par.pre_finalize: +// CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] +// CHECK3: 14: +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1) +// CHECK3-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0 +// CHECK3-NEXT: br i1 [[TMP16]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]] +// CHECK3: .cncl: // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]], i32 1) -// CHECK3-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[OMP_PAR_REGION_IF_CONT:%.*]], label [[OMP_PAR_REGION_IF_CNCL_FINI:%.*]] -// CHECK3: omp.par.region.if.cncl.fini: -// CHECK3-NEXT: br label [[OMP_PAR_REGION_IF_CNCL:%.*]] -// CHECK3: omp.par.region.if.cncl: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) -// CHECK3-NEXT: br label [[OMP_PAR_PRE_FINALIZE]] -// CHECK3: omp.par.region.if.cont: -// CHECK3-NEXT: br label [[OMP_PAR_REGION1]] +// CHECK3-NEXT: [[TMP17:%.*]] = call i32 
@__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +// CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] +// CHECK3: .split: +// CHECK3-NEXT: br label [[TMP4]] // CHECK3: omp.par.outlined.exit.exitStub: // CHECK3-NEXT: ret void // @@ -1736,14 +1716,14 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.sections.case: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK3-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 -// CHECK3-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] -// CHECK3: .cancel.exit: -// CHECK3-NEXT: br label [[CANCEL_EXIT:%.*]] -// CHECK3: .cancel.continue: +// CHECK3-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 +// CHECK3-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] +// CHECK3: .omp.sections.case.split: // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] +// CHECK3: .omp.sections.case.cncl: +// CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: .omp.sections.exit: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: @@ -1752,13 +1732,13 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) // CHECK3-NEXT: call void 
@__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) -// CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] +// CHECK3-NEXT: br label [[CANCEL_CONT]] // CHECK3: cancel.cont: // CHECK3-NEXT: ret void // CHECK3: cancel.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19]]) // CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK3-NEXT: br label [[CANCEL_CONT]] // @@ -1779,7 +1759,7 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 1 @@ -1800,23 +1780,25 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.sections.case: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* 
@[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK3-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 -// CHECK3-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] -// CHECK3: .cancel.exit: -// CHECK3-NEXT: br label [[CANCEL_EXIT:%.*]] -// CHECK3: .cancel.continue: +// CHECK3-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 +// CHECK3-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] +// CHECK3: .omp.sections.case.split: // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] +// CHECK3: .omp.sections.case.cncl: +// CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: .omp.sections.case2: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 3) -// CHECK3-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK3-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT4:%.*]], label [[DOTCANCEL_CONTINUE5:%.*]] -// CHECK3: .cancel.exit4: -// CHECK3-NEXT: br label [[CANCEL_EXIT]] -// CHECK3: .cancel.continue5: +// CHECK3-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0 +// CHECK3-NEXT: br i1 [[TMP10]], label [[DOTOMP_SECTIONS_CASE2_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE2_CNCL:%.*]] +// CHECK3: .omp.sections.case2.split: +// CHECK3-NEXT: br label [[DOTOMP_SECTIONS_CASE2_SECTION_AFTER:%.*]] +// CHECK3: .omp.sections.case2.section.after: // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] +// CHECK3: .omp.sections.case2.cncl: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_END]] // CHECK3: .omp.sections.exit: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: @@ -1825,14 +1807,14 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], 
align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM7]]) -// CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM5]]) +// CHECK3-NEXT: br label [[CANCEL_CONT]] // CHECK3: cancel.cont: // CHECK3-NEXT: ret void // CHECK3: cancel.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) // CHECK3-NEXT: br label [[CANCEL_CONT]] // // @@ -1879,7 +1861,7 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: store i32 0, i32* [[R3]], align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB31:[0-9]+]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: 
[[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 @@ -1907,7 +1889,7 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB33:[0-9]+]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) // CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM7]], i32 2) // CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 // CHECK3-NEXT: br i1 [[TMP15]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] @@ -1929,14 +1911,14 @@ for (int i = 0; i < argc; ++i) { // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35:[0-9]+]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK3-NEXT: [[TMP20:%.*]] = bitcast i32* [[R3]] to i8* // CHECK3-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) // CHECK3-NEXT: [[TMP21:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB36:[0-9]+]], i32 
[[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB30:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] @@ -1946,10 +1928,10 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[R3]], align 4 // CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: store i32 [[ADD13]], i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB30]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: cancel.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) // CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) // CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: .omp.reduction.case2: @@ -2006,13 +1988,13 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTCAPTURE_EXPR_37:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: 
[[DOTCAPTURE_EXPR_34:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[I39:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[I36:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK4-NEXT: [[R:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store i32 0, i32* [[RETVAL]], align 4 @@ -2059,12 +2041,8 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 3) // CHECK4-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK4-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL_FINI:%.*]] -// CHECK4: omp_section_loop.body.case.cncl.fini: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] -// CHECK4: omp_section_loop.body.case.cncl: -// CHECK4-NEXT: br label [[SECTION_FINI:%.*]] -// CHECK4: omp_section_loop.body.case.cont: +// CHECK4-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] +// CHECK4: omp_section_loop.body.case.split: // CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_SECTION_AFTER:%.*]] // CHECK4: omp_section_loop.body.case.section.after: // CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_SECTIONS_AFTER]] @@ -2079,14 +2057,10 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) // CHECK4-NEXT: br label [[OMP_SECTION_LOOP_AFTER:%.*]] // CHECK4: omp_section_loop.after: -// CHECK4-NEXT: br label 
[[SECTION_FINISH:%.*]] -// CHECK4: section_finish: -// CHECK4-NEXT: br label [[DOTOMPFINALIZE13:%.*]] -// CHECK4: .ompfinalize13: -// CHECK4-NEXT: br label [[SECTION_FINI]] -// CHECK4: section_fini: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER14:%.*]] -// CHECK4: omp_section_loop.preheader14: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_AFTERSECTIONS_FINI:%.*]] +// CHECK4: omp_section_loop.aftersections.fini: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]] +// CHECK4: omp_section_loop.preheader13: // CHECK4-NEXT: store i32 0, i32* [[P_LOWERBOUND29]], align 4 // CHECK4-NEXT: store i32 1, i32* [[P_UPPERBOUND30]], align 4 // CHECK4-NEXT: store i32 1, i32* [[P_STRIDE31]], align 4 @@ -2096,93 +2070,79 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[P_UPPERBOUND30]], align 4 // CHECK4-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], [[TMP9]] // CHECK4-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 1 -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_HEADER15:%.*]] -// CHECK4: omp_section_loop.header15: -// CHECK4-NEXT: [[OMP_SECTION_LOOP_IV21:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER14]] ], [ [[OMP_SECTION_LOOP_NEXT23:%.*]], [[OMP_SECTION_LOOP_INC18:%.*]] ] -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_COND16:%.*]] -// CHECK4: omp_section_loop.cond16: -// CHECK4-NEXT: [[OMP_SECTION_LOOP_CMP22:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV21]], [[TMP12]] -// CHECK4-NEXT: br i1 [[OMP_SECTION_LOOP_CMP22]], label [[OMP_SECTION_LOOP_BODY17:%.*]], label [[OMP_SECTION_LOOP_EXIT19:%.*]] -// CHECK4: omp_section_loop.body17: -// CHECK4-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV21]], [[TMP9]] +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_HEADER14:%.*]] +// CHECK4: omp_section_loop.header14: +// CHECK4-NEXT: [[OMP_SECTION_LOOP_IV20:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER13]] ], [ [[OMP_SECTION_LOOP_NEXT22:%.*]], [[OMP_SECTION_LOOP_INC17:%.*]] ] +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_COND15:%.*]] +// CHECK4: 
omp_section_loop.cond15: +// CHECK4-NEXT: [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP12]] +// CHECK4-NEXT: br i1 [[OMP_SECTION_LOOP_CMP21]], label [[OMP_SECTION_LOOP_BODY16:%.*]], label [[OMP_SECTION_LOOP_EXIT18:%.*]] +// CHECK4: omp_section_loop.body16: +// CHECK4-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP9]] // CHECK4-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 1 // CHECK4-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], 0 -// CHECK4-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER:%.*]] [ -// CHECK4-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE24:%.*]] -// CHECK4-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE26:%.*]] +// CHECK4-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER:%.*]] [ +// CHECK4-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE23:%.*]] +// CHECK4-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE25:%.*]] // CHECK4-NEXT: ] -// CHECK4: omp_section_loop.body.case24: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM25:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM25]], i32 3) +// CHECK4: omp_section_loop.body.case23: +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM24]], i32 3) // CHECK4-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0 -// CHECK4-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE24_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE24_CNCL_FINI:%.*]] -// CHECK4: omp_section_loop.body.case24.cncl.fini: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE24_CNCL:%.*]] -// CHECK4: omp_section_loop.body.case24.cncl: -// CHECK4-NEXT: br label [[SECTION_FINI35:%.*]] -// CHECK4: omp_section_loop.body.case24.cont: -// CHECK4-NEXT: br label 
[[OMP_SECTION_LOOP_BODY_CASE24_SECTION_AFTER:%.*]] -// CHECK4: omp_section_loop.body.case24.section.after: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER]] -// CHECK4: omp_section_loop.body.case26: +// CHECK4-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE23_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE23_CNCL:%.*]] +// CHECK4: omp_section_loop.body.case23.split: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE23_SECTION_AFTER:%.*]] +// CHECK4: omp_section_loop.body.case23.section.after: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] +// CHECK4: omp_section_loop.body.case25: // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM27:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM27]], i32 3) // CHECK4-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 0 -// CHECK4-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE26_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL_FINI:%.*]] -// CHECK4: omp_section_loop.body.case26.cncl.fini: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL:%.*]] -// CHECK4: omp_section_loop.body.case26.cncl: -// CHECK4-NEXT: br label [[OMP_REGION_FINALIZE:%.*]] -// CHECK4: omp_section_loop.body.case26.cont: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTIONFINI:%.*]] -// CHECK4: omp_section_loop.body.case26.sectionfini: -// CHECK4-NEXT: br label [[OMP_REGION_FINALIZE]] -// CHECK4: omp_region.finalize: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTION_AFTER:%.*]] -// CHECK4: omp_section_loop.body.case26.section.after: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER]] -// CHECK4: omp_section_loop.body17.sections.after: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_INC18]] -// CHECK4: omp_section_loop.inc18: -// CHECK4-NEXT: [[OMP_SECTION_LOOP_NEXT23]] = add nuw i32 [[OMP_SECTION_LOOP_IV21]], 1 -// 
CHECK4-NEXT: br label [[OMP_SECTION_LOOP_HEADER15]] -// CHECK4: omp_section_loop.exit19: +// CHECK4-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]] +// CHECK4: omp_section_loop.body.case25.split: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER26:%.*]] +// CHECK4: omp_section_loop.body.case25.section.after26: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER:%.*]] +// CHECK4: omp_section_loop.body.case25.section.after: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] +// CHECK4: omp_section_loop.body16.sections.after: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_INC17]] +// CHECK4: omp_section_loop.inc17: +// CHECK4-NEXT: [[OMP_SECTION_LOOP_NEXT22]] = add nuw i32 [[OMP_SECTION_LOOP_IV20]], 1 +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_HEADER14]] +// CHECK4: omp_section_loop.exit18: // CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]]) // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM33:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM33]]) -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_AFTER20:%.*]] -// CHECK4: omp_section_loop.after20: -// CHECK4-NEXT: br label [[SECTION_FINISH34:%.*]] -// CHECK4: section_finish34: -// CHECK4-NEXT: br label [[DOTOMPFINALIZE36:%.*]] -// CHECK4: .ompfinalize36: -// CHECK4-NEXT: br label [[SECTION_FINI35]] -// CHECK4: section_fini35: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_AFTER19:%.*]] +// CHECK4: omp_section_loop.after19: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_AFTER19SECTIONS_FINI:%.*]] +// CHECK4: omp_section_loop.after19sections.fini: // CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* 
[[DOTCAPTURE_EXPR_]], align 4 // CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP21]], 0 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK4-NEXT: [[SUB38:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK4-NEXT: store i32 [[SUB38]], i32* [[DOTCAPTURE_EXPR_37]], align 4 +// CHECK4-NEXT: [[SUB35:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK4-NEXT: store i32 [[SUB35]], i32* [[DOTCAPTURE_EXPR_34]], align 4 // CHECK4-NEXT: store i32 0, i32* [[I]], align 4 // CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP22]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK4: omp.precond.then: // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 // CHECK4-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM40:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM40]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM37:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM37]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 -// CHECK4-NEXT: [[CMP41:%.*]] = 
icmp sgt i32 [[TMP24]], [[TMP25]] -// CHECK4-NEXT: br i1 [[CMP41]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK4-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +// CHECK4-NEXT: br i1 [[CMP38]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: -// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: // CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 @@ -2196,23 +2156,29 @@ for (int i = 0; i < argc; ++i) { // CHECK4: omp.inner.for.cond: // CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP42:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] -// CHECK4-NEXT: br i1 [[CMP42]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4-NEXT: [[CMP39:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK4-NEXT: br i1 [[CMP39]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: // CHECK4-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 -// CHECK4-NEXT: [[ADD43:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD43]], i32* [[I39]], align 4 +// CHECK4-NEXT: [[ADD40:%.*]] = add nsw i32 0, [[MUL]] +// CHECK4-NEXT: store i32 [[ADD40]], i32* [[I36]], align 4 // CHECK4-NEXT: [[TMP32:%.*]] = load float, float* @flag, align 4 -// CHECK4-NEXT: [[TOBOOL44:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 -// CHECK4-NEXT: br i1 [[TOBOOL44]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK4-NEXT: [[TOBOOL41:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 +// CHECK4-NEXT: br i1 [[TOBOOL41]], label [[OMP_IF_THEN:%.*]], label 
[[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) -// CHECK4-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM45]], i32 2) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) +// CHECK4-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 2) // CHECK4-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 // CHECK4-NEXT: br i1 [[TMP34]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK4: .cancel.exit: // CHECK4-NEXT: br label [[CANCEL_EXIT:%.*]] +// CHECK4: omp_section_loop.body.case.cncl: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_EXIT]] +// CHECK4: omp_section_loop.body.case23.cncl: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] +// CHECK4: omp_section_loop.body.case25.cncl: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] // CHECK4: .cancel.continue: // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] // CHECK4: omp_if.else: @@ -2223,30 +2189,30 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: // CHECK4-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD46:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK4-NEXT: store i32 [[ADD46]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK4-NEXT: store i32 [[ADD43]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 
[[OMP_GLOBAL_THREAD_NUM48]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM45]]) // CHECK4-NEXT: br label [[OMP_PRECOND_END]] // CHECK4: cancel.exit: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM47]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM44:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM44]]) // CHECK4-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK4: omp.precond.end: // CHECK4-NEXT: br label [[CANCEL_CONT]] // CHECK4: cancel.cont: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM49:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM49]]) -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM50:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) -// CHECK4-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM50]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM46:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM46]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) +// CHECK4-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) // CHECK4-NEXT: [[TMP37:%.*]] = bitcast i8* [[TMP36]] to %struct.kmp_task_t_with_privates* // CHECK4-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 0 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM51:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) -// CHECK4-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM51]], i8* [[TMP36]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) +// CHECK4-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM48]], i8* [[TMP36]]) // CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) // CHECK4-NEXT: store i32 0, i32* [[R]], align 4 @@ -2270,54 +2236,50 @@ for (int i = 0; i < argc; ++i) { // CHECK4: omp.par.region: // CHECK4-NEXT: [[TMP2:%.*]] = load float, float* @flag, align 4 // CHECK4-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00 -// CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_PAR_REGION_IF:%.*]], label [[OMP_PAR_REGION1:%.*]] -// CHECK4: omp.par.region1: -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK4-NEXT: [[CONV:%.*]] = trunc i32 [[TMP3]] to i8 -// CHECK4-NEXT: [[TMP4:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP4]], i64 0 -// CHECK4-NEXT: [[TMP5:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK4-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i64 0 -// CHECK4-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX4]], align 1 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM5]]) -// CHECK4-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0 -// CHECK4-NEXT: br i1 [[TMP7]], label [[OMP_PAR_REGION1_CONT:%.*]], label [[OMP_PAR_REGION1_CNCL_FINI:%.*]] -// CHECK4: omp.par.region1.cncl.fini: -// CHECK4-NEXT: br label [[OMP_PAR_REGION1_CNCL:%.*]] -// CHECK4: omp.par.region1.cncl: -// CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] -// CHECK4: omp.par.pre_finalize: -// CHECK4-NEXT: br label [[DOTOMPFINALIZE:%.*]] -// CHECK4: .ompfinalize: +// CHECK4-NEXT: br i1 [[TOBOOL]], label [[TMP14:%.*]], label [[TMP3:%.*]] +// CHECK4: 3: +// CHECK4-NEXT: br label [[TMP4:%.*]] +// CHECK4: 4: +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// 
CHECK4-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8 +// CHECK4-NEXT: [[TMP6:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP6]], i64 0 +// CHECK4-NEXT: [[TMP7:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK4-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 0 +// CHECK4-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1 +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +// CHECK4-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 +// CHECK4-NEXT: br i1 [[TMP9]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]] +// CHECK4: .cncl5: // CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] -// CHECK4: omp.par.region1.cont: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK4-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP9]], i64 0 -// CHECK4-NEXT: [[TMP10:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 -// CHECK4-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP10]], i64 0 -// CHECK4-NEXT: [[TMP11:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1 -// CHECK4-NEXT: [[CONV8:%.*]] = sext i8 [[TMP11]] to i32 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP8]] +// CHECK4: .cont: +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK4-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0 +// CHECK4-NEXT: [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 +// CHECK4-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0 +// CHECK4-NEXT: [[TMP13:%.*]] = load i8, i8* 
[[ARRAYIDX7]], align 1 +// CHECK4-NEXT: [[CONV8:%.*]] = sext i8 [[TMP13]] to i32 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP10]] // CHECK4-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD]] to i8 // CHECK4-NEXT: store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1 // CHECK4-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] // CHECK4: omp.par.region.parallel.after: -// CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE]] -// CHECK4: omp.par.region.if: +// CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +// CHECK4: omp.par.pre_finalize: +// CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] +// CHECK4: 14: +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1) +// CHECK4-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0 +// CHECK4-NEXT: br i1 [[TMP16]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]] +// CHECK4: .cncl: // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]], i32 1) -// CHECK4-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 0 -// CHECK4-NEXT: br i1 [[TMP13]], label [[OMP_PAR_REGION_IF_CONT:%.*]], label [[OMP_PAR_REGION_IF_CNCL_FINI:%.*]] -// CHECK4: omp.par.region.if.cncl.fini: -// CHECK4-NEXT: br label [[OMP_PAR_REGION_IF_CNCL:%.*]] -// CHECK4: omp.par.region.if.cncl: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) -// CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE]] -// CHECK4: omp.par.region.if.cont: -// CHECK4-NEXT: br label [[OMP_PAR_REGION1]] +// CHECK4-NEXT: [[TMP17:%.*]] = call i32 
@__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +// CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] +// CHECK4: .split: +// CHECK4-NEXT: br label [[TMP4]] // CHECK4: omp.par.outlined.exit.exitStub: // CHECK4-NEXT: ret void // @@ -2406,14 +2368,14 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK4-NEXT: ] // CHECK4: .omp.sections.case: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK4-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 -// CHECK4-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] -// CHECK4: .cancel.exit: -// CHECK4-NEXT: br label [[CANCEL_EXIT:%.*]] -// CHECK4: .cancel.continue: +// CHECK4-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 +// CHECK4-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] +// CHECK4: .omp.sections.case.split: // CHECK4-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] +// CHECK4: .omp.sections.case.cncl: +// CHECK4-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK4: .omp.sections.exit: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: @@ -2422,13 +2384,13 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) // CHECK4-NEXT: call void 
@__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) -// CHECK4-NEXT: br label [[CANCEL_CONT:%.*]] +// CHECK4-NEXT: br label [[CANCEL_CONT]] // CHECK4: cancel.cont: // CHECK4-NEXT: ret void // CHECK4: cancel.exit: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19]]) // CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK4-NEXT: br label [[CANCEL_CONT]] // @@ -2449,7 +2411,7 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) // CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) // CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK4-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 1 @@ -2470,23 +2432,25 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]] // CHECK4-NEXT: ] // CHECK4: .omp.sections.case: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* 
@[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK4-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 -// CHECK4-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] -// CHECK4: .cancel.exit: -// CHECK4-NEXT: br label [[CANCEL_EXIT:%.*]] -// CHECK4: .cancel.continue: +// CHECK4-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 +// CHECK4-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] +// CHECK4: .omp.sections.case.split: // CHECK4-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] +// CHECK4: .omp.sections.case.cncl: +// CHECK4-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK4: .omp.sections.case2: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 3) -// CHECK4-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK4-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT4:%.*]], label [[DOTCANCEL_CONTINUE5:%.*]] -// CHECK4: .cancel.exit4: -// CHECK4-NEXT: br label [[CANCEL_EXIT]] -// CHECK4: .cancel.continue5: +// CHECK4-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0 +// CHECK4-NEXT: br i1 [[TMP10]], label [[DOTOMP_SECTIONS_CASE2_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE2_CNCL:%.*]] +// CHECK4: .omp.sections.case2.split: +// CHECK4-NEXT: br label [[DOTOMP_SECTIONS_CASE2_SECTION_AFTER:%.*]] +// CHECK4: .omp.sections.case2.section.after: // CHECK4-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] +// CHECK4: .omp.sections.case2.cncl: +// CHECK4-NEXT: br label [[OMP_INNER_FOR_END]] // CHECK4: .omp.sections.exit: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: @@ -2495,14 +2459,14 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], 
align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM7]]) -// CHECK4-NEXT: br label [[CANCEL_CONT:%.*]] +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM5]]) +// CHECK4-NEXT: br label [[CANCEL_CONT]] // CHECK4: cancel.cont: // CHECK4-NEXT: ret void // CHECK4: cancel.exit: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23]]) +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) // CHECK4-NEXT: br label [[CANCEL_CONT]] // // @@ -2549,7 +2513,7 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK4-NEXT: store i32 0, i32* [[R3]], align 4 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB31:[0-9]+]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) // CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: 
[[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 @@ -2577,7 +2541,7 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB33:[0-9]+]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) // CHECK4-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM7]], i32 2) // CHECK4-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 // CHECK4-NEXT: br i1 [[TMP15]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] @@ -2599,14 +2563,14 @@ for (int i = 0; i < argc; ++i) { // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35:[0-9]+]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) // CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) // CHECK4-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK4-NEXT: [[TMP20:%.*]] = bitcast i32* [[R3]] to i8* // CHECK4-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) // CHECK4-NEXT: [[TMP21:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK4-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB36:[0-9]+]], i32 
[[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB30:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK4-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK4-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK4-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] @@ -2616,10 +2580,10 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[R3]], align 4 // CHECK4-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK4-NEXT: store i32 [[ADD13]], i32* [[TMP1]], align 4 -// CHECK4-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB30]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK4: cancel.exit: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) // CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) // CHECK4-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK4: .omp.reduction.case2: @@ -3916,13 +3880,13 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_37:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: 
[[DOTCAPTURE_EXPR_34:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I39:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I36:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK9-NEXT: [[R:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32 0, i32* [[RETVAL]], align 4 @@ -3969,12 +3933,8 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 3) // CHECK9-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK9-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL_FINI:%.*]] -// CHECK9: omp_section_loop.body.case.cncl.fini: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] -// CHECK9: omp_section_loop.body.case.cncl: -// CHECK9-NEXT: br label [[SECTION_FINI:%.*]] -// CHECK9: omp_section_loop.body.case.cont: +// CHECK9-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] +// CHECK9: omp_section_loop.body.case.split: // CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_SECTION_AFTER:%.*]] // CHECK9: omp_section_loop.body.case.section.after: // CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_SECTIONS_AFTER]] @@ -3989,14 +3949,10 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) // CHECK9-NEXT: br label [[OMP_SECTION_LOOP_AFTER:%.*]] // CHECK9: omp_section_loop.after: -// CHECK9-NEXT: br label 
[[SECTION_FINISH:%.*]] -// CHECK9: section_finish: -// CHECK9-NEXT: br label [[DOTOMPFINALIZE13:%.*]] -// CHECK9: .ompfinalize13: -// CHECK9-NEXT: br label [[SECTION_FINI]] -// CHECK9: section_fini: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER14:%.*]] -// CHECK9: omp_section_loop.preheader14: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_AFTERSECTIONS_FINI:%.*]] +// CHECK9: omp_section_loop.aftersections.fini: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]] +// CHECK9: omp_section_loop.preheader13: // CHECK9-NEXT: store i32 0, i32* [[P_LOWERBOUND29]], align 4 // CHECK9-NEXT: store i32 1, i32* [[P_UPPERBOUND30]], align 4 // CHECK9-NEXT: store i32 1, i32* [[P_STRIDE31]], align 4 @@ -4006,93 +3962,79 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[P_UPPERBOUND30]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], [[TMP9]] // CHECK9-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 1 -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_HEADER15:%.*]] -// CHECK9: omp_section_loop.header15: -// CHECK9-NEXT: [[OMP_SECTION_LOOP_IV21:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER14]] ], [ [[OMP_SECTION_LOOP_NEXT23:%.*]], [[OMP_SECTION_LOOP_INC18:%.*]] ] -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_COND16:%.*]] -// CHECK9: omp_section_loop.cond16: -// CHECK9-NEXT: [[OMP_SECTION_LOOP_CMP22:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV21]], [[TMP12]] -// CHECK9-NEXT: br i1 [[OMP_SECTION_LOOP_CMP22]], label [[OMP_SECTION_LOOP_BODY17:%.*]], label [[OMP_SECTION_LOOP_EXIT19:%.*]] -// CHECK9: omp_section_loop.body17: -// CHECK9-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV21]], [[TMP9]] +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_HEADER14:%.*]] +// CHECK9: omp_section_loop.header14: +// CHECK9-NEXT: [[OMP_SECTION_LOOP_IV20:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER13]] ], [ [[OMP_SECTION_LOOP_NEXT22:%.*]], [[OMP_SECTION_LOOP_INC17:%.*]] ] +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_COND15:%.*]] +// CHECK9: 
omp_section_loop.cond15: +// CHECK9-NEXT: [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP12]] +// CHECK9-NEXT: br i1 [[OMP_SECTION_LOOP_CMP21]], label [[OMP_SECTION_LOOP_BODY16:%.*]], label [[OMP_SECTION_LOOP_EXIT18:%.*]] +// CHECK9: omp_section_loop.body16: +// CHECK9-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP9]] // CHECK9-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 1 // CHECK9-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], 0 -// CHECK9-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER:%.*]] [ -// CHECK9-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE24:%.*]] -// CHECK9-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE26:%.*]] +// CHECK9-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER:%.*]] [ +// CHECK9-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE23:%.*]] +// CHECK9-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE25:%.*]] // CHECK9-NEXT: ] -// CHECK9: omp_section_loop.body.case24: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM25:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM25]], i32 3) +// CHECK9: omp_section_loop.body.case23: +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM24]], i32 3) // CHECK9-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0 -// CHECK9-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE24_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE24_CNCL_FINI:%.*]] -// CHECK9: omp_section_loop.body.case24.cncl.fini: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE24_CNCL:%.*]] -// CHECK9: omp_section_loop.body.case24.cncl: -// CHECK9-NEXT: br label [[SECTION_FINI35:%.*]] -// CHECK9: omp_section_loop.body.case24.cont: -// CHECK9-NEXT: br label 
[[OMP_SECTION_LOOP_BODY_CASE24_SECTION_AFTER:%.*]] -// CHECK9: omp_section_loop.body.case24.section.after: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER]] -// CHECK9: omp_section_loop.body.case26: +// CHECK9-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE23_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE23_CNCL:%.*]] +// CHECK9: omp_section_loop.body.case23.split: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE23_SECTION_AFTER:%.*]] +// CHECK9: omp_section_loop.body.case23.section.after: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] +// CHECK9: omp_section_loop.body.case25: // CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM27:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM27]], i32 3) // CHECK9-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 0 -// CHECK9-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE26_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL_FINI:%.*]] -// CHECK9: omp_section_loop.body.case26.cncl.fini: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL:%.*]] -// CHECK9: omp_section_loop.body.case26.cncl: -// CHECK9-NEXT: br label [[OMP_REGION_FINALIZE:%.*]] -// CHECK9: omp_section_loop.body.case26.cont: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTIONFINI:%.*]] -// CHECK9: omp_section_loop.body.case26.sectionfini: -// CHECK9-NEXT: br label [[OMP_REGION_FINALIZE]] -// CHECK9: omp_region.finalize: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTION_AFTER:%.*]] -// CHECK9: omp_section_loop.body.case26.section.after: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER]] -// CHECK9: omp_section_loop.body17.sections.after: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_INC18]] -// CHECK9: omp_section_loop.inc18: -// CHECK9-NEXT: [[OMP_SECTION_LOOP_NEXT23]] = add nuw i32 [[OMP_SECTION_LOOP_IV21]], 1 -// 
CHECK9-NEXT: br label [[OMP_SECTION_LOOP_HEADER15]] -// CHECK9: omp_section_loop.exit19: +// CHECK9-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]] +// CHECK9: omp_section_loop.body.case25.split: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER26:%.*]] +// CHECK9: omp_section_loop.body.case25.section.after26: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER:%.*]] +// CHECK9: omp_section_loop.body.case25.section.after: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] +// CHECK9: omp_section_loop.body16.sections.after: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_INC17]] +// CHECK9: omp_section_loop.inc17: +// CHECK9-NEXT: [[OMP_SECTION_LOOP_NEXT22]] = add nuw i32 [[OMP_SECTION_LOOP_IV20]], 1 +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_HEADER14]] +// CHECK9: omp_section_loop.exit18: // CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]]) // CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM33:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM33]]) -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_AFTER20:%.*]] -// CHECK9: omp_section_loop.after20: -// CHECK9-NEXT: br label [[SECTION_FINISH34:%.*]] -// CHECK9: section_finish34: -// CHECK9-NEXT: br label [[DOTOMPFINALIZE36:%.*]] -// CHECK9: .ompfinalize36: -// CHECK9-NEXT: br label [[SECTION_FINI35]] -// CHECK9: section_fini35: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_AFTER19:%.*]] +// CHECK9: omp_section_loop.after19: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_AFTER19SECTIONS_FINI:%.*]] +// CHECK9: omp_section_loop.after19sections.fini: // CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* 
[[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP21]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[SUB38:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK9-NEXT: store i32 [[SUB38]], i32* [[DOTCAPTURE_EXPR_37]], align 4 +// CHECK9-NEXT: [[SUB35:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK9-NEXT: store i32 [[SUB35]], i32* [[DOTCAPTURE_EXPR_34]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 // CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP22]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 // CHECK9-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM40:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM40]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM37:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM37]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 -// CHECK9-NEXT: [[CMP41:%.*]] = 
icmp sgt i32 [[TMP24]], [[TMP25]] -// CHECK9-NEXT: br i1 [[CMP41]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK9-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +// CHECK9-NEXT: br i1 [[CMP38]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: // CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 @@ -4106,23 +4048,29 @@ for (int i = 0; i < argc; ++i) { // CHECK9: omp.inner.for.cond: // CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP42:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] -// CHECK9-NEXT: br i1 [[CMP42]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[CMP39:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK9-NEXT: br i1 [[CMP39]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: // CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 -// CHECK9-NEXT: [[ADD43:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD43]], i32* [[I39]], align 4 +// CHECK9-NEXT: [[ADD40:%.*]] = add nsw i32 0, [[MUL]] +// CHECK9-NEXT: store i32 [[ADD40]], i32* [[I36]], align 4 // CHECK9-NEXT: [[TMP32:%.*]] = load float, float* @flag, align 4 -// CHECK9-NEXT: [[TOBOOL44:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 -// CHECK9-NEXT: br i1 [[TOBOOL44]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK9-NEXT: [[TOBOOL41:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 +// CHECK9-NEXT: br i1 [[TOBOOL41]], label [[OMP_IF_THEN:%.*]], label 
[[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) -// CHECK9-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM45]], i32 2) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) +// CHECK9-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 2) // CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 // CHECK9-NEXT: br i1 [[TMP34]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK9: .cancel.exit: // CHECK9-NEXT: br label [[CANCEL_EXIT:%.*]] +// CHECK9: omp_section_loop.body.case.cncl: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_EXIT]] +// CHECK9: omp_section_loop.body.case23.cncl: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] +// CHECK9: omp_section_loop.body.case25.cncl: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] // CHECK9: .cancel.continue: // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: @@ -4133,30 +4081,30 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: // CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD46:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK9-NEXT: store i32 [[ADD46]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK9-NEXT: store i32 [[ADD43]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 
[[OMP_GLOBAL_THREAD_NUM48]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM45]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: cancel.exit: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM47]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM44:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM44]]) // CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK9: omp.precond.end: // CHECK9-NEXT: br label [[CANCEL_CONT]] // CHECK9: cancel.cont: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM49:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM49]]) -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM50:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) -// CHECK9-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM50]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM46:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM46]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) +// CHECK9-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) // CHECK9-NEXT: [[TMP37:%.*]] = bitcast i8* [[TMP36]] to %struct.kmp_task_t_with_privates* // CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 0 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM51:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) -// CHECK9-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM51]], i8* [[TMP36]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) +// CHECK9-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM48]], i8* [[TMP36]]) // CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) // CHECK9-NEXT: store i32 0, i32* [[R]], align 4 @@ -4180,54 +4128,50 @@ for (int i = 0; i < argc; ++i) { // CHECK9: omp.par.region: // CHECK9-NEXT: [[TMP2:%.*]] = load float, float* @flag, align 4 // CHECK9-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00 -// CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_PAR_REGION_IF:%.*]], label [[OMP_PAR_REGION1:%.*]] -// CHECK9: omp.par.region1: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i32 [[TMP3]] to i8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP4]], i64 0 -// CHECK9-NEXT: [[TMP5:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK9-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i64 0 -// CHECK9-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX4]], align 1 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM5]]) -// CHECK9-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0 -// CHECK9-NEXT: br i1 [[TMP7]], label [[OMP_PAR_REGION1_CONT:%.*]], label [[OMP_PAR_REGION1_CNCL_FINI:%.*]] -// CHECK9: omp.par.region1.cncl.fini: -// CHECK9-NEXT: br label [[OMP_PAR_REGION1_CNCL:%.*]] -// CHECK9: omp.par.region1.cncl: -// CHECK9-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] -// CHECK9: omp.par.pre_finalize: -// CHECK9-NEXT: br label [[DOTOMPFINALIZE:%.*]] -// CHECK9: .ompfinalize: +// CHECK9-NEXT: br i1 [[TOBOOL]], label [[TMP14:%.*]], label [[TMP3:%.*]] +// CHECK9: 3: +// CHECK9-NEXT: br label [[TMP4:%.*]] +// CHECK9: 4: +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// 
CHECK9-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP6]], i64 0 +// CHECK9-NEXT: [[TMP7:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK9-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 0 +// CHECK9-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1 +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +// CHECK9-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 +// CHECK9-NEXT: br i1 [[TMP9]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]] +// CHECK9: .cncl5: // CHECK9-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] -// CHECK9: omp.par.region1.cont: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP9]], i64 0 -// CHECK9-NEXT: [[TMP10:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 -// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP10]], i64 0 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i8 [[TMP11]] to i32 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP8]] +// CHECK9: .cont: +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0 +// CHECK9-NEXT: [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0 +// CHECK9-NEXT: [[TMP13:%.*]] = load i8, i8* 
[[ARRAYIDX7]], align 1 +// CHECK9-NEXT: [[CONV8:%.*]] = sext i8 [[TMP13]] to i32 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP10]] // CHECK9-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD]] to i8 // CHECK9-NEXT: store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1 // CHECK9-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] // CHECK9: omp.par.region.parallel.after: -// CHECK9-NEXT: br label [[OMP_PAR_PRE_FINALIZE]] -// CHECK9: omp.par.region.if: +// CHECK9-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +// CHECK9: omp.par.pre_finalize: +// CHECK9-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] +// CHECK9: 14: +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1) +// CHECK9-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0 +// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]] +// CHECK9: .cncl: // CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]], i32 1) -// CHECK9-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 0 -// CHECK9-NEXT: br i1 [[TMP13]], label [[OMP_PAR_REGION_IF_CONT:%.*]], label [[OMP_PAR_REGION_IF_CNCL_FINI:%.*]] -// CHECK9: omp.par.region.if.cncl.fini: -// CHECK9-NEXT: br label [[OMP_PAR_REGION_IF_CNCL:%.*]] -// CHECK9: omp.par.region.if.cncl: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) -// CHECK9-NEXT: br label [[OMP_PAR_PRE_FINALIZE]] -// CHECK9: omp.par.region.if.cont: -// CHECK9-NEXT: br label [[OMP_PAR_REGION1]] +// CHECK9-NEXT: [[TMP17:%.*]] = call i32 
@__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +// CHECK9-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] +// CHECK9: .split: +// CHECK9-NEXT: br label [[TMP4]] // CHECK9: omp.par.outlined.exit.exitStub: // CHECK9-NEXT: ret void // @@ -4316,14 +4260,14 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.sections.case: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK9-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 -// CHECK9-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] -// CHECK9: .cancel.exit: -// CHECK9-NEXT: br label [[CANCEL_EXIT:%.*]] -// CHECK9: .cancel.continue: +// CHECK9-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 +// CHECK9-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] +// CHECK9: .omp.sections.case.split: // CHECK9-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] +// CHECK9: .omp.sections.case.cncl: +// CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK9: .omp.sections.exit: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: @@ -4332,13 +4276,13 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) // CHECK9-NEXT: call void 
@__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) -// CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] +// CHECK9-NEXT: br label [[CANCEL_CONT]] // CHECK9: cancel.cont: // CHECK9-NEXT: ret void // CHECK9: cancel.exit: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19]]) // CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK9-NEXT: br label [[CANCEL_CONT]] // @@ -4359,7 +4303,7 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) // CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 1 @@ -4380,23 +4324,25 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.sections.case: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* 
@[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK9-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 -// CHECK9-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] -// CHECK9: .cancel.exit: -// CHECK9-NEXT: br label [[CANCEL_EXIT:%.*]] -// CHECK9: .cancel.continue: +// CHECK9-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 +// CHECK9-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] +// CHECK9: .omp.sections.case.split: // CHECK9-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] +// CHECK9: .omp.sections.case.cncl: +// CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK9: .omp.sections.case2: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 3) -// CHECK9-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK9-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT4:%.*]], label [[DOTCANCEL_CONTINUE5:%.*]] -// CHECK9: .cancel.exit4: -// CHECK9-NEXT: br label [[CANCEL_EXIT]] -// CHECK9: .cancel.continue5: +// CHECK9-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0 +// CHECK9-NEXT: br i1 [[TMP10]], label [[DOTOMP_SECTIONS_CASE2_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE2_CNCL:%.*]] +// CHECK9: .omp.sections.case2.split: +// CHECK9-NEXT: br label [[DOTOMP_SECTIONS_CASE2_SECTION_AFTER:%.*]] +// CHECK9: .omp.sections.case2.section.after: // CHECK9-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] +// CHECK9: .omp.sections.case2.cncl: +// CHECK9-NEXT: br label [[OMP_INNER_FOR_END]] // CHECK9: .omp.sections.exit: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: @@ -4405,14 +4351,14 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], 
align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM7]]) -// CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM5]]) +// CHECK9-NEXT: br label [[CANCEL_CONT]] // CHECK9: cancel.cont: // CHECK9-NEXT: ret void // CHECK9: cancel.exit: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) // CHECK9-NEXT: br label [[CANCEL_CONT]] // // @@ -4459,7 +4405,7 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK9-NEXT: store i32 0, i32* [[R3]], align 4 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB31:[0-9]+]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: 
[[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 @@ -4487,7 +4433,7 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB33:[0-9]+]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) // CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM7]], i32 2) // CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 // CHECK9-NEXT: br i1 [[TMP15]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] @@ -4509,14 +4455,14 @@ for (int i = 0; i < argc; ++i) { // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35:[0-9]+]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) // CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) // CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK9-NEXT: [[TMP20:%.*]] = bitcast i32* [[R3]] to i8* // CHECK9-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) // CHECK9-NEXT: [[TMP21:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK9-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB36:[0-9]+]], i32 
[[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB30:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] @@ -4526,10 +4472,10 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[R3]], align 4 // CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: store i32 [[ADD13]], i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB30]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: cancel.exit: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) // CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) // CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK9: .omp.reduction.case2: @@ -4586,13 +4532,13 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[DOTCAPTURE_EXPR_37:%.*]] = alloca i32, align 4 +// 
CHECK10-NEXT: [[DOTCAPTURE_EXPR_34:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[I39:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[I36:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK10-NEXT: [[R:%.*]] = alloca i32, align 4 // CHECK10-NEXT: store i32 0, i32* [[RETVAL]], align 4 @@ -4639,12 +4585,8 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK10-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 3) // CHECK10-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK10-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL_FINI:%.*]] -// CHECK10: omp_section_loop.body.case.cncl.fini: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] -// CHECK10: omp_section_loop.body.case.cncl: -// CHECK10-NEXT: br label [[SECTION_FINI:%.*]] -// CHECK10: omp_section_loop.body.case.cont: +// CHECK10-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] +// CHECK10: omp_section_loop.body.case.split: // CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_SECTION_AFTER:%.*]] // CHECK10: omp_section_loop.body.case.section.after: // CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_SECTIONS_AFTER]] @@ -4659,14 +4601,10 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) // CHECK10-NEXT: br label [[OMP_SECTION_LOOP_AFTER:%.*]] // CHECK10: 
omp_section_loop.after: -// CHECK10-NEXT: br label [[SECTION_FINISH:%.*]] -// CHECK10: section_finish: -// CHECK10-NEXT: br label [[DOTOMPFINALIZE13:%.*]] -// CHECK10: .ompfinalize13: -// CHECK10-NEXT: br label [[SECTION_FINI]] -// CHECK10: section_fini: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER14:%.*]] -// CHECK10: omp_section_loop.preheader14: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_AFTERSECTIONS_FINI:%.*]] +// CHECK10: omp_section_loop.aftersections.fini: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]] +// CHECK10: omp_section_loop.preheader13: // CHECK10-NEXT: store i32 0, i32* [[P_LOWERBOUND29]], align 4 // CHECK10-NEXT: store i32 1, i32* [[P_UPPERBOUND30]], align 4 // CHECK10-NEXT: store i32 1, i32* [[P_STRIDE31]], align 4 @@ -4676,93 +4614,79 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[P_UPPERBOUND30]], align 4 // CHECK10-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], [[TMP9]] // CHECK10-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 1 -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_HEADER15:%.*]] -// CHECK10: omp_section_loop.header15: -// CHECK10-NEXT: [[OMP_SECTION_LOOP_IV21:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER14]] ], [ [[OMP_SECTION_LOOP_NEXT23:%.*]], [[OMP_SECTION_LOOP_INC18:%.*]] ] -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_COND16:%.*]] -// CHECK10: omp_section_loop.cond16: -// CHECK10-NEXT: [[OMP_SECTION_LOOP_CMP22:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV21]], [[TMP12]] -// CHECK10-NEXT: br i1 [[OMP_SECTION_LOOP_CMP22]], label [[OMP_SECTION_LOOP_BODY17:%.*]], label [[OMP_SECTION_LOOP_EXIT19:%.*]] -// CHECK10: omp_section_loop.body17: -// CHECK10-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV21]], [[TMP9]] +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_HEADER14:%.*]] +// CHECK10: omp_section_loop.header14: +// CHECK10-NEXT: [[OMP_SECTION_LOOP_IV20:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER13]] ], [ [[OMP_SECTION_LOOP_NEXT22:%.*]], [[OMP_SECTION_LOOP_INC17:%.*]] 
] +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_COND15:%.*]] +// CHECK10: omp_section_loop.cond15: +// CHECK10-NEXT: [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP12]] +// CHECK10-NEXT: br i1 [[OMP_SECTION_LOOP_CMP21]], label [[OMP_SECTION_LOOP_BODY16:%.*]], label [[OMP_SECTION_LOOP_EXIT18:%.*]] +// CHECK10: omp_section_loop.body16: +// CHECK10-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP9]] // CHECK10-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 1 // CHECK10-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], 0 -// CHECK10-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER:%.*]] [ -// CHECK10-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE24:%.*]] -// CHECK10-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE26:%.*]] +// CHECK10-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER:%.*]] [ +// CHECK10-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE23:%.*]] +// CHECK10-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE25:%.*]] // CHECK10-NEXT: ] -// CHECK10: omp_section_loop.body.case24: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM25:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM25]], i32 3) +// CHECK10: omp_section_loop.body.case23: +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM24]], i32 3) // CHECK10-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0 -// CHECK10-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE24_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE24_CNCL_FINI:%.*]] -// CHECK10: omp_section_loop.body.case24.cncl.fini: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE24_CNCL:%.*]] -// CHECK10: omp_section_loop.body.case24.cncl: -// CHECK10-NEXT: br label 
[[SECTION_FINI35:%.*]] -// CHECK10: omp_section_loop.body.case24.cont: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE24_SECTION_AFTER:%.*]] -// CHECK10: omp_section_loop.body.case24.section.after: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER]] -// CHECK10: omp_section_loop.body.case26: +// CHECK10-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE23_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE23_CNCL:%.*]] +// CHECK10: omp_section_loop.body.case23.split: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE23_SECTION_AFTER:%.*]] +// CHECK10: omp_section_loop.body.case23.section.after: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] +// CHECK10: omp_section_loop.body.case25: // CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM27:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK10-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM27]], i32 3) // CHECK10-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 0 -// CHECK10-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE26_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL_FINI:%.*]] -// CHECK10: omp_section_loop.body.case26.cncl.fini: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL:%.*]] -// CHECK10: omp_section_loop.body.case26.cncl: -// CHECK10-NEXT: br label [[OMP_REGION_FINALIZE:%.*]] -// CHECK10: omp_section_loop.body.case26.cont: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTIONFINI:%.*]] -// CHECK10: omp_section_loop.body.case26.sectionfini: -// CHECK10-NEXT: br label [[OMP_REGION_FINALIZE]] -// CHECK10: omp_region.finalize: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTION_AFTER:%.*]] -// CHECK10: omp_section_loop.body.case26.section.after: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER]] -// CHECK10: omp_section_loop.body17.sections.after: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_INC18]] -// CHECK10: 
omp_section_loop.inc18: -// CHECK10-NEXT: [[OMP_SECTION_LOOP_NEXT23]] = add nuw i32 [[OMP_SECTION_LOOP_IV21]], 1 -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_HEADER15]] -// CHECK10: omp_section_loop.exit19: +// CHECK10-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]] +// CHECK10: omp_section_loop.body.case25.split: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER26:%.*]] +// CHECK10: omp_section_loop.body.case25.section.after26: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER:%.*]] +// CHECK10: omp_section_loop.body.case25.section.after: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] +// CHECK10: omp_section_loop.body16.sections.after: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_INC17]] +// CHECK10: omp_section_loop.inc17: +// CHECK10-NEXT: [[OMP_SECTION_LOOP_NEXT22]] = add nuw i32 [[OMP_SECTION_LOOP_IV20]], 1 +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_HEADER14]] +// CHECK10: omp_section_loop.exit18: // CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]]) // CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM33:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK10-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM33]]) -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_AFTER20:%.*]] -// CHECK10: omp_section_loop.after20: -// CHECK10-NEXT: br label [[SECTION_FINISH34:%.*]] -// CHECK10: section_finish34: -// CHECK10-NEXT: br label [[DOTOMPFINALIZE36:%.*]] -// CHECK10: .ompfinalize36: -// CHECK10-NEXT: br label [[SECTION_FINI35]] -// CHECK10: section_fini35: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_AFTER19:%.*]] +// CHECK10: omp_section_loop.after19: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_AFTER19SECTIONS_FINI:%.*]] +// CHECK10: omp_section_loop.after19sections.fini: // CHECK10-NEXT: [[TMP20:%.*]] = load i32, 
i32* [[ARGC_ADDR]], align 4 // CHECK10-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP21]], 0 // CHECK10-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK10-NEXT: [[SUB38:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK10-NEXT: store i32 [[SUB38]], i32* [[DOTCAPTURE_EXPR_37]], align 4 +// CHECK10-NEXT: [[SUB35:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK10-NEXT: store i32 [[SUB35]], i32* [[DOTCAPTURE_EXPR_34]], align 4 // CHECK10-NEXT: store i32 0, i32* [[I]], align 4 // CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP22]] // CHECK10-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK10: omp.precond.then: // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 // CHECK10-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM40:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) -// CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM40]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM37:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM37]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // 
CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 -// CHECK10-NEXT: [[CMP41:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] -// CHECK10-NEXT: br i1 [[CMP41]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK10-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +// CHECK10-NEXT: br i1 [[CMP38]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: -// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: // CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 @@ -4776,23 +4700,29 @@ for (int i = 0; i < argc; ++i) { // CHECK10: omp.inner.for.cond: // CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP42:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] -// CHECK10-NEXT: br i1 [[CMP42]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK10-NEXT: [[CMP39:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK10-NEXT: br i1 [[CMP39]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: // CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 -// CHECK10-NEXT: [[ADD43:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD43]], i32* [[I39]], align 4 +// CHECK10-NEXT: [[ADD40:%.*]] = add nsw i32 0, [[MUL]] +// CHECK10-NEXT: store i32 [[ADD40]], i32* [[I36]], align 4 // CHECK10-NEXT: [[TMP32:%.*]] = load float, float* @flag, align 4 -// CHECK10-NEXT: [[TOBOOL44:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 -// CHECK10-NEXT: br i1 
[[TOBOOL44]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK10-NEXT: [[TOBOOL41:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 +// CHECK10-NEXT: br i1 [[TOBOOL41]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) -// CHECK10-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM45]], i32 2) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) +// CHECK10-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 2) // CHECK10-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 // CHECK10-NEXT: br i1 [[TMP34]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK10: .cancel.exit: // CHECK10-NEXT: br label [[CANCEL_EXIT:%.*]] +// CHECK10: omp_section_loop.body.case.cncl: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_EXIT]] +// CHECK10: omp_section_loop.body.case23.cncl: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] +// CHECK10: omp_section_loop.body.case25.cncl: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] // CHECK10: .cancel.continue: // CHECK10-NEXT: br label [[OMP_IF_END:%.*]] // CHECK10: omp_if.else: @@ -4803,30 +4733,30 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: // CHECK10-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[ADD46:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK10-NEXT: store i32 [[ADD46]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK10-NEXT: store i32 [[ADD43]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: 
omp.loop.exit: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM48]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM45]]) // CHECK10-NEXT: br label [[OMP_PRECOND_END]] // CHECK10: cancel.exit: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM47]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM44:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM44]]) // CHECK10-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK10: omp.precond.end: // CHECK10-NEXT: br label [[CANCEL_CONT]] // CHECK10: cancel.cont: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM49:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM49]]) -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM50:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) -// CHECK10-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM50]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM46:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM46]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) +// CHECK10-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) // CHECK10-NEXT: [[TMP37:%.*]] = bitcast i8* [[TMP36]] to %struct.kmp_task_t_with_privates* // CHECK10-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 0 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM51:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) -// CHECK10-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM51]], i8* [[TMP36]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) +// CHECK10-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM48]], i8* [[TMP36]]) // CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) // CHECK10-NEXT: store i32 0, i32* [[R]], align 4 @@ -4850,54 +4780,50 @@ for (int i = 0; i < argc; ++i) { // CHECK10: omp.par.region: // CHECK10-NEXT: [[TMP2:%.*]] = load float, float* @flag, align 4 // CHECK10-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00 -// CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_PAR_REGION_IF:%.*]], label [[OMP_PAR_REGION1:%.*]] -// CHECK10: omp.par.region1: -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK10-NEXT: [[CONV:%.*]] = trunc i32 [[TMP3]] to i8 -// CHECK10-NEXT: [[TMP4:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP4]], i64 0 -// CHECK10-NEXT: [[TMP5:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK10-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i64 0 -// CHECK10-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX4]], align 1 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM5]]) -// CHECK10-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0 -// CHECK10-NEXT: br i1 [[TMP7]], label [[OMP_PAR_REGION1_CONT:%.*]], label [[OMP_PAR_REGION1_CNCL_FINI:%.*]] -// CHECK10: omp.par.region1.cncl.fini: -// CHECK10-NEXT: br label [[OMP_PAR_REGION1_CNCL:%.*]] -// CHECK10: omp.par.region1.cncl: -// CHECK10-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] -// CHECK10: omp.par.pre_finalize: -// CHECK10-NEXT: br label [[DOTOMPFINALIZE:%.*]] -// CHECK10: .ompfinalize: +// CHECK10-NEXT: br i1 [[TOBOOL]], label [[TMP14:%.*]], label [[TMP3:%.*]] +// CHECK10: 3: +// CHECK10-NEXT: br label [[TMP4:%.*]] +// CHECK10: 4: +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* 
[[LOADGEP_ARGC_ADDR]], align 4 +// CHECK10-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8 +// CHECK10-NEXT: [[TMP6:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP6]], i64 0 +// CHECK10-NEXT: [[TMP7:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK10-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 0 +// CHECK10-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1 +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +// CHECK10-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 +// CHECK10-NEXT: br i1 [[TMP9]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]] +// CHECK10: .cncl5: // CHECK10-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] -// CHECK10: omp.par.region1.cont: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK10-NEXT: [[TMP9:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK10-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP9]], i64 0 -// CHECK10-NEXT: [[TMP10:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 -// CHECK10-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP10]], i64 0 -// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1 -// CHECK10-NEXT: [[CONV8:%.*]] = sext i8 [[TMP11]] to i32 -// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP8]] +// CHECK10: .cont: +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK10-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0 +// CHECK10-NEXT: [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 +// CHECK10-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* 
[[TMP12]], i64 0 +// CHECK10-NEXT: [[TMP13:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1 +// CHECK10-NEXT: [[CONV8:%.*]] = sext i8 [[TMP13]] to i32 +// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP10]] // CHECK10-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD]] to i8 // CHECK10-NEXT: store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1 // CHECK10-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] // CHECK10: omp.par.region.parallel.after: -// CHECK10-NEXT: br label [[OMP_PAR_PRE_FINALIZE]] -// CHECK10: omp.par.region.if: +// CHECK10-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +// CHECK10: omp.par.pre_finalize: +// CHECK10-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] +// CHECK10: 14: +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1) +// CHECK10-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0 +// CHECK10-NEXT: br i1 [[TMP16]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]] +// CHECK10: .cncl: // CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]], i32 1) -// CHECK10-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 0 -// CHECK10-NEXT: br i1 [[TMP13]], label [[OMP_PAR_REGION_IF_CONT:%.*]], label [[OMP_PAR_REGION_IF_CNCL_FINI:%.*]] -// CHECK10: omp.par.region.if.cncl.fini: -// CHECK10-NEXT: br label [[OMP_PAR_REGION_IF_CNCL:%.*]] -// CHECK10: omp.par.region.if.cncl: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) -// CHECK10-NEXT: br label [[OMP_PAR_PRE_FINALIZE]] -// CHECK10: omp.par.region.if.cont: -// CHECK10-NEXT: br 
label [[OMP_PAR_REGION1]] +// CHECK10-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +// CHECK10-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] +// CHECK10: .split: +// CHECK10-NEXT: br label [[TMP4]] // CHECK10: omp.par.outlined.exit.exitStub: // CHECK10-NEXT: ret void // @@ -4986,14 +4912,14 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK10-NEXT: ] // CHECK10: .omp.sections.case: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK10-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK10-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 -// CHECK10-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] -// CHECK10: .cancel.exit: -// CHECK10-NEXT: br label [[CANCEL_EXIT:%.*]] -// CHECK10: .cancel.continue: +// CHECK10-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 +// CHECK10-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] +// CHECK10: .omp.sections.case.split: // CHECK10-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] +// CHECK10: .omp.sections.case.cncl: +// CHECK10-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK10: .omp.sections.exit: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: @@ -5002,13 +4928,13 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) // CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) -// CHECK10-NEXT: br label [[CANCEL_CONT:%.*]] +// CHECK10-NEXT: br label [[CANCEL_CONT]] // CHECK10: cancel.cont: // CHECK10-NEXT: ret void // CHECK10: cancel.exit: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19]]) // CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK10-NEXT: br label [[CANCEL_CONT]] // @@ -5029,7 +4955,7 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) // CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 1 @@ -5050,23 +4976,25 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]] // CHECK10-NEXT: ] // CHECK10: .omp.sections.case: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK10-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK10-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 -// CHECK10-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] -// CHECK10: .cancel.exit: -// CHECK10-NEXT: br label [[CANCEL_EXIT:%.*]] -// CHECK10: .cancel.continue: +// CHECK10-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 +// CHECK10-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] +// CHECK10: .omp.sections.case.split: // CHECK10-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] +// CHECK10: .omp.sections.case.cncl: +// CHECK10-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK10: .omp.sections.case2: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK10-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 3) -// CHECK10-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK10-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT4:%.*]], label [[DOTCANCEL_CONTINUE5:%.*]] -// CHECK10: .cancel.exit4: -// CHECK10-NEXT: br label [[CANCEL_EXIT]] -// CHECK10: .cancel.continue5: +// CHECK10-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0 +// CHECK10-NEXT: br i1 [[TMP10]], label [[DOTOMP_SECTIONS_CASE2_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE2_CNCL:%.*]] +// CHECK10: .omp.sections.case2.split: +// CHECK10-NEXT: br label [[DOTOMP_SECTIONS_CASE2_SECTION_AFTER:%.*]] +// CHECK10: .omp.sections.case2.section.after: // CHECK10-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] +// CHECK10: .omp.sections.case2.cncl: +// CHECK10-NEXT: br label [[OMP_INNER_FOR_END]] // CHECK10: .omp.sections.exit: // CHECK10-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: @@ -5075,14 +5003,14 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM7]]) -// CHECK10-NEXT: br label [[CANCEL_CONT:%.*]] +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM5]]) +// CHECK10-NEXT: br label [[CANCEL_CONT]] // CHECK10: cancel.cont: // CHECK10-NEXT: ret void // CHECK10: cancel.exit: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23]]) +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) // CHECK10-NEXT: br label [[CANCEL_CONT]] // // @@ -5129,7 +5057,7 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK10-NEXT: store i32 0, i32* [[R3]], align 4 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB31:[0-9]+]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 
[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 @@ -5157,7 +5085,7 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB33:[0-9]+]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) // CHECK10-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM7]], i32 2) // CHECK10-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 // CHECK10-NEXT: br i1 [[TMP15]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] @@ -5179,14 +5107,14 @@ for (int i = 0; i < argc; ++i) { // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35:[0-9]+]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) // CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) // CHECK10-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK10-NEXT: [[TMP20:%.*]] = bitcast i32* [[R3]] to i8* // CHECK10-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) // CHECK10-NEXT: [[TMP21:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK10-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB36:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK10-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB30:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK10-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK10-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK10-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] @@ -5196,10 +5124,10 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[R3]], align 4 // CHECK10-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK10-NEXT: store i32 [[ADD13]], i32* [[TMP1]], align 4 -// CHECK10-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK10-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB30]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK10-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK10: cancel.exit: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) // CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) // CHECK10-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK10: .omp.reduction.case2: diff --git 
a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 223163f0c359f..38ebb28e6ea5e 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -103,8 +103,9 @@ class OpenMPIRBuilder { /// A finalize callback knows about all objects that need finalization, e.g. /// destruction, when the scope of the currently generated construct is left /// at the time, and location, the callback is invoked. - using FinalizeCallbackTy = std::function; + using FinalizeCallbackTy = function_ref; +private: enum class RegionKind { /// Sentinel object so we don't always have to check whether the stack is /// empty. @@ -121,7 +122,7 @@ class OpenMPIRBuilder { struct OMPRegionInfo; /// An irregular exit out of a region, such as by cancellation. - struct OMPRegionBreak { + struct OMPRegionBreakInfo { /// The end of this basic block is current end of the path for breaking out of the region. Must have no terminator so finializations (eg. destructors) can be appended until rejoining at the end of the target region. BasicBlock *BB; @@ -131,7 +132,7 @@ class OpenMPIRBuilder { /// The kind of region that is being exited. Control flow will rejoin after the innermost region of this kind. OMPRegionInfo* Target; - OMPRegionBreak(BasicBlock *BB, omp::Directive Reason, OMPRegionInfo* Target); + OMPRegionBreakInfo(BasicBlock *BB, omp::Directive Reason, OMPRegionInfo* Target); /// Consistency self-check. void assertOK() const; @@ -152,7 +153,7 @@ class OpenMPIRBuilder { bool IsCancellable; /// Irregular exits (such as cancellation points) out of this region. - SmallVector Breaks; + SmallVector Breaks; OMPRegionInfo(RegionKind Kind, omp::Directive DK, bool IsCancellable ); @@ -169,7 +170,7 @@ class OpenMPIRBuilder { void assertOK() const; }; -private: + /// The stack of regions surrounding the current in-progress code generation location. 
Regions are pushed and popped when entering/leaving a region. Constructs/directives that are sensitive to surrounding regions (such as cancellation) must be emitted inside the BodyGenCallbackTy of the surrounding constructs. SmallVector, 8> RegionStack; @@ -311,20 +312,18 @@ class OpenMPIRBuilder { /// Generator for '#omp parallel' /// /// \param Loc The insert and source location description. - /// \param OuterAllocaIP The insertion points to be used for alloca - /// instructions. \param BodyGenCB Callback that will generate the region - /// code. \param PrivCB Callback to copy a given variable (think copy - /// constructor). \param FiniCB Callback to finalize variable copies. \param - /// IfCondition The evaluated 'if' clause expression, if any. \param - /// NumThreads The evaluated 'num_threads' clause expression, if any. \param - /// ProcBind The value of the 'proc_bind' clause (see ProcBindKind). - /// + /// \param AllocaIP The insertion points to be used for alloca instructions. + /// \param BodyGenCB Callback that will generate the region code. + /// \param PrivCB Callback to copy a given variable (think copy constructor). + /// \param FiniCB Callback to finalize variable copies. + /// \param IfCondition The evaluated 'if' clause expression, if any. + /// \param NumThreads The evaluated 'num_threads' clause expression, if any. + /// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind). /// \param IsCancellable Flag to indicate a cancellable parallel region. - /// MK: Remove? Any non-cancellable? Makes it a difference to the runtime? /// /// \returns The insertion position *after* the parallel. 
IRBuilder<>::InsertPoint - createParallel(const LocationDescription &Loc, InsertPointTy OuterAllocaIP, + createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, omp::ProcBindKind ProcBind, @@ -493,7 +492,8 @@ class OpenMPIRBuilder { /// /// \returns Point where to insert code after the workshare construct. InsertPointTy applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, - InsertPointTy AllocaIP, bool NeedsBarrier); + InsertPointTy AllocaIP, + bool NeedsBarrier); /// Modifies the canonical loop a statically-scheduled workshare loop with a /// user-specified chunk size. diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 6194efbffcc6e..7615607385efc 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -671,32 +671,37 @@ Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) { -OpenMPIRBuilder::OMPRegionBreak::OMPRegionBreak(BasicBlock *BB, omp::Directive Reason, OMPRegionInfo* Target) +OpenMPIRBuilder::OMPRegionBreakInfo::OMPRegionBreakInfo(BasicBlock *BB, omp::Directive Reason, OMPRegionInfo* Target) : BB(BB), Reason(Reason), Target(Target) { -#ifndef NDEBUG assertOK(); -#endif } -void OpenMPIRBuilder ::OMPRegionBreak:: assertOK() const { +void OpenMPIRBuilder ::OMPRegionBreakInfo:: assertOK() const { #ifndef NDEBUG - assert(!BB->getTerminator()); - - switch (Reason) { - case OMPD_cancellation_point: - case OMPD_cancel: - case OMPD_barrier: - break; - default: - llvm_unreachable("unexpected region break reason"); - } + assert(!BB->getTerminator() && "Pending irregular exit must be amendable"); assert(Target); switch (Target->DK) { case OMPD_parallel: + switch (Reason) { + case OMPD_cancellation_point: + case OMPD_cancel: + case OMPD_barrier: + break; + default: + llvm_unreachable("Unexpected region break reason 
for parallel construct"); + } + break; case OMPD_sections: + switch (Reason) { + case OMPD_cancellation_point: + case OMPD_cancel: + break; + default: + llvm_unreachable("Unexpected region break reason for sections construct"); + } break; default: llvm_unreachable("unexpected region break target"); @@ -709,9 +714,7 @@ void OpenMPIRBuilder ::OMPRegionBreak:: assertOK() const { OpenMPIRBuilder::OMPRegionInfo::OMPRegionInfo(RegionKind Kind, omp::Directive DK, bool IsCancellable ) : Kind(Kind), DK(DK), IsCancellable(IsCancellable) { -#ifndef NDEBUF assertOK(); -#endif } @@ -723,25 +726,27 @@ void OpenMPIRBuilder::OMPRegionInfo::assertOK() const { assert(!IsCancellable && "top-level is not cancellable"); break; case RegionKind::CanonicalLoop: + // TODO break; case RegionKind::Directive: switch (DK) { case OMPD_parallel: - case OMPD_sections: case OMPD_single: + case OMPD_sections: + case OMPD_section: + case OMPD_single: case OMPD_master: case OMPD_masked: case OMPD_critical: case OMPD_ordered: break; default: - llvm_unreachable("Not a recognized OpenMP region"); + llvm_unreachable("Not a recognized OpenMP construct with SESE region"); } break; } - for (auto &B:Breaks) { - B.assertOK(); - } + for (const OMPRegionBreakInfo &Break:Breaks) + Break.assertOK(); #endif } @@ -759,18 +764,7 @@ OpenMPIRBuilder::getInnermostRegion(omp::Directive DK) { bool OpenMPIRBuilder::isLastFinalizationInfoCancellable(omp::Directive DK) { - auto MatchingRegion = getInnermostRegion(DK); -#ifndef NDEBUG - if (MatchingRegion->Kind != RegionKind::Toplevel) { - for (auto &R : reverse(RegionStack)) { - if (R->Kind == RegionKind::Directive && R->DK == DK) - break; - - assert(R->IsCancellable && "Every region in-between must be cancellable as well"); - } - } -#endif - return MatchingRegion->IsCancellable; + return getInnermostRegion(DK)->IsCancellable; } @@ -786,8 +780,7 @@ OpenMPIRBuilder::enterRegion( OpenMPIRBuilder::RegionKind Kind, omp::Directive D -void 
OpenMPIRBuilder::exitRegion(OMPRegionInfo *R, BasicBlock *FinalizationBB, - function_ref FinCB) { +void OpenMPIRBuilder::exitRegion(OMPRegionInfo *R, BasicBlock *FinalizationBB, FinalizeCallbackTy FinCB) { auto DK = R->DK; assert(RegionStack.back().get() == R && "balanced region push/pop required"); R->assertOK(); @@ -796,28 +789,29 @@ void OpenMPIRBuilder::exitRegion(OMPRegionInfo *R, BasicBlock *FinalizationBB, OMPRegionInfo *Innermost = RegionStack.back().get(); OMPRegionInfo *NewInnermost = RegionStack.rbegin()[1].get(); - for (auto &B : reverse(Innermost->Breaks)) { + + for (OMPRegionBreakInfo &B : reverse(Innermost->Breaks)) { assert(!B.BB->getTerminator()); + assert(Innermost->IsCancellable && "surrounding region must be cancellable"); Builder.SetInsertPoint(B.BB); if (B.Target == Innermost) { // Join common finialization block Builder.SetInsertPoint(B.BB); BranchInst *TI = Builder.CreateBr(FinalizationBB); - // if (LeaveCb) - // LeaveCb( InsertPointTy(TI->getParent(), TI->getIterator()), - // B.Reason, Innermost); B.BB = nullptr; } else if (FinCB) { + // Emit dedicated fininalization since we cannot use use the one for the regular exit. + // TODO: Implement switch-on-source-bb-index scheme like Clang's EmitBranchThroughCleanup does. 
B.BB = splitBB(Builder, true, ".fini"); FinCB(Builder.saveIP()); Builder.SetInsertPoint(B.BB); } } - for (OMPRegionBreak &B : Innermost->Breaks) { - if (B.BB) - NewInnermost->addBreak(B.BB, B.Reason, B.Target); + for (OMPRegionBreakInfo &Break : Innermost->Breaks) { + if (Break.BB) + NewInnermost->addBreak(Break.BB, Break.Reason, Break.Target); } Innermost->Breaks.clear(); @@ -825,6 +819,7 @@ void OpenMPIRBuilder::exitRegion(OMPRegionInfo *R, BasicBlock *FinalizationBB, } + OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createBarrier(const LocationDescription &Loc, Directive DK, bool ForceSimpleCall, bool CheckCancelFlag) { @@ -833,6 +828,7 @@ OpenMPIRBuilder::createBarrier(const LocationDescription &Loc, Directive DK, return emitBarrierImpl(Loc, DK, ForceSimpleCall, CheckCancelFlag); } + OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitBarrierImpl(const LocationDescription &Loc, Directive Kind, bool ForceSimpleCall, bool CheckCancelFlag) { @@ -889,15 +885,7 @@ OpenMPIRBuilder::createCancel(const LocationDescription &Loc, if (!updateToLocation(Loc)) return Loc.IP; -#if 0 - // LLVM utilities like blocks with terminators. 
- Instruction* UI ; - if (Builder.GetInsertPoint() == Builder.GetInsertBlock()->end() && !Builder.GetInsertBlock()->getTerminator()) { - UI = Builder.CreateUnreachable(); - } else { - UI = &*Builder.GetInsertPoint(); - } -#endif + BasicBlock *New = nullptr; if (IfCondition) { auto Old = Builder.GetInsertBlock(); @@ -910,12 +898,7 @@ OpenMPIRBuilder::createCancel(const LocationDescription &Loc, Builder.SetInsertPoint(ThenBlock->getTerminator()); } -#if 0 - Instruction *ThenTI = UI, *ElseTI = nullptr; - if (IfCondition) - SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI); - Builder.SetInsertPoint(ThenTI); -#endif + Value *CancelKind = nullptr; switch (CanceledDirective) { @@ -935,37 +918,24 @@ OpenMPIRBuilder::createCancel(const LocationDescription &Loc, Value *Result = Builder.CreateCall( getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args); -#if 0 -// FIXME: This is bad for so many reasons. - // 1. Just pass IP to createBarrier - // 2. This is createParallel's task - // 3. The parallel may be nowait - // 4. There may be other omp regions in-between - auto ExitCB = [this, CanceledDirective, Loc](InsertPointTy IP) { - if (CanceledDirective == OMPD_parallel) { - IRBuilder<>::InsertPointGuard IPG(Builder); - Builder.restoreIP(IP); - createBarrier(LocationDescription(Builder.saveIP(), Loc.DL), - omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false, - /* CheckCancelFlag */ false); - } - }; -#endif + // The actual cancel logic is shared with others, e.g., cancel_barriers. emitCancelationCheckImpl(Loc, Result, CanceledDirective, OMPD_cancel); -#if 0 - // Update the insertion point and remove the terminator we introduced. 
- Builder.SetInsertPoint(UI->getParent()); - UI->eraseFromParent(); -#endif + if (New) return {New, New->begin()}; return Builder.saveIP(); } + + + + + + void OpenMPIRBuilder::emitCancelationCheckImpl(LocationDescription Loc, Value *CancelFlag, omp::Directive CancelledDirective, @@ -977,26 +947,14 @@ void OpenMPIRBuilder::emitCancelationCheckImpl(LocationDescription Loc, // MK: This is garbage BasicBlock *BB = Builder.GetInsertBlock(); -#if 0 - BasicBlock *NonCancellationBlock; - if (Builder.GetInsertPoint() == BB->end()) { - // TODO: This branch will not be needed once we moved to the OpenMPIRBuilder codegen completely. - NonCancellationBlock = BasicBlock::Create(BB->getContext(), BB->getName() + ".cont", BB->getParent()); - } else { - NonCancellationBlock = SplitBlock(BB, &*Builder.GetInsertPoint()); - BB->getTerminator()->eraseFromParent(); - Builder.SetInsertPoint(BB); - } -#endif + // Avoid assertions around "fallthtrough" cleanups in clang. // BasicBlock *NonCancellationCleanupBlock = splitBB(Builder, BB->getName() + // ".cont.cleanup", true); BasicBlock *NonCancellationBlock = splitBBWithSuffix(Builder, false, ".cont"); -#if 0 - FinalizationInfo &FI = FinalizationStack.back(); -#endif + BasicBlock *PreCancellationBlock = BasicBlock::Create(BB->getContext(), BB->getName() + ".cncl.fini", @@ -1009,7 +967,7 @@ void OpenMPIRBuilder::emitCancelationCheckImpl(LocationDescription Loc, Value *Cmp = Builder.CreateIsNull(CancelFlag); Builder.CreateCondBr(Cmp, NonCancellationBlock, PreCancellationBlock, /* TODO weight */ nullptr, nullptr); - // Builder.CreateBr( NonCancellationBlock); + // From the cancellation block we finalize all variables and go to the // post finalization block that is known to the FiniCB callback. 
@@ -1027,31 +985,19 @@ void OpenMPIRBuilder::emitCancelationCheckImpl(LocationDescription Loc, auto CancellationIP = Builder.saveIP(); - // CancellationIP.viewCFG(); + // TODO: Clang's codegen emits finalization code only once and inserts a // switch to jump back to the target code path (CGF.EmitBranchThroughCleanup). // Currently in the OpenMPIRBuilder, we emit the finialization multiple times // for each path exiting the region (non-cancellation and each cancellation // check). -#if 0 - if (FI.FiniCB) - FI.FiniCB(CancellationIP, CanceledDirective, CancelledBy); - if (ExitCB) - ExitCB(Builder.saveIP()); - auto &FI = FinalizationStack.back(); - FI.FiniCB(Builder.saveIP()); -#endif + RegionStack.back()->addBreak(CancellationBlock, CancelledBy, getInnermostRegion(CancelledDirective) ); - // emitRegionExit({CancellationBlock, CancellationBlock->begin()}, - // getInnermostDirectionRegion(CanceledDirective), CancelledBy); - // Builder.SetInsertPoint(CancellationBlock); - // Builder.CreateBr( CancellationBlock); - // if (FI.CancelCB) - // FI.CancelCB(CancellationIP, CanceledDirective, CancelledBy); - // The continuation block is where code generation continues.s + + // The continuation block is where code generation continues. Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin()); // MK: needed? } @@ -2000,7 +1946,7 @@ OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc, Builder.CreateBr(CL->getPreheader()); } - auto LoopRegion = enterRegion(RegionKind::CanonicalLoop, OMPD_unknown, false); + OMPRegionInfo * LoopRegion = enterRegion( RegionKind::CanonicalLoop, OMPD_unknown, /*IsCancellable*/true); // Emit the body content. We do it after connecting the loop to the CFG to // avoid that the callback encounters degenerate BBs. 
From 53e06f022726ccf049dbde7552a093f51ef8eec5 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sun, 1 May 2022 03:20:45 -0500 Subject: [PATCH 24/50] regen cancel --- clang/test/OpenMP/cancel_codegen.cpp | 1304 ++++++++++++++------------ 1 file changed, 684 insertions(+), 620 deletions(-) diff --git a/clang/test/OpenMP/cancel_codegen.cpp b/clang/test/OpenMP/cancel_codegen.cpp index 7cfcd563cf576..1d1bdfd534929 100644 --- a/clang/test/OpenMP/cancel_codegen.cpp +++ b/clang/test/OpenMP/cancel_codegen.cpp @@ -1336,13 +1336,13 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_34:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_37:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I36:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I39:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK3-NEXT: [[R:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 0, i32* [[RETVAL]], align 4 @@ -1389,8 +1389,12 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 3) // CHECK3-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK3-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] -// CHECK3: omp_section_loop.body.case.split: +// CHECK3-NEXT: br i1 [[TMP8]], label 
[[OMP_SECTION_LOOP_BODY_CASE_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL_FINI:%.*]] +// CHECK3: omp_section_loop.body.case.cncl.fini: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] +// CHECK3: omp_section_loop.body.case.cncl: +// CHECK3-NEXT: br label [[SECTION_FINI:%.*]] +// CHECK3: omp_section_loop.body.case.cont: // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_SECTION_AFTER:%.*]] // CHECK3: omp_section_loop.body.case.section.after: // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_SECTIONS_AFTER]] @@ -1405,10 +1409,14 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTER:%.*]] // CHECK3: omp_section_loop.after: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTERSECTIONS_FINI:%.*]] -// CHECK3: omp_section_loop.aftersections.fini: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]] -// CHECK3: omp_section_loop.preheader13: +// CHECK3-NEXT: br label [[SECTION_FINISH:%.*]] +// CHECK3: section_finish: +// CHECK3-NEXT: br label [[DOTOMPFINALIZE13:%.*]] +// CHECK3: .ompfinalize13: +// CHECK3-NEXT: br label [[SECTION_FINI]] +// CHECK3: section_fini: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER14:%.*]] +// CHECK3: omp_section_loop.preheader14: // CHECK3-NEXT: store i32 0, i32* [[P_LOWERBOUND29]], align 4 // CHECK3-NEXT: store i32 1, i32* [[P_UPPERBOUND30]], align 4 // CHECK3-NEXT: store i32 1, i32* [[P_STRIDE31]], align 4 @@ -1418,79 +1426,91 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[P_UPPERBOUND30]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], [[TMP9]] // CHECK3-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 1 -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_HEADER14:%.*]] -// CHECK3: omp_section_loop.header14: -// CHECK3-NEXT: [[OMP_SECTION_LOOP_IV20:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER13]] ], [ 
[[OMP_SECTION_LOOP_NEXT22:%.*]], [[OMP_SECTION_LOOP_INC17:%.*]] ] -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_COND15:%.*]] -// CHECK3: omp_section_loop.cond15: -// CHECK3-NEXT: [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP12]] -// CHECK3-NEXT: br i1 [[OMP_SECTION_LOOP_CMP21]], label [[OMP_SECTION_LOOP_BODY16:%.*]], label [[OMP_SECTION_LOOP_EXIT18:%.*]] -// CHECK3: omp_section_loop.body16: -// CHECK3-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP9]] +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_HEADER15:%.*]] +// CHECK3: omp_section_loop.header15: +// CHECK3-NEXT: [[OMP_SECTION_LOOP_IV21:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER14]] ], [ [[OMP_SECTION_LOOP_NEXT23:%.*]], [[OMP_SECTION_LOOP_INC18:%.*]] ] +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_COND16:%.*]] +// CHECK3: omp_section_loop.cond16: +// CHECK3-NEXT: [[OMP_SECTION_LOOP_CMP22:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV21]], [[TMP12]] +// CHECK3-NEXT: br i1 [[OMP_SECTION_LOOP_CMP22]], label [[OMP_SECTION_LOOP_BODY17:%.*]], label [[OMP_SECTION_LOOP_EXIT19:%.*]] +// CHECK3: omp_section_loop.body17: +// CHECK3-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV21]], [[TMP9]] // CHECK3-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 1 // CHECK3-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], 0 -// CHECK3-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER:%.*]] [ -// CHECK3-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE23:%.*]] -// CHECK3-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE25:%.*]] +// CHECK3-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER:%.*]] [ +// CHECK3-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE24:%.*]] +// CHECK3-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE26:%.*]] // CHECK3-NEXT: ] -// CHECK3: omp_section_loop.body.case23: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 
@__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM24]], i32 3) +// CHECK3: omp_section_loop.body.case24: +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM25:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM25]], i32 3) // CHECK3-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE23_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE23_CNCL:%.*]] -// CHECK3: omp_section_loop.body.case23.split: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE23_SECTION_AFTER:%.*]] -// CHECK3: omp_section_loop.body.case23.section.after: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] -// CHECK3: omp_section_loop.body.case25: +// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE24_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE24_CNCL_FINI:%.*]] +// CHECK3: omp_section_loop.body.case24.cncl.fini: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE24_CNCL:%.*]] +// CHECK3: omp_section_loop.body.case24.cncl: +// CHECK3-NEXT: br label [[SECTION_FINI35:%.*]] +// CHECK3: omp_section_loop.body.case24.cont: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE24_SECTION_AFTER:%.*]] +// CHECK3: omp_section_loop.body.case24.section.after: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER]] +// CHECK3: omp_section_loop.body.case26: // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM27:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM27]], i32 3) // CHECK3-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]] -// CHECK3: omp_section_loop.body.case25.split: -// CHECK3-NEXT: br label 
[[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER26:%.*]] -// CHECK3: omp_section_loop.body.case25.section.after26: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER:%.*]] -// CHECK3: omp_section_loop.body.case25.section.after: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] -// CHECK3: omp_section_loop.body16.sections.after: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_INC17]] -// CHECK3: omp_section_loop.inc17: -// CHECK3-NEXT: [[OMP_SECTION_LOOP_NEXT22]] = add nuw i32 [[OMP_SECTION_LOOP_IV20]], 1 -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_HEADER14]] -// CHECK3: omp_section_loop.exit18: +// CHECK3-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE26_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL_FINI:%.*]] +// CHECK3: omp_section_loop.body.case26.cncl.fini: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL:%.*]] +// CHECK3: omp_section_loop.body.case26.cncl: +// CHECK3-NEXT: br label [[SECTION_FINI35]] +// CHECK3: omp_section_loop.body.case26.cont: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTIONFINI:%.*]] +// CHECK3: omp_section_loop.body.case26.sectionfini: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTION_AFTER:%.*]] +// CHECK3: omp_section_loop.body.case26.section.after: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER]] +// CHECK3: omp_section_loop.body17.sections.after: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_INC18]] +// CHECK3: omp_section_loop.inc18: +// CHECK3-NEXT: [[OMP_SECTION_LOOP_NEXT23]] = add nuw i32 [[OMP_SECTION_LOOP_IV21]], 1 +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_HEADER15]] +// CHECK3: omp_section_loop.exit19: // CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]]) // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM33:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 
[[OMP_GLOBAL_THREAD_NUM33]]) -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTER19:%.*]] -// CHECK3: omp_section_loop.after19: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTER19SECTIONS_FINI:%.*]] -// CHECK3: omp_section_loop.after19sections.fini: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTER20:%.*]] +// CHECK3: omp_section_loop.after20: +// CHECK3-NEXT: br label [[SECTION_FINISH34:%.*]] +// CHECK3: section_finish34: +// CHECK3-NEXT: br label [[DOTOMPFINALIZE36:%.*]] +// CHECK3: .ompfinalize36: +// CHECK3-NEXT: br label [[SECTION_FINI35]] +// CHECK3: section_fini35: // CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP21]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK3-NEXT: [[SUB35:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK3-NEXT: store i32 [[SUB35]], i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK3-NEXT: [[SUB38:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK3-NEXT: store i32 [[SUB38]], i32* [[DOTCAPTURE_EXPR_37]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 // CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP22]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 // CHECK3-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM37:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) -// 
CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM37]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM40:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM40]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 -// CHECK3-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] -// CHECK3-NEXT: br i1 [[CMP38]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 +// CHECK3-NEXT: [[CMP41:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: br i1 [[CMP41]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: // CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 @@ -1504,29 +1524,23 @@ for (int i = 0; i < argc; ++i) { // CHECK3: omp.inner.for.cond: // CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP39:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] -// CHECK3-NEXT: br i1 [[CMP39]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[CMP42:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK3-NEXT: br i1 [[CMP42]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // 
CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 -// CHECK3-NEXT: [[ADD40:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD40]], i32* [[I36]], align 4 +// CHECK3-NEXT: [[ADD43:%.*]] = add nsw i32 0, [[MUL]] +// CHECK3-NEXT: store i32 [[ADD43]], i32* [[I39]], align 4 // CHECK3-NEXT: [[TMP32:%.*]] = load float, float* @flag, align 4 -// CHECK3-NEXT: [[TOBOOL41:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 -// CHECK3-NEXT: br i1 [[TOBOOL41]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK3-NEXT: [[TOBOOL44:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 +// CHECK3-NEXT: br i1 [[TOBOOL44]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) -// CHECK3-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 2) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) +// CHECK3-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM45]], i32 2) // CHECK3-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 // CHECK3-NEXT: br i1 [[TMP34]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK3: .cancel.exit: // CHECK3-NEXT: br label [[CANCEL_EXIT:%.*]] -// CHECK3: omp_section_loop.body.case.cncl: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_EXIT]] -// CHECK3: omp_section_loop.body.case23.cncl: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] -// CHECK3: omp_section_loop.body.case25.cncl: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] // CHECK3: .cancel.continue: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: @@ -1537,30 +1551,30 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: br 
label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: // CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK3-NEXT: store i32 [[ADD43]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD46:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK3-NEXT: store i32 [[ADD46]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM45]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM48]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: cancel.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM44:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM44]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM47]]) // CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: omp.precond.end: // CHECK3-NEXT: br label [[CANCEL_CONT]] // CHECK3: cancel.cont: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM46:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM46]]) -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* 
@[[GLOB14:[0-9]+]]) -// CHECK3-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM49:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM49]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM50:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) +// CHECK3-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM50]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) // CHECK3-NEXT: [[TMP37:%.*]] = bitcast i8* [[TMP36]] to %struct.kmp_task_t_with_privates* // CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 0 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) -// CHECK3-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM48]], i8* [[TMP36]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM51:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) +// CHECK3-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM51]], i8* [[TMP36]]) // CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) // CHECK3-NEXT: store i32 0, i32* [[R]], align 4 @@ -1584,50 +1598,54 @@ for (int i = 0; i < argc; ++i) { // CHECK3: omp.par.region: // CHECK3-NEXT: [[TMP2:%.*]] = load float, float* @flag, align 4 // CHECK3-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00 -// CHECK3-NEXT: br i1 [[TOBOOL]], label [[TMP14:%.*]], label [[TMP3:%.*]] -// CHECK3: 3: -// CHECK3-NEXT: br label [[TMP4:%.*]] -// CHECK3: 4: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8 -// CHECK3-NEXT: [[TMP6:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP6]], i64 0 -// CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 0 -// CHECK3-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) -// CHECK3-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 -// CHECK3-NEXT: br i1 [[TMP9]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]] -// CHECK3: .cncl5: +// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_PAR_REGION_IF:%.*]], label [[OMP_PAR_REGION1:%.*]] +// CHECK3: omp.par.region1: +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i32 [[TMP3]] to i8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP4]], i64 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK3-NEXT: 
[[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i64 0 +// CHECK3-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX4]], align 1 +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM5]]) +// CHECK3-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0 +// CHECK3-NEXT: br i1 [[TMP7]], label [[OMP_PAR_REGION1_CONT:%.*]], label [[OMP_PAR_REGION1_CNCL_FINI:%.*]] +// CHECK3: omp.par.region1.cncl.fini: +// CHECK3-NEXT: br label [[OMP_PAR_REGION1_CNCL:%.*]] +// CHECK3: omp.par.region1.cncl: +// CHECK3-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +// CHECK3: omp.par.pre_finalize: +// CHECK3-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK3: .ompfinalize: // CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] -// CHECK3: .cont: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0 -// CHECK3-NEXT: [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1 -// CHECK3-NEXT: [[CONV8:%.*]] = sext i8 [[TMP13]] to i32 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP10]] +// CHECK3: omp.par.region1.cont: +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP9]], i64 0 +// CHECK3-NEXT: [[TMP10:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP10]], i64 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i8, i8* 
[[ARRAYIDX7]], align 1 +// CHECK3-NEXT: [[CONV8:%.*]] = sext i8 [[TMP11]] to i32 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP8]] // CHECK3-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD]] to i8 // CHECK3-NEXT: store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1 // CHECK3-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] // CHECK3: omp.par.region.parallel.after: -// CHECK3-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] -// CHECK3: omp.par.pre_finalize: -// CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] -// CHECK3: 14: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1) -// CHECK3-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0 -// CHECK3-NEXT: br i1 [[TMP16]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]] -// CHECK3: .cncl: +// CHECK3-NEXT: br label [[OMP_PAR_PRE_FINALIZE]] +// CHECK3: omp.par.region.if: // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -// CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] -// CHECK3: .split: -// CHECK3-NEXT: br label [[TMP4]] +// CHECK3-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]], i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 0 +// CHECK3-NEXT: br i1 [[TMP13]], label [[OMP_PAR_REGION_IF_CONT:%.*]], label [[OMP_PAR_REGION_IF_CNCL_FINI:%.*]] +// CHECK3: omp.par.region.if.cncl.fini: +// CHECK3-NEXT: br label [[OMP_PAR_REGION_IF_CNCL:%.*]] +// CHECK3: omp.par.region.if.cncl: +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* 
@[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +// CHECK3-NEXT: br label [[OMP_PAR_PRE_FINALIZE]] +// CHECK3: omp.par.region.if.cont: +// CHECK3-NEXT: br label [[OMP_PAR_REGION1]] // CHECK3: omp.par.outlined.exit.exitStub: // CHECK3-NEXT: ret void // @@ -1716,14 +1734,14 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.sections.case: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) // CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK3-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK3-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] -// CHECK3: .omp.sections.case.split: +// CHECK3-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK3-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK3: .cancel.exit: +// CHECK3-NEXT: br label [[CANCEL_EXIT:%.*]] +// CHECK3: .cancel.continue: // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK3: .omp.sections.case.cncl: -// CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: .omp.sections.exit: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: @@ -1732,13 +1750,13 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* 
@[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) -// CHECK3-NEXT: br label [[CANCEL_CONT]] +// CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: cancel.cont: // CHECK3-NEXT: ret void // CHECK3: cancel.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) // CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK3-NEXT: br label [[CANCEL_CONT]] // @@ -1759,7 +1777,7 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 1 @@ -1780,25 +1798,23 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.sections.case: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) // CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], 
i32 3) -// CHECK3-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK3-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] -// CHECK3: .omp.sections.case.split: +// CHECK3-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK3-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK3: .cancel.exit: +// CHECK3-NEXT: br label [[CANCEL_EXIT:%.*]] +// CHECK3: .cancel.continue: // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK3: .omp.sections.case.cncl: -// CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: .omp.sections.case2: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) // CHECK3-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 3) -// CHECK3-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0 -// CHECK3-NEXT: br i1 [[TMP10]], label [[DOTOMP_SECTIONS_CASE2_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE2_CNCL:%.*]] -// CHECK3: .omp.sections.case2.split: -// CHECK3-NEXT: br label [[DOTOMP_SECTIONS_CASE2_SECTION_AFTER:%.*]] -// CHECK3: .omp.sections.case2.section.after: +// CHECK3-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK3-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT4:%.*]], label [[DOTCANCEL_CONTINUE5:%.*]] +// CHECK3: .cancel.exit4: +// CHECK3-NEXT: br label [[CANCEL_EXIT]] +// CHECK3: .cancel.continue5: // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK3: .omp.sections.case2.cncl: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_END]] // CHECK3: .omp.sections.exit: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: @@ -1807,14 +1823,14 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label 
[[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM5]]) -// CHECK3-NEXT: br label [[CANCEL_CONT]] +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM7]]) +// CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: cancel.cont: // CHECK3-NEXT: ret void // CHECK3: cancel.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23]]) -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) // CHECK3-NEXT: br label [[CANCEL_CONT]] // // @@ -1861,7 +1877,7 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: store i32 0, i32* [[R3]], align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB31:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* 
[[DOTCAPTURE_EXPR_1]], align 4 @@ -1889,7 +1905,7 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB33:[0-9]+]]) // CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM7]], i32 2) // CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 // CHECK3-NEXT: br i1 [[TMP15]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] @@ -1911,14 +1927,14 @@ for (int i = 0; i < argc; ++i) { // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK3-NEXT: [[TMP20:%.*]] = bitcast i32* [[R3]] to i8* // CHECK3-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) // CHECK3-NEXT: [[TMP21:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB30:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, 
i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB36:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] @@ -1928,10 +1944,10 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[R3]], align 4 // CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: store i32 [[ADD13]], i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB30]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: cancel.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) // CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) // CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: .omp.reduction.case2: @@ -1988,13 +2004,13 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTCAPTURE_EXPR_34:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_37:%.*]] = alloca i32, align 
4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[I36:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[I39:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK4-NEXT: [[R:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store i32 0, i32* [[RETVAL]], align 4 @@ -2041,8 +2057,12 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 3) // CHECK4-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK4-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] -// CHECK4: omp_section_loop.body.case.split: +// CHECK4-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL_FINI:%.*]] +// CHECK4: omp_section_loop.body.case.cncl.fini: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] +// CHECK4: omp_section_loop.body.case.cncl: +// CHECK4-NEXT: br label [[SECTION_FINI:%.*]] +// CHECK4: omp_section_loop.body.case.cont: // CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_SECTION_AFTER:%.*]] // CHECK4: omp_section_loop.body.case.section.after: // CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_SECTIONS_AFTER]] @@ -2057,10 +2077,14 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) // CHECK4-NEXT: br label [[OMP_SECTION_LOOP_AFTER:%.*]] // CHECK4: omp_section_loop.after: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_AFTERSECTIONS_FINI:%.*]] -// 
CHECK4: omp_section_loop.aftersections.fini: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]] -// CHECK4: omp_section_loop.preheader13: +// CHECK4-NEXT: br label [[SECTION_FINISH:%.*]] +// CHECK4: section_finish: +// CHECK4-NEXT: br label [[DOTOMPFINALIZE13:%.*]] +// CHECK4: .ompfinalize13: +// CHECK4-NEXT: br label [[SECTION_FINI]] +// CHECK4: section_fini: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER14:%.*]] +// CHECK4: omp_section_loop.preheader14: // CHECK4-NEXT: store i32 0, i32* [[P_LOWERBOUND29]], align 4 // CHECK4-NEXT: store i32 1, i32* [[P_UPPERBOUND30]], align 4 // CHECK4-NEXT: store i32 1, i32* [[P_STRIDE31]], align 4 @@ -2070,79 +2094,91 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[P_UPPERBOUND30]], align 4 // CHECK4-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], [[TMP9]] // CHECK4-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 1 -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_HEADER14:%.*]] -// CHECK4: omp_section_loop.header14: -// CHECK4-NEXT: [[OMP_SECTION_LOOP_IV20:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER13]] ], [ [[OMP_SECTION_LOOP_NEXT22:%.*]], [[OMP_SECTION_LOOP_INC17:%.*]] ] -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_COND15:%.*]] -// CHECK4: omp_section_loop.cond15: -// CHECK4-NEXT: [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP12]] -// CHECK4-NEXT: br i1 [[OMP_SECTION_LOOP_CMP21]], label [[OMP_SECTION_LOOP_BODY16:%.*]], label [[OMP_SECTION_LOOP_EXIT18:%.*]] -// CHECK4: omp_section_loop.body16: -// CHECK4-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP9]] +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_HEADER15:%.*]] +// CHECK4: omp_section_loop.header15: +// CHECK4-NEXT: [[OMP_SECTION_LOOP_IV21:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER14]] ], [ [[OMP_SECTION_LOOP_NEXT23:%.*]], [[OMP_SECTION_LOOP_INC18:%.*]] ] +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_COND16:%.*]] +// CHECK4: omp_section_loop.cond16: +// CHECK4-NEXT: 
[[OMP_SECTION_LOOP_CMP22:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV21]], [[TMP12]] +// CHECK4-NEXT: br i1 [[OMP_SECTION_LOOP_CMP22]], label [[OMP_SECTION_LOOP_BODY17:%.*]], label [[OMP_SECTION_LOOP_EXIT19:%.*]] +// CHECK4: omp_section_loop.body17: +// CHECK4-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV21]], [[TMP9]] // CHECK4-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 1 // CHECK4-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], 0 -// CHECK4-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER:%.*]] [ -// CHECK4-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE23:%.*]] -// CHECK4-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE25:%.*]] +// CHECK4-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER:%.*]] [ +// CHECK4-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE24:%.*]] +// CHECK4-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE26:%.*]] // CHECK4-NEXT: ] -// CHECK4: omp_section_loop.body.case23: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM24]], i32 3) +// CHECK4: omp_section_loop.body.case24: +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM25:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM25]], i32 3) // CHECK4-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0 -// CHECK4-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE23_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE23_CNCL:%.*]] -// CHECK4: omp_section_loop.body.case23.split: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE23_SECTION_AFTER:%.*]] -// CHECK4: omp_section_loop.body.case23.section.after: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] -// CHECK4: omp_section_loop.body.case25: +// CHECK4-NEXT: br i1 [[TMP17]], label 
[[OMP_SECTION_LOOP_BODY_CASE24_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE24_CNCL_FINI:%.*]] +// CHECK4: omp_section_loop.body.case24.cncl.fini: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE24_CNCL:%.*]] +// CHECK4: omp_section_loop.body.case24.cncl: +// CHECK4-NEXT: br label [[SECTION_FINI35:%.*]] +// CHECK4: omp_section_loop.body.case24.cont: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE24_SECTION_AFTER:%.*]] +// CHECK4: omp_section_loop.body.case24.section.after: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER]] +// CHECK4: omp_section_loop.body.case26: // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM27:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM27]], i32 3) // CHECK4-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 0 -// CHECK4-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]] -// CHECK4: omp_section_loop.body.case25.split: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER26:%.*]] -// CHECK4: omp_section_loop.body.case25.section.after26: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER:%.*]] -// CHECK4: omp_section_loop.body.case25.section.after: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] -// CHECK4: omp_section_loop.body16.sections.after: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_INC17]] -// CHECK4: omp_section_loop.inc17: -// CHECK4-NEXT: [[OMP_SECTION_LOOP_NEXT22]] = add nuw i32 [[OMP_SECTION_LOOP_IV20]], 1 -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_HEADER14]] -// CHECK4: omp_section_loop.exit18: +// CHECK4-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE26_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL_FINI:%.*]] +// CHECK4: omp_section_loop.body.case26.cncl.fini: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL:%.*]] +// 
CHECK4: omp_section_loop.body.case26.cncl: +// CHECK4-NEXT: br label [[SECTION_FINI35]] +// CHECK4: omp_section_loop.body.case26.cont: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTIONFINI:%.*]] +// CHECK4: omp_section_loop.body.case26.sectionfini: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTION_AFTER:%.*]] +// CHECK4: omp_section_loop.body.case26.section.after: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER]] +// CHECK4: omp_section_loop.body17.sections.after: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_INC18]] +// CHECK4: omp_section_loop.inc18: +// CHECK4-NEXT: [[OMP_SECTION_LOOP_NEXT23]] = add nuw i32 [[OMP_SECTION_LOOP_IV21]], 1 +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_HEADER15]] +// CHECK4: omp_section_loop.exit19: // CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]]) // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM33:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM33]]) -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_AFTER19:%.*]] -// CHECK4: omp_section_loop.after19: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_AFTER19SECTIONS_FINI:%.*]] -// CHECK4: omp_section_loop.after19sections.fini: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_AFTER20:%.*]] +// CHECK4: omp_section_loop.after20: +// CHECK4-NEXT: br label [[SECTION_FINISH34:%.*]] +// CHECK4: section_finish34: +// CHECK4-NEXT: br label [[DOTOMPFINALIZE36:%.*]] +// CHECK4: .ompfinalize36: +// CHECK4-NEXT: br label [[SECTION_FINI35]] +// CHECK4: section_fini35: // CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP21]], 0 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// 
CHECK4-NEXT: [[SUB35:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK4-NEXT: store i32 [[SUB35]], i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK4-NEXT: [[SUB38:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK4-NEXT: store i32 [[SUB38]], i32* [[DOTCAPTURE_EXPR_37]], align 4 // CHECK4-NEXT: store i32 0, i32* [[I]], align 4 // CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP22]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK4: omp.precond.then: // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 // CHECK4-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM37:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM37]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM40:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM40]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 -// CHECK4-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] -// CHECK4-NEXT: br i1 [[CMP38]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK4-NEXT: 
[[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 +// CHECK4-NEXT: [[CMP41:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +// CHECK4-NEXT: br i1 [[CMP41]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: -// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: // CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 @@ -2156,29 +2192,23 @@ for (int i = 0; i < argc; ++i) { // CHECK4: omp.inner.for.cond: // CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP39:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] -// CHECK4-NEXT: br i1 [[CMP39]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4-NEXT: [[CMP42:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK4-NEXT: br i1 [[CMP42]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: // CHECK4-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 -// CHECK4-NEXT: [[ADD40:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD40]], i32* [[I36]], align 4 +// CHECK4-NEXT: [[ADD43:%.*]] = add nsw i32 0, [[MUL]] +// CHECK4-NEXT: store i32 [[ADD43]], i32* [[I39]], align 4 // CHECK4-NEXT: [[TMP32:%.*]] = load float, float* @flag, align 4 -// CHECK4-NEXT: [[TOBOOL41:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 -// CHECK4-NEXT: br i1 [[TOBOOL41]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK4-NEXT: [[TOBOOL44:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 +// CHECK4-NEXT: br i1 [[TOBOOL44]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) -// CHECK4-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 2) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) +// CHECK4-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM45]], i32 2) // CHECK4-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 // CHECK4-NEXT: br i1 [[TMP34]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK4: .cancel.exit: // CHECK4-NEXT: br label [[CANCEL_EXIT:%.*]] -// CHECK4: omp_section_loop.body.case.cncl: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_EXIT]] -// CHECK4: omp_section_loop.body.case23.cncl: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] -// CHECK4: omp_section_loop.body.case25.cncl: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] // CHECK4: .cancel.continue: // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] // CHECK4: omp_if.else: @@ -2189,30 +2219,30 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: // CHECK4-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK4-NEXT: store i32 [[ADD43]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD46:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK4-NEXT: store i32 [[ADD46]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM45]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM48]]) // CHECK4-NEXT: br label [[OMP_PRECOND_END]] // CHECK4: cancel.exit: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM44:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM44]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM47]]) // CHECK4-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK4: omp.precond.end: // CHECK4-NEXT: br label [[CANCEL_CONT]] // CHECK4: cancel.cont: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM46:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM46]]) -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) -// CHECK4-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM49:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM49]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM50:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) +// CHECK4-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM50]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) // CHECK4-NEXT: [[TMP37:%.*]] = bitcast i8* [[TMP36]] to %struct.kmp_task_t_with_privates* // CHECK4-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 0 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) -// CHECK4-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM48]], i8* [[TMP36]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM51:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) +// CHECK4-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM51]], i8* [[TMP36]]) // CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) // CHECK4-NEXT: store i32 0, i32* [[R]], align 4 @@ -2236,50 +2266,54 @@ for (int i = 0; i < argc; ++i) { // CHECK4: omp.par.region: // CHECK4-NEXT: [[TMP2:%.*]] = load float, float* @flag, align 4 // CHECK4-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00 -// CHECK4-NEXT: br i1 [[TOBOOL]], label [[TMP14:%.*]], label [[TMP3:%.*]] -// CHECK4: 3: -// CHECK4-NEXT: br label [[TMP4:%.*]] -// CHECK4: 4: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK4-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8 -// CHECK4-NEXT: [[TMP6:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP6]], i64 0 -// CHECK4-NEXT: [[TMP7:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK4-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 0 -// CHECK4-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) -// CHECK4-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 -// CHECK4-NEXT: br i1 [[TMP9]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]] -// CHECK4: .cncl5: +// CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_PAR_REGION_IF:%.*]], label [[OMP_PAR_REGION1:%.*]] +// CHECK4: omp.par.region1: +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK4-NEXT: [[CONV:%.*]] = trunc i32 [[TMP3]] to i8 +// CHECK4-NEXT: [[TMP4:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP4]], i64 0 +// CHECK4-NEXT: [[TMP5:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK4-NEXT: 
[[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i64 0 +// CHECK4-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX4]], align 1 +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM5]]) +// CHECK4-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0 +// CHECK4-NEXT: br i1 [[TMP7]], label [[OMP_PAR_REGION1_CONT:%.*]], label [[OMP_PAR_REGION1_CNCL_FINI:%.*]] +// CHECK4: omp.par.region1.cncl.fini: +// CHECK4-NEXT: br label [[OMP_PAR_REGION1_CNCL:%.*]] +// CHECK4: omp.par.region1.cncl: +// CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +// CHECK4: omp.par.pre_finalize: +// CHECK4-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK4: .ompfinalize: // CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] -// CHECK4: .cont: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK4-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0 -// CHECK4-NEXT: [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 -// CHECK4-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0 -// CHECK4-NEXT: [[TMP13:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1 -// CHECK4-NEXT: [[CONV8:%.*]] = sext i8 [[TMP13]] to i32 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP10]] +// CHECK4: omp.par.region1.cont: +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK4-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP9]], i64 0 +// CHECK4-NEXT: [[TMP10:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 +// CHECK4-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP10]], i64 0 +// CHECK4-NEXT: [[TMP11:%.*]] = load i8, i8* 
[[ARRAYIDX7]], align 1 +// CHECK4-NEXT: [[CONV8:%.*]] = sext i8 [[TMP11]] to i32 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP8]] // CHECK4-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD]] to i8 // CHECK4-NEXT: store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1 // CHECK4-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] // CHECK4: omp.par.region.parallel.after: -// CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] -// CHECK4: omp.par.pre_finalize: -// CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] -// CHECK4: 14: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1) -// CHECK4-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0 -// CHECK4-NEXT: br i1 [[TMP16]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]] -// CHECK4: .cncl: +// CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE]] +// CHECK4: omp.par.region.if: // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -// CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] -// CHECK4: .split: -// CHECK4-NEXT: br label [[TMP4]] +// CHECK4-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]], i32 1) +// CHECK4-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 0 +// CHECK4-NEXT: br i1 [[TMP13]], label [[OMP_PAR_REGION_IF_CONT:%.*]], label [[OMP_PAR_REGION_IF_CNCL_FINI:%.*]] +// CHECK4: omp.par.region.if.cncl.fini: +// CHECK4-NEXT: br label [[OMP_PAR_REGION_IF_CNCL:%.*]] +// CHECK4: omp.par.region.if.cncl: +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* 
@[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +// CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE]] +// CHECK4: omp.par.region.if.cont: +// CHECK4-NEXT: br label [[OMP_PAR_REGION1]] // CHECK4: omp.par.outlined.exit.exitStub: // CHECK4-NEXT: ret void // @@ -2368,14 +2402,14 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK4-NEXT: ] // CHECK4: .omp.sections.case: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) // CHECK4-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK4-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK4-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] -// CHECK4: .omp.sections.case.split: +// CHECK4-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK4-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK4: .cancel.exit: +// CHECK4-NEXT: br label [[CANCEL_EXIT:%.*]] +// CHECK4: .cancel.continue: // CHECK4-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK4: .omp.sections.case.cncl: -// CHECK4-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK4: .omp.sections.exit: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: @@ -2384,13 +2418,13 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) // CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* 
@[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) -// CHECK4-NEXT: br label [[CANCEL_CONT]] +// CHECK4-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK4: cancel.cont: // CHECK4-NEXT: ret void // CHECK4: cancel.exit: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) // CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK4-NEXT: br label [[CANCEL_CONT]] // @@ -2411,7 +2445,7 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) // CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) // CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK4-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 1 @@ -2432,25 +2466,23 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]] // CHECK4-NEXT: ] // CHECK4: .omp.sections.case: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) // CHECK4-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], 
i32 3) -// CHECK4-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK4-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] -// CHECK4: .omp.sections.case.split: +// CHECK4-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK4-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK4: .cancel.exit: +// CHECK4-NEXT: br label [[CANCEL_EXIT:%.*]] +// CHECK4: .cancel.continue: // CHECK4-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK4: .omp.sections.case.cncl: -// CHECK4-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK4: .omp.sections.case2: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) // CHECK4-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 3) -// CHECK4-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0 -// CHECK4-NEXT: br i1 [[TMP10]], label [[DOTOMP_SECTIONS_CASE2_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE2_CNCL:%.*]] -// CHECK4: .omp.sections.case2.split: -// CHECK4-NEXT: br label [[DOTOMP_SECTIONS_CASE2_SECTION_AFTER:%.*]] -// CHECK4: .omp.sections.case2.section.after: +// CHECK4-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK4-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT4:%.*]], label [[DOTCANCEL_CONTINUE5:%.*]] +// CHECK4: .cancel.exit4: +// CHECK4-NEXT: br label [[CANCEL_EXIT]] +// CHECK4: .cancel.continue5: // CHECK4-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK4: .omp.sections.case2.cncl: -// CHECK4-NEXT: br label [[OMP_INNER_FOR_END]] // CHECK4: .omp.sections.exit: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: @@ -2459,14 +2491,14 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK4-NEXT: br label 
[[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM5]]) -// CHECK4-NEXT: br label [[CANCEL_CONT]] +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM7]]) +// CHECK4-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK4: cancel.cont: // CHECK4-NEXT: ret void // CHECK4: cancel.exit: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23]]) -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) // CHECK4-NEXT: br label [[CANCEL_CONT]] // // @@ -2513,7 +2545,7 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK4-NEXT: store i32 0, i32* [[R3]], align 4 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB31:[0-9]+]]) // CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* 
[[DOTCAPTURE_EXPR_1]], align 4 @@ -2541,7 +2573,7 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB33:[0-9]+]]) // CHECK4-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM7]], i32 2) // CHECK4-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 // CHECK4-NEXT: br i1 [[TMP15]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] @@ -2563,14 +2595,14 @@ for (int i = 0; i < argc; ++i) { // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35:[0-9]+]]) // CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) // CHECK4-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK4-NEXT: [[TMP20:%.*]] = bitcast i32* [[R3]] to i8* // CHECK4-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) // CHECK4-NEXT: [[TMP21:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK4-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB30:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, 
i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB36:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK4-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK4-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK4-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] @@ -2580,10 +2612,10 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[R3]], align 4 // CHECK4-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK4-NEXT: store i32 [[ADD13]], i32* [[TMP1]], align 4 -// CHECK4-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB30]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK4: cancel.exit: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) // CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) // CHECK4-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK4: .omp.reduction.case2: @@ -3880,13 +3912,13 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_34:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_37:%.*]] = alloca i32, align 
4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I36:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I39:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK9-NEXT: [[R:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32 0, i32* [[RETVAL]], align 4 @@ -3933,8 +3965,12 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 3) // CHECK9-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK9-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] -// CHECK9: omp_section_loop.body.case.split: +// CHECK9-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL_FINI:%.*]] +// CHECK9: omp_section_loop.body.case.cncl.fini: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] +// CHECK9: omp_section_loop.body.case.cncl: +// CHECK9-NEXT: br label [[SECTION_FINI:%.*]] +// CHECK9: omp_section_loop.body.case.cont: // CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_SECTION_AFTER:%.*]] // CHECK9: omp_section_loop.body.case.section.after: // CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_SECTIONS_AFTER]] @@ -3949,10 +3985,14 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) // CHECK9-NEXT: br label [[OMP_SECTION_LOOP_AFTER:%.*]] // CHECK9: omp_section_loop.after: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_AFTERSECTIONS_FINI:%.*]] -// 
CHECK9: omp_section_loop.aftersections.fini: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]] -// CHECK9: omp_section_loop.preheader13: +// CHECK9-NEXT: br label [[SECTION_FINISH:%.*]] +// CHECK9: section_finish: +// CHECK9-NEXT: br label [[DOTOMPFINALIZE13:%.*]] +// CHECK9: .ompfinalize13: +// CHECK9-NEXT: br label [[SECTION_FINI]] +// CHECK9: section_fini: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER14:%.*]] +// CHECK9: omp_section_loop.preheader14: // CHECK9-NEXT: store i32 0, i32* [[P_LOWERBOUND29]], align 4 // CHECK9-NEXT: store i32 1, i32* [[P_UPPERBOUND30]], align 4 // CHECK9-NEXT: store i32 1, i32* [[P_STRIDE31]], align 4 @@ -3962,79 +4002,91 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[P_UPPERBOUND30]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], [[TMP9]] // CHECK9-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 1 -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_HEADER14:%.*]] -// CHECK9: omp_section_loop.header14: -// CHECK9-NEXT: [[OMP_SECTION_LOOP_IV20:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER13]] ], [ [[OMP_SECTION_LOOP_NEXT22:%.*]], [[OMP_SECTION_LOOP_INC17:%.*]] ] -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_COND15:%.*]] -// CHECK9: omp_section_loop.cond15: -// CHECK9-NEXT: [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP12]] -// CHECK9-NEXT: br i1 [[OMP_SECTION_LOOP_CMP21]], label [[OMP_SECTION_LOOP_BODY16:%.*]], label [[OMP_SECTION_LOOP_EXIT18:%.*]] -// CHECK9: omp_section_loop.body16: -// CHECK9-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP9]] +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_HEADER15:%.*]] +// CHECK9: omp_section_loop.header15: +// CHECK9-NEXT: [[OMP_SECTION_LOOP_IV21:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER14]] ], [ [[OMP_SECTION_LOOP_NEXT23:%.*]], [[OMP_SECTION_LOOP_INC18:%.*]] ] +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_COND16:%.*]] +// CHECK9: omp_section_loop.cond16: +// CHECK9-NEXT: 
[[OMP_SECTION_LOOP_CMP22:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV21]], [[TMP12]] +// CHECK9-NEXT: br i1 [[OMP_SECTION_LOOP_CMP22]], label [[OMP_SECTION_LOOP_BODY17:%.*]], label [[OMP_SECTION_LOOP_EXIT19:%.*]] +// CHECK9: omp_section_loop.body17: +// CHECK9-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV21]], [[TMP9]] // CHECK9-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 1 // CHECK9-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], 0 -// CHECK9-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER:%.*]] [ -// CHECK9-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE23:%.*]] -// CHECK9-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE25:%.*]] +// CHECK9-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER:%.*]] [ +// CHECK9-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE24:%.*]] +// CHECK9-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE26:%.*]] // CHECK9-NEXT: ] -// CHECK9: omp_section_loop.body.case23: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM24]], i32 3) +// CHECK9: omp_section_loop.body.case24: +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM25:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM25]], i32 3) // CHECK9-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0 -// CHECK9-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE23_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE23_CNCL:%.*]] -// CHECK9: omp_section_loop.body.case23.split: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE23_SECTION_AFTER:%.*]] -// CHECK9: omp_section_loop.body.case23.section.after: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] -// CHECK9: omp_section_loop.body.case25: +// CHECK9-NEXT: br i1 [[TMP17]], label 
[[OMP_SECTION_LOOP_BODY_CASE24_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE24_CNCL_FINI:%.*]] +// CHECK9: omp_section_loop.body.case24.cncl.fini: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE24_CNCL:%.*]] +// CHECK9: omp_section_loop.body.case24.cncl: +// CHECK9-NEXT: br label [[SECTION_FINI35:%.*]] +// CHECK9: omp_section_loop.body.case24.cont: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE24_SECTION_AFTER:%.*]] +// CHECK9: omp_section_loop.body.case24.section.after: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER]] +// CHECK9: omp_section_loop.body.case26: // CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM27:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM27]], i32 3) // CHECK9-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 0 -// CHECK9-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]] -// CHECK9: omp_section_loop.body.case25.split: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER26:%.*]] -// CHECK9: omp_section_loop.body.case25.section.after26: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER:%.*]] -// CHECK9: omp_section_loop.body.case25.section.after: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] -// CHECK9: omp_section_loop.body16.sections.after: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_INC17]] -// CHECK9: omp_section_loop.inc17: -// CHECK9-NEXT: [[OMP_SECTION_LOOP_NEXT22]] = add nuw i32 [[OMP_SECTION_LOOP_IV20]], 1 -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_HEADER14]] -// CHECK9: omp_section_loop.exit18: +// CHECK9-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE26_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL_FINI:%.*]] +// CHECK9: omp_section_loop.body.case26.cncl.fini: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL:%.*]] +// 
CHECK9: omp_section_loop.body.case26.cncl: +// CHECK9-NEXT: br label [[SECTION_FINI35]] +// CHECK9: omp_section_loop.body.case26.cont: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTIONFINI:%.*]] +// CHECK9: omp_section_loop.body.case26.sectionfini: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTION_AFTER:%.*]] +// CHECK9: omp_section_loop.body.case26.section.after: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER]] +// CHECK9: omp_section_loop.body17.sections.after: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_INC18]] +// CHECK9: omp_section_loop.inc18: +// CHECK9-NEXT: [[OMP_SECTION_LOOP_NEXT23]] = add nuw i32 [[OMP_SECTION_LOOP_IV21]], 1 +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_HEADER15]] +// CHECK9: omp_section_loop.exit19: // CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]]) // CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM33:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM33]]) -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_AFTER19:%.*]] -// CHECK9: omp_section_loop.after19: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_AFTER19SECTIONS_FINI:%.*]] -// CHECK9: omp_section_loop.after19sections.fini: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_AFTER20:%.*]] +// CHECK9: omp_section_loop.after20: +// CHECK9-NEXT: br label [[SECTION_FINISH34:%.*]] +// CHECK9: section_finish34: +// CHECK9-NEXT: br label [[DOTOMPFINALIZE36:%.*]] +// CHECK9: .ompfinalize36: +// CHECK9-NEXT: br label [[SECTION_FINI35]] +// CHECK9: section_fini35: // CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP21]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// 
CHECK9-NEXT: [[SUB35:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK9-NEXT: store i32 [[SUB35]], i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK9-NEXT: [[SUB38:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK9-NEXT: store i32 [[SUB38]], i32* [[DOTCAPTURE_EXPR_37]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 // CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP22]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 // CHECK9-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM37:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM37]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM40:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM40]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 -// CHECK9-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] -// CHECK9-NEXT: br i1 [[CMP38]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: 
[[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 +// CHECK9-NEXT: [[CMP41:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +// CHECK9-NEXT: br i1 [[CMP41]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: // CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 @@ -4048,29 +4100,23 @@ for (int i = 0; i < argc; ++i) { // CHECK9: omp.inner.for.cond: // CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP39:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] -// CHECK9-NEXT: br i1 [[CMP39]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[CMP42:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK9-NEXT: br i1 [[CMP42]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: // CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 -// CHECK9-NEXT: [[ADD40:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD40]], i32* [[I36]], align 4 +// CHECK9-NEXT: [[ADD43:%.*]] = add nsw i32 0, [[MUL]] +// CHECK9-NEXT: store i32 [[ADD43]], i32* [[I39]], align 4 // CHECK9-NEXT: [[TMP32:%.*]] = load float, float* @flag, align 4 -// CHECK9-NEXT: [[TOBOOL41:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 -// CHECK9-NEXT: br i1 [[TOBOOL41]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK9-NEXT: [[TOBOOL44:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 +// CHECK9-NEXT: br i1 [[TOBOOL44]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) -// CHECK9-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 2) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) +// CHECK9-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM45]], i32 2) // CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 // CHECK9-NEXT: br i1 [[TMP34]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK9: .cancel.exit: // CHECK9-NEXT: br label [[CANCEL_EXIT:%.*]] -// CHECK9: omp_section_loop.body.case.cncl: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_EXIT]] -// CHECK9: omp_section_loop.body.case23.cncl: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] -// CHECK9: omp_section_loop.body.case25.cncl: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] // CHECK9: .cancel.continue: // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: @@ -4081,30 +4127,30 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: // CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK9-NEXT: store i32 [[ADD43]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD46:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK9-NEXT: store i32 [[ADD46]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM45]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM48]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: cancel.exit: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM44:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM44]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM47]]) // CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK9: omp.precond.end: // CHECK9-NEXT: br label [[CANCEL_CONT]] // CHECK9: cancel.cont: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM46:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM46]]) -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) -// CHECK9-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM49:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM49]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM50:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) +// CHECK9-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM50]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) // CHECK9-NEXT: [[TMP37:%.*]] = bitcast i8* [[TMP36]] to %struct.kmp_task_t_with_privates* // CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 0 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) -// CHECK9-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM48]], i8* [[TMP36]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM51:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) +// CHECK9-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM51]], i8* [[TMP36]]) // CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) // CHECK9-NEXT: store i32 0, i32* [[R]], align 4 @@ -4128,50 +4174,54 @@ for (int i = 0; i < argc; ++i) { // CHECK9: omp.par.region: // CHECK9-NEXT: [[TMP2:%.*]] = load float, float* @flag, align 4 // CHECK9-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00 -// CHECK9-NEXT: br i1 [[TOBOOL]], label [[TMP14:%.*]], label [[TMP3:%.*]] -// CHECK9: 3: -// CHECK9-NEXT: br label [[TMP4:%.*]] -// CHECK9: 4: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP6]], i64 0 -// CHECK9-NEXT: [[TMP7:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK9-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 0 -// CHECK9-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) -// CHECK9-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 -// CHECK9-NEXT: br i1 [[TMP9]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]] -// CHECK9: .cncl5: +// CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_PAR_REGION_IF:%.*]], label [[OMP_PAR_REGION1:%.*]] +// CHECK9: omp.par.region1: +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i32 [[TMP3]] to i8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP4]], i64 0 +// CHECK9-NEXT: [[TMP5:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK9-NEXT: 
[[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i64 0 +// CHECK9-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX4]], align 1 +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM5]]) +// CHECK9-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0 +// CHECK9-NEXT: br i1 [[TMP7]], label [[OMP_PAR_REGION1_CONT:%.*]], label [[OMP_PAR_REGION1_CNCL_FINI:%.*]] +// CHECK9: omp.par.region1.cncl.fini: +// CHECK9-NEXT: br label [[OMP_PAR_REGION1_CNCL:%.*]] +// CHECK9: omp.par.region1.cncl: +// CHECK9-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +// CHECK9: omp.par.pre_finalize: +// CHECK9-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK9: .ompfinalize: // CHECK9-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] -// CHECK9: .cont: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0 -// CHECK9-NEXT: [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 -// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0 -// CHECK9-NEXT: [[TMP13:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i8 [[TMP13]] to i32 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP10]] +// CHECK9: omp.par.region1.cont: +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP9]], i64 0 +// CHECK9-NEXT: [[TMP10:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP10]], i64 0 +// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* 
[[ARRAYIDX7]], align 1 +// CHECK9-NEXT: [[CONV8:%.*]] = sext i8 [[TMP11]] to i32 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP8]] // CHECK9-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD]] to i8 // CHECK9-NEXT: store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1 // CHECK9-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] // CHECK9: omp.par.region.parallel.after: -// CHECK9-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] -// CHECK9: omp.par.pre_finalize: -// CHECK9-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] -// CHECK9: 14: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1) -// CHECK9-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0 -// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]] -// CHECK9: .cncl: +// CHECK9-NEXT: br label [[OMP_PAR_PRE_FINALIZE]] +// CHECK9: omp.par.region.if: // CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -// CHECK9-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] -// CHECK9: .split: -// CHECK9-NEXT: br label [[TMP4]] +// CHECK9-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]], i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 0 +// CHECK9-NEXT: br i1 [[TMP13]], label [[OMP_PAR_REGION_IF_CONT:%.*]], label [[OMP_PAR_REGION_IF_CNCL_FINI:%.*]] +// CHECK9: omp.par.region.if.cncl.fini: +// CHECK9-NEXT: br label [[OMP_PAR_REGION_IF_CNCL:%.*]] +// CHECK9: omp.par.region.if.cncl: +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* 
@[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +// CHECK9-NEXT: br label [[OMP_PAR_PRE_FINALIZE]] +// CHECK9: omp.par.region.if.cont: +// CHECK9-NEXT: br label [[OMP_PAR_REGION1]] // CHECK9: omp.par.outlined.exit.exitStub: // CHECK9-NEXT: ret void // @@ -4260,14 +4310,14 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.sections.case: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) // CHECK9-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK9-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK9-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] -// CHECK9: .omp.sections.case.split: +// CHECK9-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK9-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK9: .cancel.exit: +// CHECK9-NEXT: br label [[CANCEL_EXIT:%.*]] +// CHECK9: .cancel.continue: // CHECK9-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK9: .omp.sections.case.cncl: -// CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK9: .omp.sections.exit: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: @@ -4276,13 +4326,13 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) // CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* 
@[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) -// CHECK9-NEXT: br label [[CANCEL_CONT]] +// CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK9: cancel.cont: // CHECK9-NEXT: ret void // CHECK9: cancel.exit: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) // CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK9-NEXT: br label [[CANCEL_CONT]] // @@ -4303,7 +4353,7 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) // CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 1 @@ -4324,25 +4374,23 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.sections.case: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) // CHECK9-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], 
i32 3) -// CHECK9-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK9-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] -// CHECK9: .omp.sections.case.split: +// CHECK9-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK9-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK9: .cancel.exit: +// CHECK9-NEXT: br label [[CANCEL_EXIT:%.*]] +// CHECK9: .cancel.continue: // CHECK9-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK9: .omp.sections.case.cncl: -// CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK9: .omp.sections.case2: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) // CHECK9-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 3) -// CHECK9-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0 -// CHECK9-NEXT: br i1 [[TMP10]], label [[DOTOMP_SECTIONS_CASE2_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE2_CNCL:%.*]] -// CHECK9: .omp.sections.case2.split: -// CHECK9-NEXT: br label [[DOTOMP_SECTIONS_CASE2_SECTION_AFTER:%.*]] -// CHECK9: .omp.sections.case2.section.after: +// CHECK9-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK9-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT4:%.*]], label [[DOTCANCEL_CONTINUE5:%.*]] +// CHECK9: .cancel.exit4: +// CHECK9-NEXT: br label [[CANCEL_EXIT]] +// CHECK9: .cancel.continue5: // CHECK9-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK9: .omp.sections.case2.cncl: -// CHECK9-NEXT: br label [[OMP_INNER_FOR_END]] // CHECK9: .omp.sections.exit: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: @@ -4351,14 +4399,14 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK9-NEXT: br label 
[[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM5]]) -// CHECK9-NEXT: br label [[CANCEL_CONT]] +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM7]]) +// CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK9: cancel.cont: // CHECK9-NEXT: ret void // CHECK9: cancel.exit: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23]]) -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) // CHECK9-NEXT: br label [[CANCEL_CONT]] // // @@ -4405,7 +4453,7 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK9-NEXT: store i32 0, i32* [[R3]], align 4 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB31:[0-9]+]]) // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* 
[[DOTCAPTURE_EXPR_1]], align 4 @@ -4433,7 +4481,7 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB33:[0-9]+]]) // CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM7]], i32 2) // CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 // CHECK9-NEXT: br i1 [[TMP15]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] @@ -4455,14 +4503,14 @@ for (int i = 0; i < argc; ++i) { // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35:[0-9]+]]) // CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) // CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK9-NEXT: [[TMP20:%.*]] = bitcast i32* [[R3]] to i8* // CHECK9-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) // CHECK9-NEXT: [[TMP21:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK9-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB30:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, 
i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB36:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] @@ -4472,10 +4520,10 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[R3]], align 4 // CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: store i32 [[ADD13]], i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB30]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: cancel.exit: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) // CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) // CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK9: .omp.reduction.case2: @@ -4532,13 +4580,13 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[DOTCAPTURE_EXPR_34:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTCAPTURE_EXPR_37:%.*]] = alloca i32, 
align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[I36:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[I39:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK10-NEXT: [[R:%.*]] = alloca i32, align 4 // CHECK10-NEXT: store i32 0, i32* [[RETVAL]], align 4 @@ -4585,8 +4633,12 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK10-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 3) // CHECK10-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK10-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] -// CHECK10: omp_section_loop.body.case.split: +// CHECK10-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL_FINI:%.*]] +// CHECK10: omp_section_loop.body.case.cncl.fini: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] +// CHECK10: omp_section_loop.body.case.cncl: +// CHECK10-NEXT: br label [[SECTION_FINI:%.*]] +// CHECK10: omp_section_loop.body.case.cont: // CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_SECTION_AFTER:%.*]] // CHECK10: omp_section_loop.body.case.section.after: // CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_SECTIONS_AFTER]] @@ -4601,10 +4653,14 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) // CHECK10-NEXT: br label [[OMP_SECTION_LOOP_AFTER:%.*]] // CHECK10: omp_section_loop.after: -// CHECK10-NEXT: br label 
[[OMP_SECTION_LOOP_AFTERSECTIONS_FINI:%.*]] -// CHECK10: omp_section_loop.aftersections.fini: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]] -// CHECK10: omp_section_loop.preheader13: +// CHECK10-NEXT: br label [[SECTION_FINISH:%.*]] +// CHECK10: section_finish: +// CHECK10-NEXT: br label [[DOTOMPFINALIZE13:%.*]] +// CHECK10: .ompfinalize13: +// CHECK10-NEXT: br label [[SECTION_FINI]] +// CHECK10: section_fini: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER14:%.*]] +// CHECK10: omp_section_loop.preheader14: // CHECK10-NEXT: store i32 0, i32* [[P_LOWERBOUND29]], align 4 // CHECK10-NEXT: store i32 1, i32* [[P_UPPERBOUND30]], align 4 // CHECK10-NEXT: store i32 1, i32* [[P_STRIDE31]], align 4 @@ -4614,79 +4670,91 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[P_UPPERBOUND30]], align 4 // CHECK10-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], [[TMP9]] // CHECK10-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 1 -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_HEADER14:%.*]] -// CHECK10: omp_section_loop.header14: -// CHECK10-NEXT: [[OMP_SECTION_LOOP_IV20:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER13]] ], [ [[OMP_SECTION_LOOP_NEXT22:%.*]], [[OMP_SECTION_LOOP_INC17:%.*]] ] -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_COND15:%.*]] -// CHECK10: omp_section_loop.cond15: -// CHECK10-NEXT: [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP12]] -// CHECK10-NEXT: br i1 [[OMP_SECTION_LOOP_CMP21]], label [[OMP_SECTION_LOOP_BODY16:%.*]], label [[OMP_SECTION_LOOP_EXIT18:%.*]] -// CHECK10: omp_section_loop.body16: -// CHECK10-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP9]] +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_HEADER15:%.*]] +// CHECK10: omp_section_loop.header15: +// CHECK10-NEXT: [[OMP_SECTION_LOOP_IV21:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER14]] ], [ [[OMP_SECTION_LOOP_NEXT23:%.*]], [[OMP_SECTION_LOOP_INC18:%.*]] ] +// CHECK10-NEXT: br label 
[[OMP_SECTION_LOOP_COND16:%.*]] +// CHECK10: omp_section_loop.cond16: +// CHECK10-NEXT: [[OMP_SECTION_LOOP_CMP22:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV21]], [[TMP12]] +// CHECK10-NEXT: br i1 [[OMP_SECTION_LOOP_CMP22]], label [[OMP_SECTION_LOOP_BODY17:%.*]], label [[OMP_SECTION_LOOP_EXIT19:%.*]] +// CHECK10: omp_section_loop.body17: +// CHECK10-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV21]], [[TMP9]] // CHECK10-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 1 // CHECK10-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], 0 -// CHECK10-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER:%.*]] [ -// CHECK10-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE23:%.*]] -// CHECK10-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE25:%.*]] +// CHECK10-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER:%.*]] [ +// CHECK10-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE24:%.*]] +// CHECK10-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE26:%.*]] // CHECK10-NEXT: ] -// CHECK10: omp_section_loop.body.case23: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM24]], i32 3) +// CHECK10: omp_section_loop.body.case24: +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM25:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM25]], i32 3) // CHECK10-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0 -// CHECK10-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE23_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE23_CNCL:%.*]] -// CHECK10: omp_section_loop.body.case23.split: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE23_SECTION_AFTER:%.*]] -// CHECK10: omp_section_loop.body.case23.section.after: -// CHECK10-NEXT: br label 
[[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] -// CHECK10: omp_section_loop.body.case25: +// CHECK10-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE24_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE24_CNCL_FINI:%.*]] +// CHECK10: omp_section_loop.body.case24.cncl.fini: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE24_CNCL:%.*]] +// CHECK10: omp_section_loop.body.case24.cncl: +// CHECK10-NEXT: br label [[SECTION_FINI35:%.*]] +// CHECK10: omp_section_loop.body.case24.cont: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE24_SECTION_AFTER:%.*]] +// CHECK10: omp_section_loop.body.case24.section.after: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER]] +// CHECK10: omp_section_loop.body.case26: // CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM27:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK10-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM27]], i32 3) // CHECK10-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 0 -// CHECK10-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]] -// CHECK10: omp_section_loop.body.case25.split: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER26:%.*]] -// CHECK10: omp_section_loop.body.case25.section.after26: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER:%.*]] -// CHECK10: omp_section_loop.body.case25.section.after: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] -// CHECK10: omp_section_loop.body16.sections.after: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_INC17]] -// CHECK10: omp_section_loop.inc17: -// CHECK10-NEXT: [[OMP_SECTION_LOOP_NEXT22]] = add nuw i32 [[OMP_SECTION_LOOP_IV20]], 1 -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_HEADER14]] -// CHECK10: omp_section_loop.exit18: +// CHECK10-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE26_CONT:%.*]], label 
[[OMP_SECTION_LOOP_BODY_CASE26_CNCL_FINI:%.*]] +// CHECK10: omp_section_loop.body.case26.cncl.fini: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL:%.*]] +// CHECK10: omp_section_loop.body.case26.cncl: +// CHECK10-NEXT: br label [[SECTION_FINI35]] +// CHECK10: omp_section_loop.body.case26.cont: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTIONFINI:%.*]] +// CHECK10: omp_section_loop.body.case26.sectionfini: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTION_AFTER:%.*]] +// CHECK10: omp_section_loop.body.case26.section.after: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER]] +// CHECK10: omp_section_loop.body17.sections.after: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_INC18]] +// CHECK10: omp_section_loop.inc18: +// CHECK10-NEXT: [[OMP_SECTION_LOOP_NEXT23]] = add nuw i32 [[OMP_SECTION_LOOP_IV21]], 1 +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_HEADER15]] +// CHECK10: omp_section_loop.exit19: // CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]]) // CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM33:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK10-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM33]]) -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_AFTER19:%.*]] -// CHECK10: omp_section_loop.after19: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_AFTER19SECTIONS_FINI:%.*]] -// CHECK10: omp_section_loop.after19sections.fini: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_AFTER20:%.*]] +// CHECK10: omp_section_loop.after20: +// CHECK10-NEXT: br label [[SECTION_FINISH34:%.*]] +// CHECK10: section_finish34: +// CHECK10-NEXT: br label [[DOTOMPFINALIZE36:%.*]] +// CHECK10: .ompfinalize36: +// CHECK10-NEXT: br label [[SECTION_FINI35]] +// CHECK10: section_fini35: // CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK10-NEXT: store i32 [[TMP20]], i32* 
[[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP21]], 0 // CHECK10-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK10-NEXT: [[SUB35:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK10-NEXT: store i32 [[SUB35]], i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK10-NEXT: [[SUB38:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK10-NEXT: store i32 [[SUB38]], i32* [[DOTCAPTURE_EXPR_37]], align 4 // CHECK10-NEXT: store i32 0, i32* [[I]], align 4 // CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP22]] // CHECK10-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK10: omp.precond.then: // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 // CHECK10-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM37:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) -// CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM37]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM40:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM40]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// 
CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 -// CHECK10-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] -// CHECK10-NEXT: br i1 [[CMP38]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 +// CHECK10-NEXT: [[CMP41:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +// CHECK10-NEXT: br i1 [[CMP41]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: -// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: // CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 @@ -4700,29 +4768,23 @@ for (int i = 0; i < argc; ++i) { // CHECK10: omp.inner.for.cond: // CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP39:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] -// CHECK10-NEXT: br i1 [[CMP39]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK10-NEXT: [[CMP42:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK10-NEXT: br i1 [[CMP42]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: // CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 -// CHECK10-NEXT: [[ADD40:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD40]], i32* [[I36]], align 4 +// CHECK10-NEXT: [[ADD43:%.*]] = add nsw i32 0, [[MUL]] +// CHECK10-NEXT: store i32 [[ADD43]], i32* [[I39]], align 4 // CHECK10-NEXT: [[TMP32:%.*]] = load float, float* @flag, align 4 -// CHECK10-NEXT: [[TOBOOL41:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 -// CHECK10-NEXT: br i1 [[TOBOOL41]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// 
CHECK10-NEXT: [[TOBOOL44:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 +// CHECK10-NEXT: br i1 [[TOBOOL44]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) -// CHECK10-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 2) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) +// CHECK10-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM45]], i32 2) // CHECK10-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 // CHECK10-NEXT: br i1 [[TMP34]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK10: .cancel.exit: // CHECK10-NEXT: br label [[CANCEL_EXIT:%.*]] -// CHECK10: omp_section_loop.body.case.cncl: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_EXIT]] -// CHECK10: omp_section_loop.body.case23.cncl: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] -// CHECK10: omp_section_loop.body.case25.cncl: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] // CHECK10: .cancel.continue: // CHECK10-NEXT: br label [[OMP_IF_END:%.*]] // CHECK10: omp_if.else: @@ -4733,30 +4795,30 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: // CHECK10-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK10-NEXT: store i32 [[ADD43]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[ADD46:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK10-NEXT: store i32 [[ADD46]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM45]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM48]]) // CHECK10-NEXT: br label [[OMP_PRECOND_END]] // CHECK10: cancel.exit: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM44:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM44]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM47]]) // CHECK10-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK10: omp.precond.end: // CHECK10-NEXT: br label [[CANCEL_CONT]] // CHECK10: cancel.cont: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM46:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM46]]) -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) -// CHECK10-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM49:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM49]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM50:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) +// CHECK10-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM50]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) // CHECK10-NEXT: [[TMP37:%.*]] = bitcast i8* [[TMP36]] to %struct.kmp_task_t_with_privates* // CHECK10-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 0 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) -// CHECK10-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM48]], i8* [[TMP36]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM51:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) +// CHECK10-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM51]], i8* [[TMP36]]) // CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) // CHECK10-NEXT: store i32 0, i32* [[R]], align 4 @@ -4780,50 +4842,54 @@ for (int i = 0; i < argc; ++i) { // CHECK10: omp.par.region: // CHECK10-NEXT: [[TMP2:%.*]] = load float, float* @flag, align 4 // CHECK10-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00 -// CHECK10-NEXT: br i1 [[TOBOOL]], label [[TMP14:%.*]], label [[TMP3:%.*]] -// CHECK10: 3: -// CHECK10-NEXT: br label [[TMP4:%.*]] -// CHECK10: 4: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK10-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8 -// CHECK10-NEXT: [[TMP6:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP6]], i64 0 -// CHECK10-NEXT: [[TMP7:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK10-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 0 -// CHECK10-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) -// CHECK10-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 -// CHECK10-NEXT: br i1 [[TMP9]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]] -// CHECK10: .cncl5: +// CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_PAR_REGION_IF:%.*]], label [[OMP_PAR_REGION1:%.*]] +// CHECK10: omp.par.region1: +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK10-NEXT: [[CONV:%.*]] = trunc i32 [[TMP3]] to i8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP4]], i64 0 +// CHECK10-NEXT: [[TMP5:%.*]] = load i8*, i8** [[ARRAYIDX]], 
align 8 +// CHECK10-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i64 0 +// CHECK10-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX4]], align 1 +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM5]]) +// CHECK10-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0 +// CHECK10-NEXT: br i1 [[TMP7]], label [[OMP_PAR_REGION1_CONT:%.*]], label [[OMP_PAR_REGION1_CNCL_FINI:%.*]] +// CHECK10: omp.par.region1.cncl.fini: +// CHECK10-NEXT: br label [[OMP_PAR_REGION1_CNCL:%.*]] +// CHECK10: omp.par.region1.cncl: +// CHECK10-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +// CHECK10: omp.par.pre_finalize: +// CHECK10-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK10: .ompfinalize: // CHECK10-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] -// CHECK10: .cont: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK10-NEXT: [[TMP11:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK10-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0 -// CHECK10-NEXT: [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 -// CHECK10-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0 -// CHECK10-NEXT: [[TMP13:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1 -// CHECK10-NEXT: [[CONV8:%.*]] = sext i8 [[TMP13]] to i32 -// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP10]] +// CHECK10: omp.par.region1.cont: +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK10-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP9]], i64 0 +// CHECK10-NEXT: [[TMP10:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 +// CHECK10-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP10]], i64 0 
+// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1 +// CHECK10-NEXT: [[CONV8:%.*]] = sext i8 [[TMP11]] to i32 +// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP8]] // CHECK10-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD]] to i8 // CHECK10-NEXT: store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1 // CHECK10-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] // CHECK10: omp.par.region.parallel.after: -// CHECK10-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] -// CHECK10: omp.par.pre_finalize: -// CHECK10-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] -// CHECK10: 14: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1) -// CHECK10-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0 -// CHECK10-NEXT: br i1 [[TMP16]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]] -// CHECK10: .cncl: +// CHECK10-NEXT: br label [[OMP_PAR_PRE_FINALIZE]] +// CHECK10: omp.par.region.if: // CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -// CHECK10-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] -// CHECK10: .split: -// CHECK10-NEXT: br label [[TMP4]] +// CHECK10-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]], i32 1) +// CHECK10-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 0 +// CHECK10-NEXT: br i1 [[TMP13]], label [[OMP_PAR_REGION_IF_CONT:%.*]], label [[OMP_PAR_REGION_IF_CNCL_FINI:%.*]] +// CHECK10: omp.par.region.if.cncl.fini: +// CHECK10-NEXT: br label [[OMP_PAR_REGION_IF_CNCL:%.*]] +// CHECK10: omp.par.region.if.cncl: +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// 
CHECK10-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +// CHECK10-NEXT: br label [[OMP_PAR_PRE_FINALIZE]] +// CHECK10: omp.par.region.if.cont: +// CHECK10-NEXT: br label [[OMP_PAR_REGION1]] // CHECK10: omp.par.outlined.exit.exitStub: // CHECK10-NEXT: ret void // @@ -4912,14 +4978,14 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK10-NEXT: ] // CHECK10: .omp.sections.case: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) // CHECK10-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK10-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK10-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] -// CHECK10: .omp.sections.case.split: +// CHECK10-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK10-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK10: .cancel.exit: +// CHECK10-NEXT: br label [[CANCEL_EXIT:%.*]] +// CHECK10: .cancel.continue: // CHECK10-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK10: .omp.sections.case.cncl: -// CHECK10-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK10: .omp.sections.exit: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: @@ -4928,13 +4994,13 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) // CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) -// CHECK10-NEXT: br label [[CANCEL_CONT]] +// CHECK10-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK10: cancel.cont: // CHECK10-NEXT: ret void // CHECK10: cancel.exit: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) // CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK10-NEXT: br label [[CANCEL_CONT]] // @@ -4955,7 +5021,7 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) // CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 1 @@ -4976,25 +5042,23 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]] // CHECK10-NEXT: ] // CHECK10: .omp.sections.case: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) // CHECK10-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK10-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK10-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] -// CHECK10: .omp.sections.case.split: +// CHECK10-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK10-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK10: .cancel.exit: +// CHECK10-NEXT: br label [[CANCEL_EXIT:%.*]] +// CHECK10: .cancel.continue: // CHECK10-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK10: .omp.sections.case.cncl: -// CHECK10-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK10: .omp.sections.case2: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) // CHECK10-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 3) -// CHECK10-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0 -// CHECK10-NEXT: br i1 [[TMP10]], label [[DOTOMP_SECTIONS_CASE2_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE2_CNCL:%.*]] -// CHECK10: .omp.sections.case2.split: -// CHECK10-NEXT: br label [[DOTOMP_SECTIONS_CASE2_SECTION_AFTER:%.*]] -// CHECK10: .omp.sections.case2.section.after: +// CHECK10-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK10-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT4:%.*]], label [[DOTCANCEL_CONTINUE5:%.*]] +// CHECK10: .cancel.exit4: +// CHECK10-NEXT: br label [[CANCEL_EXIT]] +// CHECK10: .cancel.continue5: // CHECK10-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK10: .omp.sections.case2.cncl: -// CHECK10-NEXT: br label [[OMP_INNER_FOR_END]] // CHECK10: .omp.sections.exit: // CHECK10-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: @@ -5003,14 +5067,14 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM5]]) -// CHECK10-NEXT: br label [[CANCEL_CONT]] +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM7]]) +// CHECK10-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK10: cancel.cont: // CHECK10-NEXT: ret void // CHECK10: cancel.exit: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23]]) -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) // CHECK10-NEXT: br label [[CANCEL_CONT]] // // @@ -5057,7 +5121,7 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK10-NEXT: store i32 0, i32* [[R3]], align 4 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB31:[0-9]+]]) // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 
[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 @@ -5085,7 +5149,7 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB33:[0-9]+]]) // CHECK10-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM7]], i32 2) // CHECK10-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 // CHECK10-NEXT: br i1 [[TMP15]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] @@ -5107,14 +5171,14 @@ for (int i = 0; i < argc; ++i) { // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35:[0-9]+]]) // CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) // CHECK10-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK10-NEXT: [[TMP20:%.*]] = bitcast i32* [[R3]] to i8* // CHECK10-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) // CHECK10-NEXT: [[TMP21:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK10-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB30:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK10-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB36:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK10-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK10-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK10-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] @@ -5124,10 +5188,10 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[R3]], align 4 // CHECK10-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK10-NEXT: store i32 [[ADD13]], i32* [[TMP1]], align 4 -// CHECK10-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB30]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK10-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK10-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK10: cancel.exit: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) // CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) // CHECK10-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK10: .omp.reduction.case2: From 746fc7edc1d4161cdd1c021704e6341531906c76 
Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sun, 1 May 2022 11:11:43 -0500 Subject: [PATCH 25/50] cleanup --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 7615607385efc..67071cf6434bc 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -948,9 +948,6 @@ void OpenMPIRBuilder::emitCancelationCheckImpl(LocationDescription Loc, BasicBlock *BB = Builder.GetInsertBlock(); - // Avoid assertions around "fallthtrough" cleanups in clang. - // BasicBlock *NonCancellationCleanupBlock = splitBB(Builder, BB->getName() + - // ".cont.cleanup", true); BasicBlock *NonCancellationBlock = splitBBWithSuffix(Builder, false, ".cont"); From 2ac4ea238e6b8a52549170ab423e4fb0f2eafcba Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sun, 1 May 2022 11:12:48 -0500 Subject: [PATCH 26/50] clang-format --- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 93 +++++--- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 216 +++++++----------- .../Frontend/OpenMPIRBuilderTest.cpp | 17 +- 3 files changed, 145 insertions(+), 181 deletions(-) diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 38ebb28e6ea5e..1cca6fdb764ff 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -103,7 +103,7 @@ class OpenMPIRBuilder { /// A finalize callback knows about all objects that need finalization, e.g. /// destruction, when the scope of the currently generated construct is left /// at the time, and location, the callback is invoked. - using FinalizeCallbackTy = function_ref; + using FinalizeCallbackTy = function_ref; private: enum class RegionKind { @@ -121,47 +121,54 @@ class OpenMPIRBuilder { struct OMPRegionInfo; -/// An irregular exit out of a region, such as by cancellation. 
+ /// An irregular exit out of a region, such as by cancellation. struct OMPRegionBreakInfo { -/// The end of this basic block is current end of the path for breaking out of the region. Must have no terminator so finializations (eg. destructors) can be appended until rejoining at the end of the target region. + /// The end of this basic block is current end of the path for breaking out + /// of the region. Must have no terminator so finializations (eg. + /// destructors) can be appended until rejoining at the end of the target + /// region. BasicBlock *BB; /// What triggered the break out of a region, such as a canecellation point. omp::Directive Reason; - /// The kind of region that is being exited. Control flow will rejoin after the innermost region of this kind. - OMPRegionInfo* Target; + /// The kind of region that is being exited. Control flow will rejoin after + /// the innermost region of this kind. + OMPRegionInfo *Target; - OMPRegionBreakInfo(BasicBlock *BB, omp::Directive Reason, OMPRegionInfo* Target); + OMPRegionBreakInfo(BasicBlock *BB, omp::Directive Reason, + OMPRegionInfo *Target); /// Consistency self-check. void assertOK() const; }; - - /// An OpenMP region with a single entry and single exit (unless containing a irregular exit) that may be associated with a construct. + /// An OpenMP region with a single entry and single exit (unless containing a + /// irregular exit) that may be associated with a construct. struct OMPRegionInfo { /// The kind of region: topmost sentinel, loop, or directive. RegionKind Kind; - /// If this region represents a directive-associated region, the kind of directive. + /// If this region represents a directive-associated region, the kind of + /// directive. omp::Directive DK; /// Inside a parallel region, determines whether a barrier must check /// whether cancellation has occured. 
- // TODO: Do not rely on the frontend to know whether a region contains a cancellation construct, but determine within OpenMPIRBuilder itself. + // TODO: Do not rely on the frontend to know whether a region contains a + // cancellation construct, but determine within OpenMPIRBuilder itself. bool IsCancellable; /// Irregular exits (such as cancellation points) out of this region. SmallVector Breaks; - OMPRegionInfo(RegionKind Kind, omp::Directive DK, bool IsCancellable ); - + OMPRegionInfo(RegionKind Kind, omp::Directive DK, bool IsCancellable); /// Register an irregular exit to this region. void addBreak(BasicBlock *BB, omp::Directive Reason, - OMPRegionInfo* Target) { - assert(IsCancellable && "Only cancellable region may have irregular exits"); + OMPRegionInfo *Target) { + assert(IsCancellable && + "Only cancellable region may have irregular exits"); assert(!BB->getTerminator()); Breaks.emplace_back(BB, Reason, Target); } @@ -170,37 +177,49 @@ class OpenMPIRBuilder { void assertOK() const; }; - - /// The stack of regions surrounding the current in-progress code generation location. Regions are pushed and popped when entering/leaving a region. Constructs/directives that are sensitive to surrounding regions (such as cancellation) must be emitted inside the BodyGenCallbackTy of the surrounding constructs. + /// The stack of regions surrounding the current in-progress code generation + /// location. Regions are pushed and popped when entering/leaving a region. + /// Constructs/directives that are sensitive to surrounding regions (such as + /// cancellation) must be emitted inside the BodyGenCallbackTy of the + /// surrounding constructs. SmallVector, 8> RegionStack; - /// Return the innermost surrounding region of a specific directive kind, or the toplevel region if not present. + /// Return the innermost surrounding region of a specific directive kind, or + /// the toplevel region if not present. 
OMPRegionInfo *getInnermostRegion(omp::Directive DK); /// Return true if the last entry in the finalization stack is of kind \p DK /// and cancellable. bool isLastFinalizationInfoCancellable(omp::Directive DK); - /// @{ - /// Push a new region to the region stack. Must eventually be popped again using exitRegion. - OMPRegionInfo *enterRegion(RegionKind Kind, omp::Directive DK, bool IsCancellable); + /// Push a new region to the region stack. Must eventually be popped again + /// using exitRegion. + OMPRegionInfo *enterRegion(RegionKind Kind, omp::Directive DK, + bool IsCancellable); OMPRegionInfo *enterRegion(omp::Directive DK, bool IsCancellable) { - return enterRegion(RegionKind::Directive, DK, IsCancellable); + return enterRegion(RegionKind::Directive, DK, IsCancellable); } /// @} /// Pop a region from the region stack. Exits are handled the following way: - /// - /// 1. For the regular region exit, \p FinCB is used by the caller to emit finalization code somehwere on the control path exiting the region. exitRegion itself does nothing. /// - /// 2. For irregular region exits that rejoing with the control flow after this region, exitRegion emits a branch to \p FinalizationBB containing the finalization code. This is typically that same code as for case 1 avoiding emitting the same finialization code multiple times. - /// - /// 3. For irregular region exits that rejoin a surrounding region, exitRegion calls \p FinCB to insert the finalization code into the exiting control path. The irregular exit is then added as an irregular exit of the sourrounding loop that, opon its exit, can add its own finialization code and/or rejoin the control flow there. + /// 1. For the regular region exit, \p FinCB is used by the caller to emit + /// finalization code somehwere on the control path exiting the region. + /// exitRegion itself does nothing. + /// + /// 2. 
For irregular region exits that rejoing with the control flow after + /// this region, exitRegion emits a branch to \p FinalizationBB containing the + /// finalization code. This is typically that same code as for case 1 avoiding + /// emitting the same finialization code multiple times. + /// + /// 3. For irregular region exits that rejoin a surrounding region, exitRegion + /// calls \p FinCB to insert the finalization code into the exiting control + /// path. The irregular exit is then added as an irregular exit of the + /// sourrounding loop that, opon its exit, can add its own finialization code + /// and/or rejoin the control flow there. void exitRegion(OMPRegionInfo *R, BasicBlock *FinalizationBB, - function_ref FinCB); - - + function_ref FinCB); public: /// Callback type for body (=inner region) code generation @@ -1000,7 +1019,7 @@ class OpenMPIRBuilder { /// \returns The insertion position *after* the single call. InsertPointTy createSingle(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - FinalizeCallbackTy FiniCB, bool IsNowait, + FinalizeCallbackTy FiniCB, bool IsNowait, llvm::Value *DidIt); /// Generator for '#omp master' @@ -1012,7 +1031,7 @@ class OpenMPIRBuilder { /// \returns The insertion position *after* the master. InsertPointTy createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - FinalizeCallbackTy FiniCB); + FinalizeCallbackTy FiniCB); /// Generator for '#omp masked' /// @@ -1023,7 +1042,7 @@ class OpenMPIRBuilder { /// \returns The insertion position *after* the masked. InsertPointTy createMasked(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - FinalizeCallbackTy FiniCB, Value *Filter); + FinalizeCallbackTy FiniCB, Value *Filter); /// Generator for '#omp critical' /// @@ -1036,7 +1055,7 @@ class OpenMPIRBuilder { /// \returns The insertion position *after* the critical. 
InsertPointTy createCritical(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - FinalizeCallbackTy FiniCB, + FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst); /// Generator for '#omp ordered depend (source | sink)' @@ -1065,7 +1084,7 @@ class OpenMPIRBuilder { /// \returns The insertion position *after* the ordered. InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - FinalizeCallbackTy FiniCB, + FinalizeCallbackTy FiniCB, bool IsThreads); /// Generator for '#omp sections' @@ -1083,7 +1102,7 @@ class OpenMPIRBuilder { InsertPointTy AllocaIP, ArrayRef SectionCBs, PrivatizeCallbackTy PrivCB, - FinalizeCallbackTy FiniCB, bool IsCancellable, + FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait); /// Generator for '#omp section' @@ -1094,7 +1113,7 @@ class OpenMPIRBuilder { /// \returns The insertion position *after* the section. InsertPointTy createSection(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - FinalizeCallbackTy FiniCB); + FinalizeCallbackTy FiniCB); /// Generate conditional branch and relevant BasicBlocks through which private /// threads copy the 'copyin' variables from Master copy to threadprivate @@ -1298,7 +1317,7 @@ class OpenMPIRBuilder { InsertPointTy EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall, Instruction *ExitCall, BodyGenCallbackTy BodyGenCB, - FinalizeCallbackTy FiniCB, bool Conditional = false, + FinalizeCallbackTy FiniCB, bool Conditional = false, bool HasFinalize = true, bool IsCancellable = false); /// Get the platform-specific name separator. 
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 67071cf6434bc..258cbcbdf4119 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -661,97 +661,89 @@ Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc, Loc.IP.getBlock()->getParent()); } - - Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) { return Builder.CreateCall( getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num), Ident, "omp_global_thread_num"); } - - -OpenMPIRBuilder::OMPRegionBreakInfo::OMPRegionBreakInfo(BasicBlock *BB, omp::Directive Reason, OMPRegionInfo* Target) +OpenMPIRBuilder::OMPRegionBreakInfo::OMPRegionBreakInfo(BasicBlock *BB, + omp::Directive Reason, + OMPRegionInfo *Target) : BB(BB), Reason(Reason), Target(Target) { - assertOK(); + assertOK(); } - - -void OpenMPIRBuilder ::OMPRegionBreakInfo:: assertOK() const { +void OpenMPIRBuilder ::OMPRegionBreakInfo::assertOK() const { #ifndef NDEBUG - assert(!BB->getTerminator() && "Pending irregular exit must be amendable"); - - assert(Target); - switch (Target->DK) { - case OMPD_parallel: - switch (Reason) { - case OMPD_cancellation_point: - case OMPD_cancel: - case OMPD_barrier: - break; - default: - llvm_unreachable("Unexpected region break reason for parallel construct"); - } - break; - case OMPD_sections: - switch (Reason) { - case OMPD_cancellation_point: - case OMPD_cancel: - break; - default: - llvm_unreachable("Unexpected region break reason for sections construct"); - } - break; + assert(!BB->getTerminator() && "Pending irregular exit must be amendable"); + + assert(Target); + switch (Target->DK) { + case OMPD_parallel: + switch (Reason) { + case OMPD_cancellation_point: + case OMPD_cancel: + case OMPD_barrier: + break; + default: + llvm_unreachable("Unexpected region break reason for parallel construct"); + } + break; + case OMPD_sections: + switch (Reason) { + case 
OMPD_cancellation_point: + case OMPD_cancel: + break; default: - llvm_unreachable("unexpected region break target"); + llvm_unreachable("Unexpected region break reason for sections construct"); } + break; + default: + llvm_unreachable("unexpected region break target"); + } #endif } - - - -OpenMPIRBuilder::OMPRegionInfo::OMPRegionInfo(RegionKind Kind, omp::Directive DK, bool IsCancellable ) - : Kind(Kind), DK(DK), IsCancellable(IsCancellable) { - assertOK(); +OpenMPIRBuilder::OMPRegionInfo::OMPRegionInfo(RegionKind Kind, + omp::Directive DK, + bool IsCancellable) + : Kind(Kind), DK(DK), IsCancellable(IsCancellable) { + assertOK(); } - void OpenMPIRBuilder::OMPRegionInfo::assertOK() const { #ifndef NDEBUG - switch (Kind) { - case RegionKind::Toplevel: - assert(DK == OMPD_unknown && "toplevel region is not a specific kind"); - assert(!IsCancellable && "top-level is not cancellable"); - break; - case RegionKind::CanonicalLoop: - // TODO - break; - case RegionKind::Directive: - switch (DK) { - case OMPD_parallel: - case OMPD_sections: - case OMPD_section: - case OMPD_single: - case OMPD_master: - case OMPD_masked: - case OMPD_critical: - case OMPD_ordered: - break; - default: - llvm_unreachable("Not a recognized OpenMP construct with SESE region"); - } - break; + switch (Kind) { + case RegionKind::Toplevel: + assert(DK == OMPD_unknown && "toplevel region is not a specific kind"); + assert(!IsCancellable && "top-level is not cancellable"); + break; + case RegionKind::CanonicalLoop: + // TODO + break; + case RegionKind::Directive: + switch (DK) { + case OMPD_parallel: + case OMPD_sections: + case OMPD_section: + case OMPD_single: + case OMPD_master: + case OMPD_masked: + case OMPD_critical: + case OMPD_ordered: + break; + default: + llvm_unreachable("Not a recognized OpenMP construct with SESE region"); } + break; + } - for (const OMPRegionBreakInfo &Break:Breaks) - Break.assertOK(); + for (const OMPRegionBreakInfo &Break : Breaks) + Break.assertOK(); #endif } - - 
OpenMPIRBuilder::OMPRegionInfo * OpenMPIRBuilder::getInnermostRegion(omp::Directive DK) { for (auto &R : reverse(RegionStack)) { @@ -761,26 +753,19 @@ OpenMPIRBuilder::getInnermostRegion(omp::Directive DK) { return RegionStack.front().get(); } - - bool OpenMPIRBuilder::isLastFinalizationInfoCancellable(omp::Directive DK) { - return getInnermostRegion(DK)->IsCancellable; + return getInnermostRegion(DK)->IsCancellable; } - - OpenMPIRBuilder::OMPRegionInfo * -OpenMPIRBuilder::enterRegion( OpenMPIRBuilder::RegionKind Kind, omp::Directive DK, bool IsCancellable) { - RegionStack.emplace_back(new OMPRegionInfo(Kind, DK, IsCancellable )); +OpenMPIRBuilder::enterRegion(OpenMPIRBuilder::RegionKind Kind, + omp::Directive DK, bool IsCancellable) { + RegionStack.emplace_back(new OMPRegionInfo(Kind, DK, IsCancellable)); return RegionStack.back().get(); } - - - - - -void OpenMPIRBuilder::exitRegion(OMPRegionInfo *R, BasicBlock *FinalizationBB, FinalizeCallbackTy FinCB) { +void OpenMPIRBuilder::exitRegion(OMPRegionInfo *R, BasicBlock *FinalizationBB, + FinalizeCallbackTy FinCB) { auto DK = R->DK; assert(RegionStack.back().get() == R && "balanced region push/pop required"); R->assertOK(); @@ -789,10 +774,10 @@ void OpenMPIRBuilder::exitRegion(OMPRegionInfo *R, BasicBlock *FinalizationBB, F OMPRegionInfo *Innermost = RegionStack.back().get(); OMPRegionInfo *NewInnermost = RegionStack.rbegin()[1].get(); - for (OMPRegionBreakInfo &B : reverse(Innermost->Breaks)) { assert(!B.BB->getTerminator()); - assert(Innermost->IsCancellable && "surrounding region must be cancellable"); + assert(Innermost->IsCancellable && + "surrounding region must be cancellable"); Builder.SetInsertPoint(B.BB); if (B.Target == Innermost) { @@ -801,8 +786,10 @@ void OpenMPIRBuilder::exitRegion(OMPRegionInfo *R, BasicBlock *FinalizationBB, F BranchInst *TI = Builder.CreateBr(FinalizationBB); B.BB = nullptr; } else if (FinCB) { - // Emit dedicated fininalization since we cannot use use the one for the regular exit. 
- // TODO: Implement switch-on-source-bb-index scheme like Clang's EmitBranchThroughCleanup does. + // Emit dedicated fininalization since we cannot use use the one for the + // regular exit. + // TODO: Implement switch-on-source-bb-index scheme like Clang's + // EmitBranchThroughCleanup does. B.BB = splitBB(Builder, true, ".fini"); FinCB(Builder.saveIP()); Builder.SetInsertPoint(B.BB); @@ -810,16 +797,14 @@ void OpenMPIRBuilder::exitRegion(OMPRegionInfo *R, BasicBlock *FinalizationBB, F } for (OMPRegionBreakInfo &Break : Innermost->Breaks) { - if (Break.BB) - NewInnermost->addBreak(Break.BB, Break.Reason, Break.Target); + if (Break.BB) + NewInnermost->addBreak(Break.BB, Break.Reason, Break.Target); } Innermost->Breaks.clear(); RegionStack.pop_back(); } - - OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createBarrier(const LocationDescription &Loc, Directive DK, bool ForceSimpleCall, bool CheckCancelFlag) { @@ -828,7 +813,6 @@ OpenMPIRBuilder::createBarrier(const LocationDescription &Loc, Directive DK, return emitBarrierImpl(Loc, DK, ForceSimpleCall, CheckCancelFlag); } - OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitBarrierImpl(const LocationDescription &Loc, Directive Kind, bool ForceSimpleCall, bool CheckCancelFlag) { @@ -885,7 +869,6 @@ OpenMPIRBuilder::createCancel(const LocationDescription &Loc, if (!updateToLocation(Loc)) return Loc.IP; - BasicBlock *New = nullptr; if (IfCondition) { auto Old = Builder.GetInsertBlock(); @@ -898,8 +881,6 @@ OpenMPIRBuilder::createCancel(const LocationDescription &Loc, Builder.SetInsertPoint(ThenBlock->getTerminator()); } - - Value *CancelKind = nullptr; switch (CanceledDirective) { #define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \ @@ -918,28 +899,17 @@ OpenMPIRBuilder::createCancel(const LocationDescription &Loc, Value *Result = Builder.CreateCall( getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args); - - // The actual cancel logic is shared with others, e.g., cancel_barriers. 
emitCancelationCheckImpl(Loc, Result, CanceledDirective, OMPD_cancel); - - if (New) return {New, New->begin()}; return Builder.saveIP(); } - - - - - - -void OpenMPIRBuilder::emitCancelationCheckImpl(LocationDescription Loc, - Value *CancelFlag, - omp::Directive CancelledDirective, - omp::Directive CancelledBy) { +void OpenMPIRBuilder::emitCancelationCheckImpl( + LocationDescription Loc, Value *CancelFlag, + omp::Directive CancelledDirective, omp::Directive CancelledBy) { assert(isLastFinalizationInfoCancellable(CancelledDirective) && "Unexpected cancellation!"); @@ -947,12 +917,8 @@ void OpenMPIRBuilder::emitCancelationCheckImpl(LocationDescription Loc, // MK: This is garbage BasicBlock *BB = Builder.GetInsertBlock(); - - BasicBlock *NonCancellationBlock = splitBBWithSuffix(Builder, false, ".cont"); - - BasicBlock *PreCancellationBlock = BasicBlock::Create(BB->getContext(), BB->getName() + ".cncl.fini", BB->getParent(), NonCancellationBlock); @@ -964,7 +930,6 @@ void OpenMPIRBuilder::emitCancelationCheckImpl(LocationDescription Loc, Value *Cmp = Builder.CreateIsNull(CancelFlag); Builder.CreateCondBr(Cmp, NonCancellationBlock, PreCancellationBlock, /* TODO weight */ nullptr, nullptr); - // From the cancellation block we finalize all variables and go to the // post finalization block that is known to the FiniCB callback. @@ -982,17 +947,14 @@ void OpenMPIRBuilder::emitCancelationCheckImpl(LocationDescription Loc, auto CancellationIP = Builder.saveIP(); - - // TODO: Clang's codegen emits finalization code only once and inserts a // switch to jump back to the target code path (CGF.EmitBranchThroughCleanup). // Currently in the OpenMPIRBuilder, we emit the finialization multiple times // for each path exiting the region (non-cancellation and each cancellation // check). 
- RegionStack.back()->addBreak(CancellationBlock, CancelledBy, getInnermostRegion(CancelledDirective) ); - - + RegionStack.back()->addBreak(CancellationBlock, CancelledBy, + getInnermostRegion(CancelledDirective)); // The continuation block is where code generation continues. Builder.SetInsertPoint(NonCancellationBlock, @@ -1111,7 +1073,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( // FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable}); OMPRegionInfo *ParallelRegion = enterRegion(OMPD_parallel, IsCancellable - //, FiniCBWrapper + //, FiniCBWrapper ); #endif @@ -1258,7 +1220,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( if (FiniCB) { InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator()); - FiniCB(PreFiniIP ); + FiniCB(PreFiniIP); } #if 0 for (auto& B : reverse(ParallelRegion->Breaks)) { @@ -1570,7 +1532,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections( if (FiniCB) { Builder.SetInsertPoint(Finish); Finish = splitBB(Builder, true, "section_fini"); - FiniCB(Builder.saveAndClearIP() ); + FiniCB(Builder.saveAndClearIP()); } // emitRegionExit(Builder.saveIP(), SectionsRegion); @@ -1943,7 +1905,8 @@ OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc, Builder.CreateBr(CL->getPreheader()); } - OMPRegionInfo * LoopRegion = enterRegion( RegionKind::CanonicalLoop, OMPD_unknown, /*IsCancellable*/true); + OMPRegionInfo *LoopRegion = enterRegion(RegionKind::CanonicalLoop, + OMPD_unknown, /*IsCancellable*/ true); // Emit the body content. We do it after connecting the loop to the CFG to // avoid that the callback encounters degenerate BBs. 
@@ -3254,8 +3217,6 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSingle( // } // __kmpc_barrier - - EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB, /*Conditional*/ true, /*hasFinalize*/ true); @@ -3384,7 +3345,6 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion( if (HasFinalize) FinalizationStack.push_back({FiniCB, OMPD, IsCancellable}); #endif - // Create inlined region's entry and body blocks, in preparation // for conditional creation @@ -3400,15 +3360,10 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion( Builder.SetInsertPoint(EntryBB->getTerminator()); emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional); - - // generate body BodyGenCB(/* AllocaIP */ InsertPointTy(), /* CodeGenIP */ Builder.saveIP()); - - - // emit exit call and do any needed finalization. auto FinIP = InsertPointTy(FiniBB, FiniBB->getFirstInsertionPt()); assert(FiniBB->getTerminator()->getNumSuccessors() == 1 && @@ -3431,8 +3386,6 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion( SplitPos->eraseFromParent(); Builder.SetInsertPoint(InsertBB); - - return Builder.saveIP(); } @@ -3445,7 +3398,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry( BasicBlock *EntryBB = Builder.GetInsertBlock(); Value *CallBool = Builder.CreateIsNotNull(EntryCall); auto *ThenBB = BasicBlock::Create(M.getContext(), "omp_region.body"); - auto *UI = new UnreachableInst(Builder.getContext(), ThenBB); // stop doing that + auto *UI = + new UnreachableInst(Builder.getContext(), ThenBB); // stop doing that // Emit thenBB and set the Builder's insertion point there for // body generation next. Place the block after the current block. 
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index 309203578626e..8c3af67ffc861 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -355,7 +355,6 @@ TEST_F(OpenMPIRBuilderTest, CreateBarrier) { EXPECT_FALSE(verifyModule(*M, &errs())); } - TEST_F(OpenMPIRBuilderTest, DbgLoc) { OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.initialize(); @@ -450,9 +449,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimple) { return CodeGenIP; }; - auto FiniCB = [&](InsertPointTy CodeGenIP) { - ++NumFinalizationPoints; - }; + auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; }; IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), F->getEntryBlock().getFirstInsertionPt()); @@ -531,9 +528,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested) { return CodeGenIP; }; - auto FiniCB = [&](InsertPointTy CodeGenIP) { - ++NumFinalizationPoints; - }; + auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; }; auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { ++NumOuterBodiesGenerated; @@ -627,9 +622,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) { return CodeGenIP; }; - auto FiniCB = [&](InsertPointTy CodeGenIP) { - ++NumFinalizationPoints; - }; + auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; }; auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { ++NumOuterBodiesGenerated; @@ -3515,9 +3508,7 @@ TEST_F(OpenMPIRBuilderTest, CreateReductions) { }; // Do nothing in finalization. 
- auto FiniCB = [&](InsertPointTy CodeGenIP) { - return CodeGenIP; - }; + auto FiniCB = [&](InsertPointTy CodeGenIP) { return CodeGenIP; }; InsertPointTy AfterIP = OMPBuilder.createParallel(Loc, OuterAllocaIP, BodyGenCB, PrivCB, FiniCB, From 7be77dc23fac6c63bbe0ad5d38e633f2e04c704e Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sun, 1 May 2022 11:24:33 -0500 Subject: [PATCH 27/50] reformat --- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 23 ++++++------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 1cca6fdb764ff..b77c1acfc7295 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -103,7 +103,7 @@ class OpenMPIRBuilder { /// A finalize callback knows about all objects that need finalization, e.g. /// destruction, when the scope of the currently generated construct is left /// at the time, and location, the callback is invoked. - using FinalizeCallbackTy = function_ref; + using FinalizeCallbackTy = function_ref; private: enum class RegionKind { @@ -149,14 +149,11 @@ class OpenMPIRBuilder { /// The kind of region: topmost sentinel, loop, or directive. RegionKind Kind; - /// If this region represents a directive-associated region, the kind of - /// directive. + /// The directive kind of the innermost directive that has an associated + /// region which might require finalization when it is left. omp::Directive DK; - /// Inside a parallel region, determines whether a barrier must check - /// whether cancellation has occured. - // TODO: Do not rely on the frontend to know whether a region contains a - // cancellation construct, but determine within OpenMPIRBuilder itself. + /// Flag to indicate if the directive is cancellable. bool IsCancellable; /// Irregular exits (such as cancellation points) out of this region. 
@@ -205,19 +202,13 @@ class OpenMPIRBuilder { /// Pop a region from the region stack. Exits are handled the following way: /// /// 1. For the regular region exit, \p FinCB is used by the caller to emit - /// finalization code somehwere on the control path exiting the region. - /// exitRegion itself does nothing. + /// finalization code somehwere on the control path exiting the region. exitRegion itself does nothing. /// /// 2. For irregular region exits that rejoing with the control flow after - /// this region, exitRegion emits a branch to \p FinalizationBB containing the - /// finalization code. This is typically that same code as for case 1 avoiding - /// emitting the same finialization code multiple times. + /// this region, exitRegion emits a branch to \p FinalizationBB containing the finalization code. This is typically that same code as for case 1 avoiding emitting the same finialization code multiple times. /// /// 3. For irregular region exits that rejoin a surrounding region, exitRegion - /// calls \p FinCB to insert the finalization code into the exiting control - /// path. The irregular exit is then added as an irregular exit of the - /// sourrounding loop that, opon its exit, can add its own finialization code - /// and/or rejoin the control flow there. + /// calls \p FinCB to insert the finalization code into the exiting control path. The irregular exit is then added as an irregular exit of the sourrounding loop that, opon its exit, can add its own finialization code and/or rejoin the control flow there. 
void exitRegion(OMPRegionInfo *R, BasicBlock *FinalizationBB, function_ref FinCB); From 38ee9a42f244e0b199bf1d10e555faf153940bc0 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sun, 1 May 2022 13:11:31 -0500 Subject: [PATCH 28/50] WIP to fix polly --- .../include/llvm/Frontend/OpenMP/OMPIRBuilder.h | 12 +++++++----- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 17 ++++++----------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index b77c1acfc7295..2ba38071f26cb 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -185,9 +185,6 @@ class OpenMPIRBuilder { /// the toplevel region if not present. OMPRegionInfo *getInnermostRegion(omp::Directive DK); - /// Return true if the last entry in the finalization stack is of kind \p DK - /// and cancellable. - bool isLastFinalizationInfoCancellable(omp::Directive DK); /// @{ /// Push a new region to the region stack. Must eventually be popped again @@ -868,7 +865,7 @@ class OpenMPIRBuilder { /// \param CanceledDirective The kind of directive that is cancled. /// \param ExitCB Extra code to be generated in the exit block. void emitCancelationCheckImpl(LocationDescription Loc, Value *CancelFlag, - omp::Directive CanceledDirective, + omp::Directive CancelledDirective, omp::Directive CancelledBy); /// Generate a barrier runtime call. @@ -889,6 +886,12 @@ class OpenMPIRBuilder { /// \param Loc The location at which the request originated and is fulfilled. void emitFlush(const LocationDescription &Loc); +private: + /// Return true if the last entry in the finalization stack is of kind \p DK + /// and cancellable. + bool isLastFinalizationInfoCancellable(omp::Directive DK); + +public: /// Generate a taskwait runtime call. /// /// \param Loc The location at which the request originated and is fulfilled. 
@@ -1551,7 +1554,6 @@ class OpenMPIRBuilder { /// \returns The CanonicalLoopInfo that represents the emitted loop. CanonicalLoopInfo *createLoopSkeleton(DebugLoc DL, Value *TripCount, Function *F, - // bool Finalize, BasicBlock *PreInsertBefore, BasicBlock *PostInsertBefore, const Twine &Name = {}); diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 258cbcbdf4119..7c54305ca2248 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -526,12 +526,7 @@ void OpenMPIRBuilder::finalize(Function *Fn) { OpenMPIRBuilder::OpenMPIRBuilder(Module &M) : M(M), Builder(M.getContext()) { RegionStack.emplace_back(new OMPRegionInfo( - RegionKind::Toplevel, omp::OMPD_unknown, /*IsCancellable*/ false - // , [](InsertPointTy ExitingIP, omp::Directive LeaveReason, - // OMPRegionInfo *Region) { - // llvm_unreachable("top-level is not finialized"); - // } - )); + RegionKind::Toplevel, omp::OMPD_unknown, /*IsCancellable*/false )); assert(RegionStack.size() == 1); } @@ -674,7 +669,7 @@ OpenMPIRBuilder::OMPRegionBreakInfo::OMPRegionBreakInfo(BasicBlock *BB, assertOK(); } -void OpenMPIRBuilder ::OMPRegionBreakInfo::assertOK() const { +void OpenMPIRBuilder::OMPRegionBreakInfo::assertOK() const { #ifndef NDEBUG assert(!BB->getTerminator() && "Pending irregular exit must be amendable"); @@ -746,7 +741,7 @@ void OpenMPIRBuilder::OMPRegionInfo::assertOK() const { OpenMPIRBuilder::OMPRegionInfo * OpenMPIRBuilder::getInnermostRegion(omp::Directive DK) { - for (auto &R : reverse(RegionStack)) { + for ( const std::unique_ptr &R : reverse(RegionStack)) { if (R->Kind == RegionKind::Directive && R->DK == DK) return R.get(); } @@ -766,7 +761,6 @@ OpenMPIRBuilder::enterRegion(OpenMPIRBuilder::RegionKind Kind, void OpenMPIRBuilder::exitRegion(OMPRegionInfo *R, BasicBlock *FinalizationBB, FinalizeCallbackTy FinCB) { - auto DK = R->DK; assert(RegionStack.back().get() == R && "balanced region push/pop 
required"); R->assertOK(); @@ -783,7 +777,7 @@ void OpenMPIRBuilder::exitRegion(OMPRegionInfo *R, BasicBlock *FinalizationBB, if (B.Target == Innermost) { // Join common finialization block Builder.SetInsertPoint(B.BB); - BranchInst *TI = Builder.CreateBr(FinalizationBB); + Builder.CreateBr(FinalizationBB); B.BB = nullptr; } else if (FinCB) { // Emit dedicated fininalization since we cannot use use the one for the @@ -871,7 +865,7 @@ OpenMPIRBuilder::createCancel(const LocationDescription &Loc, BasicBlock *New = nullptr; if (IfCondition) { - auto Old = Builder.GetInsertBlock(); + BasicBlock* Old = Builder.GetInsertBlock(); New = splitBB(Builder, false); BasicBlock *ThenBlock = BasicBlock::Create( Builder.getContext(), Old->getName() + ".if", New->getParent(), New); @@ -881,6 +875,7 @@ OpenMPIRBuilder::createCancel(const LocationDescription &Loc, Builder.SetInsertPoint(ThenBlock->getTerminator()); } + Value *CancelKind = nullptr; switch (CanceledDirective) { #define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \ From 06ec5521547f0ab39337237aecb6e7349bebe7c9 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 2 May 2022 01:56:56 -0500 Subject: [PATCH 29/50] Try fix cancel --- llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h | 2 +- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 8 ++++---- llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp | 4 +--- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 1bef327a64c2c..1424c74036389 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -314,7 +314,7 @@ class OpenMPIRBuilder { /// /// \returns The insertion point after the barrier. 
InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition, - omp::Directive CanceledDirective); + omp::Directive CancelledDirective); /// Generator for '#omp parallel' /// diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index f415ceb108966..f0a81498124ad 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -859,7 +859,7 @@ OpenMPIRBuilder::emitBarrierImpl(const LocationDescription &Loc, Directive Kind, OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCancel(const LocationDescription &Loc, Value *IfCondition, - omp::Directive CanceledDirective) { + omp::Directive CancelledDirective) { if (!updateToLocation(Loc)) return Loc.IP; @@ -877,7 +877,7 @@ OpenMPIRBuilder::createCancel(const LocationDescription &Loc, Value *CancelKind = nullptr; - switch (CanceledDirective) { + switch (CancelledDirective) { #define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \ case DirectiveEnum: \ CancelKind = Builder.getInt32(Value); \ @@ -895,7 +895,7 @@ OpenMPIRBuilder::createCancel(const LocationDescription &Loc, getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args); // The actual cancel logic is shared with others, e.g., cancel_barriers. - emitCancelationCheckImpl(Loc, Result, CanceledDirective, OMPD_cancel); + emitCancelationCheckImpl(Loc, Result, CancelledDirective, OMPD_cancel); if (New) return {New, New->begin()}; @@ -943,7 +943,7 @@ void OpenMPIRBuilder::emitOffloadingEntry(Constant *Addr, StringRef Name, void OpenMPIRBuilder::emitCancelationCheckImpl( LocationDescription Loc, Value *CancelFlag, omp::Directive CancelledDirective, omp::Directive CancelledBy) { - assert(isLastFinalizationInfoCancellable(CanceledDirective) && + assert(isLastFinalizationInfoCancellable(CancelledDirective) && "Unexpected cancellation!"); // For a cancel barrier we create two new blocks. 
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index 8c3af67ffc861..e1fc59f6e73a8 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -3507,11 +3507,9 @@ TEST_F(OpenMPIRBuilderTest, CreateReductions) { return Builder.saveIP(); }; - // Do nothing in finalization. - auto FiniCB = [&](InsertPointTy CodeGenIP) { return CodeGenIP; }; InsertPointTy AfterIP = - OMPBuilder.createParallel(Loc, OuterAllocaIP, BodyGenCB, PrivCB, FiniCB, + OMPBuilder.createParallel(Loc, OuterAllocaIP, BodyGenCB, PrivCB, /* FiniCB */ {}, /* IfCondition */ nullptr, /* NumThreads */ nullptr, OMP_PROC_BIND_default, /* IsCancellable */ false); From 1bb0a35827b004b9bdc914fd554461acd723cf4f Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 2 May 2022 02:33:35 -0500 Subject: [PATCH 30/50] cleanup cancel --- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 4 +- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 114 ++++++++---------- 2 files changed, 52 insertions(+), 66 deletions(-) diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 1424c74036389..d8b551a7602e3 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -202,11 +202,11 @@ class OpenMPIRBuilder { /// finalization code somehwere on the control path exiting the region. exitRegion itself does nothing. /// /// 2. For irregular region exits that rejoing with the control flow after - /// this region, exitRegion emits a branch to \p FinalizationBB containing the finalization code. This is typically that same code as for case 1 avoiding emitting the same finialization code multiple times. + /// this region, exitRegion emits a branch to \p FinBB containing the finalization code. This is typically that same code as for case 1 avoiding emitting the same finialization code multiple times. /// /// 3. 
For irregular region exits that rejoin a surrounding region, exitRegion /// calls \p FinCB to insert the finalization code into the exiting control path. The irregular exit is then added as an irregular exit of the sourrounding loop that, opon its exit, can add its own finialization code and/or rejoin the control flow there. - void exitRegion(OMPRegionInfo *R, BasicBlock *FinalizationBB, + void exitRegion(OMPRegionInfo *R, BasicBlock *FinBB, function_ref FinCB); public: diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index f0a81498124ad..b7bfe24177a47 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -13,14 +13,12 @@ //===----------------------------------------------------------------------===// #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" -#include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" -#include "llvm/Analysis/RegionPrinter.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CFG.h" @@ -31,7 +29,6 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/Value.h" -#include "llvm/IR/Verifier.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetMachine.h" @@ -323,12 +320,6 @@ BasicBlock *llvm::splitBB(IRBuilder<> &Builder, bool CreateBranch, return New; } -static BasicBlock *splitBBWithSuffix(IRBuilderBase::InsertPoint IP, - bool CreateBranch, llvm::Twine Suffix) { - BasicBlock *Old = IP.getBlock(); - return splitBB(IP, CreateBranch, Old->getName() + Suffix); -} - BasicBlock *llvm::splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, llvm::Twine Suffix) { BasicBlock *Old = Builder.GetInsertBlock(); 
@@ -527,7 +518,6 @@ void OpenMPIRBuilder::finalize(Function *Fn) { OpenMPIRBuilder::OpenMPIRBuilder(Module &M) : M(M), Builder(M.getContext()) { RegionStack.emplace_back(new OMPRegionInfo( RegionKind::Toplevel, omp::OMPD_unknown, /*IsCancellable*/false )); - assert(RegionStack.size() == 1); } OpenMPIRBuilder::~OpenMPIRBuilder() { @@ -759,34 +749,34 @@ OpenMPIRBuilder::enterRegion(OpenMPIRBuilder::RegionKind Kind, return RegionStack.back().get(); } -void OpenMPIRBuilder::exitRegion(OMPRegionInfo *R, BasicBlock *FinalizationBB, +void OpenMPIRBuilder::exitRegion(OMPRegionInfo *R, BasicBlock *FinBB, FinalizeCallbackTy FinCB) { + assert(RegionStack.size() >= 2 && "Expect at least two regions on the stack: toplevel and the one exiting"); assert(RegionStack.back().get() == R && "balanced region push/pop required"); R->assertOK(); - // Trickly down no yet handled breaks. + // Trickle down no yet handled breaks. OMPRegionInfo *Innermost = RegionStack.back().get(); OMPRegionInfo *NewInnermost = RegionStack.rbegin()[1].get(); - for (OMPRegionBreakInfo &B : reverse(Innermost->Breaks)) { - assert(!B.BB->getTerminator()); + for (OMPRegionBreakInfo &Break : reverse(Innermost->Breaks)) { + assert(!Break.BB->getTerminator() && "Expect BB not yet connected back to the cfg"); assert(Innermost->IsCancellable && "surrounding region must be cancellable"); - Builder.SetInsertPoint(B.BB); + Builder.SetInsertPoint(Break.BB); - if (B.Target == Innermost) { + if (Break.Target == Innermost) { // Join common finialization block - Builder.SetInsertPoint(B.BB); - Builder.CreateBr(FinalizationBB); - B.BB = nullptr; + Builder.CreateBr(FinBB); + Break.BB = nullptr; } else if (FinCB) { // Emit dedicated fininalization since we cannot use use the one for the // regular exit. // TODO: Implement switch-on-source-bb-index scheme like Clang's - // EmitBranchThroughCleanup does. - B.BB = splitBB(Builder, true, ".fini"); + // EmitBranchThroughCleanup does. 
+ Break.BB = splitBB(Builder, true, ".fini"); FinCB(Builder.saveIP()); - Builder.SetInsertPoint(B.BB); + Builder.SetInsertPoint(Break.BB); } } @@ -863,16 +853,22 @@ OpenMPIRBuilder::createCancel(const LocationDescription &Loc, if (!updateToLocation(Loc)) return Loc.IP; - BasicBlock *New = nullptr; + // Create condition for cancel if necessary. + BasicBlock *ContBB = nullptr; if (IfCondition) { - BasicBlock* Old = Builder.GetInsertBlock(); - New = splitBB(Builder, false); - BasicBlock *ThenBlock = BasicBlock::Create( - Builder.getContext(), Old->getName() + ".if", New->getParent(), New); - Builder.CreateCondBr(IfCondition, ThenBlock, New); - Builder.SetInsertPoint(ThenBlock); - Builder.CreateBr(New); - Builder.SetInsertPoint(ThenBlock->getTerminator()); + // EntryBB + // | | + // | ThenBB (".if") + // | | + // ContBB + BasicBlock* EntryBB = Builder.GetInsertBlock(); + ContBB = splitBB(Builder, /*CreateBranch*/false); + BasicBlock *ThenBB = BasicBlock::Create( + Builder.getContext(), EntryBB->getName() + ".if", ContBB->getParent(), ContBB); + Builder.CreateCondBr(IfCondition, ThenBB, ContBB); + Builder.SetInsertPoint(ThenBB); + Builder.CreateBr(ContBB); + Builder.SetInsertPoint(ThenBB->getTerminator()); } @@ -897,8 +893,9 @@ OpenMPIRBuilder::createCancel(const LocationDescription &Loc, // The actual cancel logic is shared with others, e.g., cancel_barriers. emitCancelationCheckImpl(Loc, Result, CancelledDirective, OMPD_cancel); - if (New) - return {New, New->begin()}; + + if (ContBB) + return {ContBB, ContBB->begin()}; return Builder.saveIP(); } @@ -947,38 +944,42 @@ void OpenMPIRBuilder::emitCancelationCheckImpl( "Unexpected cancellation!"); // For a cancel barrier we create two new blocks. 
- // MK: This is garbage BasicBlock *BB = Builder.GetInsertBlock(); + LLVMContext &Ctx = BB->getContext(); + + // BB + // br i1 CancelFlag + // | | + // | PreCancellationBlock (".cncl.fini") + // | | + // | CancellationBlock (".cncl") + // | | + // NonCancellationBlock (".cont") - BasicBlock *NonCancellationBlock = splitBBWithSuffix(Builder, false, ".cont"); + BasicBlock *NonCancellationBlock = splitBBWithSuffix(Builder, /*CreateBranch*/false, ".cont"); - BasicBlock *PreCancellationBlock = - BasicBlock::Create(BB->getContext(), BB->getName() + ".cncl.fini", - BB->getParent(), NonCancellationBlock); - BasicBlock *CancellationBlock = - BasicBlock::Create(BB->getContext(), BB->getName() + ".cncl", - BB->getParent(), NonCancellationBlock); + BasicBlock *PreCancellationBlock = BasicBlock::Create(Ctx, BB->getName() + ".cncl.fini", BB->getParent(), NonCancellationBlock); + BasicBlock *CancellationBlock =BasicBlock::Create(Ctx, BB->getName() + ".cncl", BB->getParent(), NonCancellationBlock); // Jump to them based on the return value. Value *Cmp = Builder.CreateIsNull(CancelFlag); Builder.CreateCondBr(Cmp, NonCancellationBlock, PreCancellationBlock, /* TODO weight */ nullptr, nullptr); + // From the cancellation block we finalize all variables and go to the // post finalization block that is known to the FiniCB callback. Builder.SetInsertPoint(PreCancellationBlock); - Builder.CreateBr(CancellationBlock); - // if (ExitCB) - // ExitCB(Builder.saveIP(),CanceledDirective); + // Unless cancellation has been detected by a barrier itself, need to // synchronize between threads (after finalization). 
Builder.SetInsertPoint(CancellationBlock); if (CancelledDirective == OMPD_parallel && CancelledBy != OMPD_barrier) - emitBarrierImpl(Loc, CancelledBy, false, false); + emitBarrierImpl(Loc, CancelledBy, /*ForceSimpleCall*/ false, /*CheckCancelFlag*/ false); - auto CancellationIP = Builder.saveIP(); + InsertPointTy CancellationIP = Builder.saveIP(); // TODO: Clang's codegen emits finalization code only once and inserts a // switch to jump back to the target code path (CGF.EmitBranchThroughCleanup). @@ -986,12 +987,10 @@ void OpenMPIRBuilder::emitCancelationCheckImpl( // for each path exiting the region (non-cancellation and each cancellation // check). - RegionStack.back()->addBreak(CancellationBlock, CancelledBy, - getInnermostRegion(CancelledDirective)); + RegionStack.back()->addBreak(CancellationBlock, CancelledBy, getInnermostRegion(CancelledDirective)); // The continuation block is where code generation continues. - Builder.SetInsertPoint(NonCancellationBlock, - NonCancellationBlock->begin()); // MK: needed? + Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin()); } IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( @@ -1085,20 +1084,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( /* CheckCancelFlag */ false); } -#if 0 - // Hide "open-ended" blocks from the given FiniCB by setting the right jump - // target to the region exit block. - if (IP.getBlock()->end() == IP.getPoint()) { - llvm_unreachable("don't do such thing!!!"); - IRBuilder<>::InsertPointGuard IPG(Builder); - Builder.restoreIP(IP); - Instruction *I = Builder.CreateBr(PRegExitBB); - IP = InsertPointTy(I->getParent(), I->getIterator()); - } - assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 && - IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB && - "Unexpected insertion point for finalization call!"); -#endif + if (FiniCB) FiniCB(IP); // Needed? 
From 7f7620df1c4e5fbecacd77a32f4280e889537ece Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 2 May 2022 14:42:44 -0500 Subject: [PATCH 31/50] fix inline region --- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 23 +- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 204 ++++++++++++------ .../Frontend/OpenMPIRBuilderTest.cpp | 2 +- 3 files changed, 159 insertions(+), 70 deletions(-) diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index d8b551a7602e3..813a22b03bd26 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -105,6 +105,8 @@ class OpenMPIRBuilder { /// at the time, and location, the callback is invoked. using FinalizeCallbackTy = function_ref; + + private: enum class RegionKind { /// Sentinel object so we don't always have to check whether the stack is @@ -121,6 +123,8 @@ class OpenMPIRBuilder { struct OMPRegionInfo; + + /// An irregular exit out of a region, such as by cancellation. struct OMPRegionBreakInfo { /// The end of this basic block is current end of the path for breaking out @@ -143,6 +147,8 @@ class OpenMPIRBuilder { void assertOK() const; }; + + /// An OpenMP region with a single entry and single exit (unless containing a /// irregular exit) that may be associated with a construct. struct OMPRegionInfo { @@ -166,7 +172,7 @@ class OpenMPIRBuilder { OMPRegionInfo *Target) { assert(IsCancellable && "Only cancellable region may have irregular exits"); - assert(!BB->getTerminator()); + assert(!BB->getTerminator() && "Irregular exit must not rejoin the cfg"); Breaks.emplace_back(BB, Reason, Target); } @@ -186,6 +192,8 @@ class OpenMPIRBuilder { OMPRegionInfo *getInnermostRegion(omp::Directive DK); + + /// @{ /// Push a new region to the region stack. Must eventually be popped again /// using exitRegion. 
@@ -196,6 +204,11 @@ class OpenMPIRBuilder { } /// @} + + // using RegionBreakCallbackTy = function_ref; + //using RegionBreakCallbackTy = function_ref; + +#if 0 /// Pop a region from the region stack. Exits are handled the following way: /// /// 1. For the regular region exit, \p FinCB is used by the caller to emit @@ -206,8 +219,8 @@ class OpenMPIRBuilder { /// /// 3. For irregular region exits that rejoin a surrounding region, exitRegion /// calls \p FinCB to insert the finalization code into the exiting control path. The irregular exit is then added as an irregular exit of the sourrounding loop that, opon its exit, can add its own finialization code and/or rejoin the control flow there. - void exitRegion(OMPRegionInfo *R, BasicBlock *FinBB, - function_ref FinCB); +#endif + void exitRegion(OMPRegionInfo *R); public: /// Callback type for body (=inner region) code generation @@ -883,7 +896,7 @@ class OpenMPIRBuilder { /// Generate control flow and cleanup for cancellation. /// /// \param CancelFlag Flag indicating if the cancellation is performed. - /// \param CanceledDirective The kind of directive that is cancled. + /// \param CancelledDirective The kind of directive that is cancled. /// \param ExitCB Extra code to be generated in the exit block. void emitCancelationCheckImpl(LocationDescription Loc, Value *CancelFlag, omp::Directive CancelledDirective, @@ -1333,7 +1346,7 @@ class OpenMPIRBuilder { EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall, Instruction *ExitCall, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional = false, - bool HasFinalize = true, bool IsCancellable = false); + bool HasFinalize = true, bool IsCancellable = false); // TODO: remove HasFinalize /// Get the platform-specific name separator. 
/// \param Parts different parts of the final name that needs separation diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index b7bfe24177a47..4c7c90beb3488 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -749,22 +749,24 @@ OpenMPIRBuilder::enterRegion(OpenMPIRBuilder::RegionKind Kind, return RegionStack.back().get(); } -void OpenMPIRBuilder::exitRegion(OMPRegionInfo *R, BasicBlock *FinBB, - FinalizeCallbackTy FinCB) { +void OpenMPIRBuilder::exitRegion(OMPRegionInfo *R) { assert(RegionStack.size() >= 2 && "Expect at least two regions on the stack: toplevel and the one exiting"); assert(RegionStack.back().get() == R && "balanced region push/pop required"); - R->assertOK(); + // Trickle down no yet handled breaks. OMPRegionInfo *Innermost = RegionStack.back().get(); OMPRegionInfo *NewInnermost = RegionStack.rbegin()[1].get(); +#if 0 for (OMPRegionBreakInfo &Break : reverse(Innermost->Breaks)) { assert(!Break.BB->getTerminator() && "Expect BB not yet connected back to the cfg"); assert(Innermost->IsCancellable && "surrounding region must be cancellable"); Builder.SetInsertPoint(Break.BB); + + if (Break.Target == Innermost) { // Join common finialization block Builder.CreateBr(FinBB); @@ -779,14 +781,19 @@ void OpenMPIRBuilder::exitRegion(OMPRegionInfo *R, BasicBlock *FinBB, Builder.SetInsertPoint(Break.BB); } } +#endif for (OMPRegionBreakInfo &Break : Innermost->Breaks) { - if (Break.BB) - NewInnermost->addBreak(Break.BB, Break.Reason, Break.Target); + if (Break.Target == R) { + assert(!Break.BB && "Irregular exit must have been handled by this region"); + } else { + NewInnermost->addBreak(Break.BB, Break.Reason, Break.Target); + } } Innermost->Breaks.clear(); RegionStack.pop_back(); + NewInnermost->assertOK(); } OpenMPIRBuilder::InsertPointTy @@ -958,26 +965,27 @@ void OpenMPIRBuilder::emitCancelationCheckImpl( BasicBlock *NonCancellationBlock = 
splitBBWithSuffix(Builder, /*CreateBranch*/false, ".cont"); - BasicBlock *PreCancellationBlock = BasicBlock::Create(Ctx, BB->getName() + ".cncl.fini", BB->getParent(), NonCancellationBlock); + // BasicBlock *PreCancellationBlock = BasicBlock::Create(Ctx, BB->getName() + ".cncl.fini", BB->getParent(), NonCancellationBlock); BasicBlock *CancellationBlock =BasicBlock::Create(Ctx, BB->getName() + ".cncl", BB->getParent(), NonCancellationBlock); // Jump to them based on the return value. Value *Cmp = Builder.CreateIsNull(CancelFlag); - Builder.CreateCondBr(Cmp, NonCancellationBlock, PreCancellationBlock, + Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock, /* TODO weight */ nullptr, nullptr); // From the cancellation block we finalize all variables and go to the // post finalization block that is known to the FiniCB callback. - Builder.SetInsertPoint(PreCancellationBlock); - Builder.CreateBr(CancellationBlock); - + //Builder.SetInsertPoint(PreCancellationBlock); + //Builder.CreateBr(CancellationBlock); +#if 0 // Unless cancellation has been detected by a barrier itself, need to // synchronize between threads (after finalization). Builder.SetInsertPoint(CancellationBlock); if (CancelledDirective == OMPD_parallel && CancelledBy != OMPD_barrier) emitBarrierImpl(Loc, CancelledBy, /*ForceSimpleCall*/ false, /*CheckCancelFlag*/ false); +#endif InsertPointTy CancellationIP = Builder.saveIP(); @@ -1072,29 +1080,15 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( BasicBlock *PRegExitBB = PRegPreFiniBB->splitBasicBlock(ThenTI, "omp.par.exit"); -#if 1 - auto FiniCBWrapper = [&](InsertPointTy IP, omp::Directive LeaveReason, - OMPRegionInfo *Region) { - // FIXME: This is broken - // 1. Should be done after the FiniCB - // 2. 
It may deadlock - if (LeaveReason != OMPD_unknown) { - createBarrier(LocationDescription(Builder.saveIP(), Loc.DL), - omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false, - /* CheckCancelFlag */ false); - } - if (FiniCB) - FiniCB(IP); // Needed? - }; + + // FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable}); - OMPRegionInfo *ParallelRegion = enterRegion(OMPD_parallel, IsCancellable - //, FiniCBWrapper - ); -#endif + // OMPRegionInfo *ParallelRegion = enterRegion(OMPD_parallel, IsCancellable , FiniCBWrapper ); + OMPRegionInfo *ParallelRegion = enterRegion(OMPD_parallel, IsCancellable ); // Generate the privatization allocas in the block that will become the entry // of the outlined function. @@ -1235,6 +1229,39 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( "Unexpected finalization stack state!"); #endif +#if 0 + auto FiniCBWrapper = [&](InsertPointTy IP, omp::Directive BreakReason, OMPRegionInfo *FinRegion) { + if (BreakReason != OMPD_barrier) { + emitBarrierImpl(Loc, BreakReason, /*ForceSimpleCall*/ false, /*CheckCancelFlag*/ false); + } + +#if 0 + // FIXME: This is broken + // 1. Should be done after the FiniCB + // 2. It may deadlock + if (LeaveReason != OMPD_unknown) { + createBarrier(LocationDescription(Builder.saveIP(), Loc.DL), + omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false, + /* CheckCancelFlag */ false); + } +#endif + + +#if 0 + // Unless cancellation has been detected by a barrier itself, need to + // synchronize between threads (after finalization). 
+ Builder.SetInsertPoint(CancellationBlock); + if (CancelledDirective == OMPD_parallel && CancelledBy != OMPD_barrier) + emitBarrierImpl(Loc, CancelledBy, /*ForceSimpleCall*/ false, /*CheckCancelFlag*/ false); +#endif + + + + if (FiniCB) + FiniCB(IP); + }; +#endif + Instruction *PRegPreFiniTI = PRegPreFiniBB->getTerminator(); if (FiniCB) { @@ -1261,7 +1288,34 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( }), ParallelRegion->Breaks.end() ); //emitRegionExit(PreFiniIP, ParallelRegion); #endif - exitRegion(ParallelRegion, PRegPreFiniBB, FiniCB); + + for (OMPRegionBreakInfo & Break : ParallelRegion->Breaks) { + Builder.SetInsertPoint(Break.BB); + + if (FiniCB) { + BasicBlock *AfterFini = splitBBWithSuffix(Builder, true, ".finisplit"); + FiniCB(Builder.saveAndClearIP() ); + Builder.SetInsertPoint(AfterFini); + } + + // Unless cancellation has been detected by a barrier itself, need to + // synchronize between threads (after finalization). + if (Break.Reason != OMPD_barrier) { + Builder.restoreIP( emitBarrierImpl(Loc, Break.Reason, /*ForceSimpleCall*/ false, /*CheckCancelFlag*/ false)); + } + + // If the break was targeting this parallel region, rejoin after it. + if (Break.Target == ParallelRegion) { + Builder.CreateBr(PRegExitBB); + Builder.ClearInsertionPoint(); + } + + Break.BB = Builder.GetInsertBlock() ; + assert(! 
Break.BB || !Break.BB->getTerminator()); + } + + // exitRegion(ParallelRegion, PRegPreFiniBB, FiniCB); + exitRegion(ParallelRegion); OI.OuterAllocaBB = OuterAllocaBlock; OI.EntryBB = PRegEntryBB; @@ -1545,17 +1599,30 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections( } #endif // Instruction *I = Builder.CreateBr(ExitBB); - + llvm_unreachable("TODO"); Builder.restoreIP(AfterIP); auto Finish = splitBB(Builder, true, "section_finish"); if (FiniCB) { - Builder.SetInsertPoint(Finish); - Finish = splitBB(Builder, true, "section_fini"); - FiniCB(Builder.saveAndClearIP()); + Builder.SetInsertPoint(Finish); + Finish = splitBB(Builder, true, "section_fini"); + FiniCB(Builder.saveAndClearIP()); + } + + for (OMPRegionBreakInfo& Break : SectionsRegion->Breaks) { + if (Break.Target == SectionsRegion) { + Builder.CreateBr(Finish); + Break.BB = nullptr; + } else if (FiniCB) { + Builder.SetInsertPoint(Break.BB); + Finish = splitBBWithSuffix(Builder, true, ".finisplit"); + FiniCB(Builder.saveAndClearIP()); + + } } // emitRegionExit(Builder.saveIP(), SectionsRegion); - exitRegion(SectionsRegion, Finish, FiniCB); + //exitRegion(SectionsRegion, Finish, FiniCB); + exitRegion(SectionsRegion); return {Finish, Finish->begin()}; } @@ -1931,7 +1998,8 @@ OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc, // avoid that the callback encounters degenerate BBs. 
BodyGenCB(CL->getBodyIP(), CL->getIndVar()); - exitRegion(LoopRegion, nullptr, {}); + //exitRegion(LoopRegion, nullptr, {}); + exitRegion(LoopRegion); #ifndef NDEBUG CL->assertOK(); @@ -3360,10 +3428,9 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion( BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional, bool HasFinalize, bool IsCancellable) { -#if 0 - if (HasFinalize) - FinalizationStack.push_back({FiniCB, OMPD, IsCancellable}); -#endif + + OMPRegionInfo* Region = enterRegion(OMPD, IsCancellable); + // Create inlined region's entry and body blocks, in preparation // for conditional creation @@ -3375,30 +3442,57 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion( BasicBlock *FiniBB = EntryBB->splitBasicBlock(EntryBB->getTerminator(), "omp_region.finalize"); - auto Region = enterRegion(OMPD, IsCancellable); Builder.SetInsertPoint(EntryBB->getTerminator()); emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional); // generate body BodyGenCB(/* AllocaIP */ InsertPointTy(), - /* CodeGenIP */ Builder.saveIP()); + /* CodeGenIP */ Builder.saveAndClearIP()); + + assert(HasFinalize == !!FiniCB); + BasicBlock *FiniStartBB = FiniBB; + if (FiniCB) { + Builder.SetInsertPoint(FiniBB, FiniBB->begin()); + FiniBB = splitBBWithSuffix(Builder, /*CreateBranch*/ true, ".finisplit"); + FiniCB(Builder.saveAndClearIP()); + } // emit exit call and do any needed finalization. 
auto FinIP = InsertPointTy(FiniBB, FiniBB->getFirstInsertionPt()); assert(FiniBB->getTerminator()->getNumSuccessors() == 1 && - FiniBB->getTerminator()->getSuccessor(0) == ExitBB && + FiniBB->getTerminator()->getSuccessor(0) == ExitBB && "Unexpected control flow graph state!!"); emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize); + + + for (OMPRegionBreakInfo& Break : Region->Breaks) { + if (Break.Target == Region) { + Builder.SetInsertPoint(Break.BB); + Builder.CreateBr(FiniStartBB); + Break.BB = nullptr; + } else if (FiniCB) { + Builder.SetInsertPoint(Break.BB); + Break.BB = splitBBWithSuffix(Builder, /* CreateBranch */ true, ".finisplit"); + FiniCB(Builder.saveAndClearIP()); + } + } + + exitRegion(Region); + + + // FIXME: Only added to not break tests. + if (FiniStartBB != FiniBB) + MergeBlockIntoPredecessor(FiniStartBB); + assert(FiniBB->getUniquePredecessor()->getUniqueSuccessor() == FiniBB && "Unexpected Control Flow State!"); - exitRegion(Region, FiniBB, FiniCB); - MergeBlockIntoPredecessor(FiniBB); // stop doing that + MergeBlockIntoPredecessor(FiniBB); // If we are skipping the region of a non conditional, remove the exit // block, and clear the builder's insertion point. assert(SplitPos->getParent() == ExitBB && "Unexpected Insertion point location!"); - auto merged = MergeBlockIntoPredecessor(ExitBB); // stop doing that + auto merged = MergeBlockIntoPredecessor(ExitBB); BasicBlock *ExitPredBB = SplitPos->getParent(); auto InsertBB = merged ? 
ExitPredBB : ExitBB; if (!isa_and_nonnull(SplitPos)) @@ -3417,8 +3511,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry( BasicBlock *EntryBB = Builder.GetInsertBlock(); Value *CallBool = Builder.CreateIsNotNull(EntryCall); auto *ThenBB = BasicBlock::Create(M.getContext(), "omp_region.body"); - auto *UI = - new UnreachableInst(Builder.getContext(), ThenBB); // stop doing that + auto *UI = new UnreachableInst(Builder.getContext(), ThenBB); // Emit thenBB and set the Builder's insertion point there for // body generation next. Place the block after the current block. @@ -3445,24 +3538,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveExit( Builder.restoreIP(FinIP); - // If there is finalization to do, emit it before the exit call - if (HasFinalize) { -#if 0 - assert(!FinalizationStack.empty() && - "Unexpected finalization stack state!"); - - FinalizationInfo Fi = FinalizationStack.pop_back_val(); - assert(Fi.DK == OMPD && "Unexpected Directive for Finalization call!"); - Fi.FiniCB(FinIP); -#endif - - BasicBlock *FiniBB = FinIP.getBlock(); - Instruction *FiniBBTI = FiniBB->getTerminator(); - - // set Builder IP for call creation - Builder.SetInsertPoint(FiniBBTI); - } if (!ExitCall) return Builder.saveIP(); diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index e1fc59f6e73a8..01fe42628ad89 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -2138,7 +2138,7 @@ TEST_F(OpenMPIRBuilderTest, MasterDirective) { MasterEndCI = nullptr; } } - EXPECT_NE(MasterEndCI, nullptr); + ASSERT_NE(MasterEndCI, nullptr); EXPECT_EQ(MasterEndCI->arg_size(), 2U); EXPECT_TRUE(isa(MasterEndCI->getArgOperand(0))); EXPECT_EQ(MasterEndCI->getArgOperand(1), MasterEntryCI->getArgOperand(1)); From 76d309de6cd790dadc879bfbb3561e8b843af9cb Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 2 May 2022 15:05:27 -0500 
Subject: [PATCH 32/50] try fixing sections --- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 2 +- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 67 +++++-------------- .../Frontend/OpenMPIRBuilderTest.cpp | 4 +- 3 files changed, 21 insertions(+), 52 deletions(-) diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 813a22b03bd26..5f255a1a0f24f 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -323,7 +323,7 @@ class OpenMPIRBuilder { /// /// \param Loc The location where the directive was encountered. /// \param IfCondition The evaluated 'if' clause expression, if any. - /// \param CanceledDirective The kind of directive that is cancled. + /// \param CancelledDirective The kind of directive that is cancelled. /// /// \returns The insertion point after the barrier. InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition, diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 4c7c90beb3488..38aa3f104b9f1 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -1511,31 +1511,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections( if (!updateToLocation(Loc)) return Loc.IP; -#if 0 - auto FiniCBWrapper = [&](InsertPointTy IP) { - if (IP.getBlock()->end() != IP.getPoint()) - return FiniCB(IP); - // This must be done otherwise any nested constructs using FinalizeOMPRegion - // will fail because that function requires the Finalization Basic Block to - // have a terminator, which is already removed by EmitOMPRegionBody. - // IP is currently at cancelation block. - // We need to backtrack to the condition block to fetch - // the exit block and create a branch from cancelation - // to exit block. 
- IRBuilder<>::InsertPointGuard IPG(Builder); - Builder.restoreIP(IP); - auto *CaseBB = IP.getBlock()->getSinglePredecessor(); - auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor(); - auto *ExitBB = CondBB->getTerminator()->getSuccessor(1); - Instruction *I = Builder.CreateBr(ExitBB); - IP = InsertPointTy(I->getParent(), I->getIterator()); - return FiniCB(IP); - }; - // TODO: Use CanonicalLoopInfo finalization. - FinalizationStack.push_back({FiniCBWrapper, OMPD_sections, IsCancellable}); -#endif - auto SectionsRegion = enterRegion(OMPD_sections, IsCancellable); + OMPRegionInfo* SectionsRegion = enterRegion(OMPD_sections, IsCancellable); // Each section is emitted as a switch case // Each finalization callback is handled from clang.EmitOMPSectionDirective() @@ -1582,46 +1559,38 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections( Value *ST = ConstantInt::get(I32Ty, 1); llvm::CanonicalLoopInfo *LoopInfo = createCanonicalLoop( Loc, LoopBodyGenCB, LB, UB, ST, true, false, AllocaIP, "section_loop"); - auto AfterIP = LoopInfo->getAfterIP(); - applyStaticWorkshareLoop(Loc.DL, LoopInfo, AllocaIP, !IsNowait); + InsertPointTy AfterIP = + applyStaticWorkshareLoop(Loc.DL, LoopInfo, AllocaIP, !IsNowait); + + -#if 0 - // Apply the finalization callback in LoopAfterBB - auto FiniInfo = FinalizationStack.pop_back_val(); - assert(FiniInfo.DK == OMPD_sections && - "Unexpected finalization stack state!"); - if (FinalizeCallbackTy &CB = FiniInfo.FiniCB) { - Builder.restoreIP(AfterIP); - BasicBlock *FiniBB = - splitBBWithSuffix(Builder, /*CreateBranch=*/true, "sections.fini"); - CB(Builder.saveIP()); - AfterIP = {FiniBB, FiniBB->begin()}; - } -#endif - // Instruction *I = Builder.CreateBr(ExitBB); - llvm_unreachable("TODO"); Builder.restoreIP(AfterIP); - auto Finish = splitBB(Builder, true, "section_finish"); + BasicBlock* Finish = splitBB(Builder, true, "section_finish"); if (FiniCB) { Builder.SetInsertPoint(Finish); Finish = splitBB(Builder, true, 
"section_fini"); FiniCB(Builder.saveAndClearIP()); } + + for (OMPRegionBreakInfo& Break : SectionsRegion->Breaks) { if (Break.Target == SectionsRegion) { + Builder.SetInsertPoint(Break.BB); Builder.CreateBr(Finish); Break.BB = nullptr; - } else if (FiniCB) { - Builder.SetInsertPoint(Break.BB); - Finish = splitBBWithSuffix(Builder, true, ".finisplit"); - FiniCB(Builder.saveAndClearIP()); - + } else if (FiniCB) { + Builder.SetInsertPoint(Break.BB); + Break.BB = splitBBWithSuffix(Builder, /* CreateBranch */ true, ".finisplit"); + FiniCB(Builder.saveAndClearIP()); } } - // emitRegionExit(Builder.saveIP(), SectionsRegion); - //exitRegion(SectionsRegion, Finish, FiniCB); + + + + + exitRegion(SectionsRegion); return {Finish, Finish->begin()}; diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index 01fe42628ad89..1f168e1fb8c28 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -906,8 +906,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) { EXPECT_EQ(NumBodiesGenerated, 1U); EXPECT_EQ(NumPrivatizedVars, 0U); - EXPECT_EQ(NumFinalizationPoints, 1U); - EXPECT_EQ(FakeDestructor->getNumUses(), 1U); + EXPECT_EQ(NumFinalizationPoints, 2U); + EXPECT_EQ(FakeDestructor->getNumUses(), 2U); Builder.restoreIP(AfterIP); Builder.CreateRetVoid(); From d13d3ee887a6c263b2df20ba2d9a08ce24b87cc3 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 2 May 2022 15:46:40 -0500 Subject: [PATCH 33/50] mlir test fix --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 38aa3f104b9f1..eeb784836ccd0 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -974,20 +974,7 @@ void OpenMPIRBuilder::emitCancelationCheckImpl( /* TODO weight */ nullptr, 
nullptr); - // From the cancellation block we finalize all variables and go to the - // post finalization block that is known to the FiniCB callback. - //Builder.SetInsertPoint(PreCancellationBlock); - //Builder.CreateBr(CancellationBlock); -#if 0 - // Unless cancellation has been detected by a barrier itself, need to - // synchronize between threads (after finalization). - Builder.SetInsertPoint(CancellationBlock); - if (CancelledDirective == OMPD_parallel && CancelledBy != OMPD_barrier) - emitBarrierImpl(Loc, CancelledBy, /*ForceSimpleCall*/ false, /*CheckCancelFlag*/ false); -#endif - - InsertPointTy CancellationIP = Builder.saveIP(); // TODO: Clang's codegen emits finalization code only once and inserts a // switch to jump back to the target code path (CGF.EmitBranchThroughCleanup). @@ -3418,9 +3405,9 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion( BodyGenCB(/* AllocaIP */ InsertPointTy(), /* CodeGenIP */ Builder.saveAndClearIP()); - assert(HasFinalize == !!FiniCB); + BasicBlock *FiniStartBB = FiniBB; - if (FiniCB) { + if (FiniCB ) { Builder.SetInsertPoint(FiniBB, FiniBB->begin()); FiniBB = splitBBWithSuffix(Builder, /*CreateBranch*/ true, ".finisplit"); FiniCB(Builder.saveAndClearIP()); From 45bccf82e89ba187356cbc97e6605b061d9e9152 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 2 May 2022 16:06:29 -0500 Subject: [PATCH 34/50] codegen_master fix --- clang/lib/CodeGen/CodeGenFunction.h | 2 +- clang/test/OpenMP/master_codegen.cpp | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 498cd7c1050f7..7925e68202949 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -1802,7 +1802,7 @@ class CodeGenFunction : public CodeGenTypeCache { CGBuilderTy::InsertPointGuard IPG(CGF.Builder); // MK: needed? 
CGF.Builder.restoreIP(IP); - auto DestBB = llvm::splitBB(CGF.Builder, false, ".ompfinalize"); + llvm::BasicBlock * DestBB = llvm::splitBB(CGF.Builder, false, ".ompfinalize"); // llvm::BasicBlock *IPBB = IP.getBlock(); // llvm::BasicBlock *DestBB = IPBB->getUniqueSuccessor(); diff --git a/clang/test/OpenMP/master_codegen.cpp b/clang/test/OpenMP/master_codegen.cpp index 90248447a7c19..79543121449b7 100644 --- a/clang/test/OpenMP/master_codegen.cpp +++ b/clang/test/OpenMP/master_codegen.cpp @@ -33,8 +33,10 @@ int main() { // ALL-NEXT: br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]] // ALL: [[THEN]] // ALL-NEXT: store i8 2, i8* [[A_ADDR]] -// IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] -// IRBUILDER: [[AFTER]] +// IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] +// IRBUILDER: [[AFTER]] +// IRBUILDER-NEXT: br label %[[FINALIZE:[^ ,]+]] +// IRBUILDER: [[FINALIZE]] // ALL-NEXT: call {{.*}}void @__kmpc_end_master([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) // ALL-NEXT: br label {{%?}}[[EXIT]] // ALL: [[EXIT]] From b1367be776c4d085cd02fb40650904cd3abf822e Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 2 May 2022 16:26:40 -0500 Subject: [PATCH 35/50] codegen_masted fix WIP --- clang/test/OpenMP/cancel_codegen.cpp | 1304 ++++++++--------- clang/test/OpenMP/masked_codegen.cpp | 6 +- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 24 +- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 30 +- 4 files changed, 653 insertions(+), 711 deletions(-) diff --git a/clang/test/OpenMP/cancel_codegen.cpp b/clang/test/OpenMP/cancel_codegen.cpp index 1d1bdfd534929..7cfcd563cf576 100644 --- a/clang/test/OpenMP/cancel_codegen.cpp +++ b/clang/test/OpenMP/cancel_codegen.cpp @@ -1336,13 +1336,13 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_37:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: 
[[DOTCAPTURE_EXPR_34:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I39:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I36:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK3-NEXT: [[R:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 0, i32* [[RETVAL]], align 4 @@ -1389,12 +1389,8 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 3) // CHECK3-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK3-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL_FINI:%.*]] -// CHECK3: omp_section_loop.body.case.cncl.fini: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] -// CHECK3: omp_section_loop.body.case.cncl: -// CHECK3-NEXT: br label [[SECTION_FINI:%.*]] -// CHECK3: omp_section_loop.body.case.cont: +// CHECK3-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] +// CHECK3: omp_section_loop.body.case.split: // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_SECTION_AFTER:%.*]] // CHECK3: omp_section_loop.body.case.section.after: // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_SECTIONS_AFTER]] @@ -1409,14 +1405,10 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTER:%.*]] // CHECK3: omp_section_loop.after: -// CHECK3-NEXT: br label 
[[SECTION_FINISH:%.*]] -// CHECK3: section_finish: -// CHECK3-NEXT: br label [[DOTOMPFINALIZE13:%.*]] -// CHECK3: .ompfinalize13: -// CHECK3-NEXT: br label [[SECTION_FINI]] -// CHECK3: section_fini: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER14:%.*]] -// CHECK3: omp_section_loop.preheader14: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTERSECTIONS_FINI:%.*]] +// CHECK3: omp_section_loop.aftersections.fini: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]] +// CHECK3: omp_section_loop.preheader13: // CHECK3-NEXT: store i32 0, i32* [[P_LOWERBOUND29]], align 4 // CHECK3-NEXT: store i32 1, i32* [[P_UPPERBOUND30]], align 4 // CHECK3-NEXT: store i32 1, i32* [[P_STRIDE31]], align 4 @@ -1426,91 +1418,79 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[P_UPPERBOUND30]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], [[TMP9]] // CHECK3-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 1 -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_HEADER15:%.*]] -// CHECK3: omp_section_loop.header15: -// CHECK3-NEXT: [[OMP_SECTION_LOOP_IV21:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER14]] ], [ [[OMP_SECTION_LOOP_NEXT23:%.*]], [[OMP_SECTION_LOOP_INC18:%.*]] ] -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_COND16:%.*]] -// CHECK3: omp_section_loop.cond16: -// CHECK3-NEXT: [[OMP_SECTION_LOOP_CMP22:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV21]], [[TMP12]] -// CHECK3-NEXT: br i1 [[OMP_SECTION_LOOP_CMP22]], label [[OMP_SECTION_LOOP_BODY17:%.*]], label [[OMP_SECTION_LOOP_EXIT19:%.*]] -// CHECK3: omp_section_loop.body17: -// CHECK3-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV21]], [[TMP9]] +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_HEADER14:%.*]] +// CHECK3: omp_section_loop.header14: +// CHECK3-NEXT: [[OMP_SECTION_LOOP_IV20:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER13]] ], [ [[OMP_SECTION_LOOP_NEXT22:%.*]], [[OMP_SECTION_LOOP_INC17:%.*]] ] +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_COND15:%.*]] +// CHECK3: 
omp_section_loop.cond15: +// CHECK3-NEXT: [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP12]] +// CHECK3-NEXT: br i1 [[OMP_SECTION_LOOP_CMP21]], label [[OMP_SECTION_LOOP_BODY16:%.*]], label [[OMP_SECTION_LOOP_EXIT18:%.*]] +// CHECK3: omp_section_loop.body16: +// CHECK3-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP9]] // CHECK3-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 1 // CHECK3-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], 0 -// CHECK3-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER:%.*]] [ -// CHECK3-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE24:%.*]] -// CHECK3-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE26:%.*]] +// CHECK3-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER:%.*]] [ +// CHECK3-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE23:%.*]] +// CHECK3-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE25:%.*]] // CHECK3-NEXT: ] -// CHECK3: omp_section_loop.body.case24: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM25:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM25]], i32 3) +// CHECK3: omp_section_loop.body.case23: +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM24]], i32 3) // CHECK3-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE24_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE24_CNCL_FINI:%.*]] -// CHECK3: omp_section_loop.body.case24.cncl.fini: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE24_CNCL:%.*]] -// CHECK3: omp_section_loop.body.case24.cncl: -// CHECK3-NEXT: br label [[SECTION_FINI35:%.*]] -// CHECK3: omp_section_loop.body.case24.cont: -// CHECK3-NEXT: br label 
[[OMP_SECTION_LOOP_BODY_CASE24_SECTION_AFTER:%.*]] -// CHECK3: omp_section_loop.body.case24.section.after: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER]] -// CHECK3: omp_section_loop.body.case26: +// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE23_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE23_CNCL:%.*]] +// CHECK3: omp_section_loop.body.case23.split: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE23_SECTION_AFTER:%.*]] +// CHECK3: omp_section_loop.body.case23.section.after: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] +// CHECK3: omp_section_loop.body.case25: // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM27:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM27]], i32 3) // CHECK3-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE26_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL_FINI:%.*]] -// CHECK3: omp_section_loop.body.case26.cncl.fini: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL:%.*]] -// CHECK3: omp_section_loop.body.case26.cncl: -// CHECK3-NEXT: br label [[SECTION_FINI35]] -// CHECK3: omp_section_loop.body.case26.cont: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTIONFINI:%.*]] -// CHECK3: omp_section_loop.body.case26.sectionfini: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTION_AFTER:%.*]] -// CHECK3: omp_section_loop.body.case26.section.after: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER]] -// CHECK3: omp_section_loop.body17.sections.after: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_INC18]] -// CHECK3: omp_section_loop.inc18: -// CHECK3-NEXT: [[OMP_SECTION_LOOP_NEXT23]] = add nuw i32 [[OMP_SECTION_LOOP_IV21]], 1 -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_HEADER15]] -// CHECK3: omp_section_loop.exit19: +// 
CHECK3-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]] +// CHECK3: omp_section_loop.body.case25.split: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER26:%.*]] +// CHECK3: omp_section_loop.body.case25.section.after26: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER:%.*]] +// CHECK3: omp_section_loop.body.case25.section.after: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] +// CHECK3: omp_section_loop.body16.sections.after: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_INC17]] +// CHECK3: omp_section_loop.inc17: +// CHECK3-NEXT: [[OMP_SECTION_LOOP_NEXT22]] = add nuw i32 [[OMP_SECTION_LOOP_IV20]], 1 +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_HEADER14]] +// CHECK3: omp_section_loop.exit18: // CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]]) // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM33:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM33]]) -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTER20:%.*]] -// CHECK3: omp_section_loop.after20: -// CHECK3-NEXT: br label [[SECTION_FINISH34:%.*]] -// CHECK3: section_finish34: -// CHECK3-NEXT: br label [[DOTOMPFINALIZE36:%.*]] -// CHECK3: .ompfinalize36: -// CHECK3-NEXT: br label [[SECTION_FINI35]] -// CHECK3: section_fini35: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTER19:%.*]] +// CHECK3: omp_section_loop.after19: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTER19SECTIONS_FINI:%.*]] +// CHECK3: omp_section_loop.after19sections.fini: // CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP21]], 0 // 
CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK3-NEXT: [[SUB38:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK3-NEXT: store i32 [[SUB38]], i32* [[DOTCAPTURE_EXPR_37]], align 4 +// CHECK3-NEXT: [[SUB35:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK3-NEXT: store i32 [[SUB35]], i32* [[DOTCAPTURE_EXPR_34]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 // CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP22]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 // CHECK3-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM40:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM40]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM37:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM37]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 -// CHECK3-NEXT: [[CMP41:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] -// CHECK3-NEXT: br i1 [[CMP41]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK3-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: br i1 [[CMP38]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: // CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 @@ -1524,23 +1504,29 @@ for (int i = 0; i < argc; ++i) { // CHECK3: omp.inner.for.cond: // CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP42:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] -// CHECK3-NEXT: br i1 [[CMP42]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[CMP39:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK3-NEXT: br i1 [[CMP39]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 -// CHECK3-NEXT: [[ADD43:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD43]], i32* [[I39]], align 4 +// CHECK3-NEXT: [[ADD40:%.*]] = add nsw i32 0, [[MUL]] +// CHECK3-NEXT: store i32 [[ADD40]], i32* [[I36]], align 4 // CHECK3-NEXT: [[TMP32:%.*]] = load float, float* @flag, align 4 -// CHECK3-NEXT: [[TOBOOL44:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 -// CHECK3-NEXT: br i1 [[TOBOOL44]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK3-NEXT: [[TOBOOL41:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 +// CHECK3-NEXT: br i1 [[TOBOOL41]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: 
[[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) -// CHECK3-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM45]], i32 2) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) +// CHECK3-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 2) // CHECK3-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 // CHECK3-NEXT: br i1 [[TMP34]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK3: .cancel.exit: // CHECK3-NEXT: br label [[CANCEL_EXIT:%.*]] +// CHECK3: omp_section_loop.body.case.cncl: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_EXIT]] +// CHECK3: omp_section_loop.body.case23.cncl: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] +// CHECK3: omp_section_loop.body.case25.cncl: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] // CHECK3: .cancel.continue: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: @@ -1551,30 +1537,30 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: // CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD46:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK3-NEXT: store i32 [[ADD46]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK3-NEXT: store i32 [[ADD43]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM48]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call 
i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM45]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: cancel.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM47]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM44:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM44]]) // CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: omp.precond.end: // CHECK3-NEXT: br label [[CANCEL_CONT]] // CHECK3: cancel.cont: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM49:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM49]]) -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM50:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) -// CHECK3-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM50]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM46:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM46]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) +// CHECK3-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) // CHECK3-NEXT: [[TMP37:%.*]] = bitcast i8* [[TMP36]] to %struct.kmp_task_t_with_privates* // CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 0 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM51:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) -// CHECK3-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM51]], i8* [[TMP36]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) +// CHECK3-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM48]], i8* [[TMP36]]) // CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) // CHECK3-NEXT: store i32 0, i32* [[R]], align 4 @@ -1598,54 +1584,50 @@ for (int i = 0; i < argc; ++i) { // CHECK3: omp.par.region: // CHECK3-NEXT: [[TMP2:%.*]] = load float, float* @flag, align 4 // CHECK3-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00 -// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_PAR_REGION_IF:%.*]], label [[OMP_PAR_REGION1:%.*]] -// CHECK3: omp.par.region1: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i32 [[TMP3]] to i8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP4]], i64 0 -// CHECK3-NEXT: [[TMP5:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i64 0 -// CHECK3-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX4]], align 1 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM5]]) -// CHECK3-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0 -// CHECK3-NEXT: br i1 [[TMP7]], label [[OMP_PAR_REGION1_CONT:%.*]], label [[OMP_PAR_REGION1_CNCL_FINI:%.*]] -// CHECK3: omp.par.region1.cncl.fini: -// CHECK3-NEXT: br label [[OMP_PAR_REGION1_CNCL:%.*]] -// CHECK3: omp.par.region1.cncl: -// CHECK3-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] -// CHECK3: omp.par.pre_finalize: -// CHECK3-NEXT: br label [[DOTOMPFINALIZE:%.*]] -// CHECK3: .ompfinalize: +// CHECK3-NEXT: br i1 [[TOBOOL]], label [[TMP14:%.*]], label [[TMP3:%.*]] +// CHECK3: 3: +// CHECK3-NEXT: br label [[TMP4:%.*]] +// CHECK3: 4: +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// 
CHECK3-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP6]], i64 0 +// CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 0 +// CHECK3-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1 +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +// CHECK3-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 +// CHECK3-NEXT: br i1 [[TMP9]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]] +// CHECK3: .cncl5: // CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] -// CHECK3: omp.par.region1.cont: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP9]], i64 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP10]], i64 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1 -// CHECK3-NEXT: [[CONV8:%.*]] = sext i8 [[TMP11]] to i32 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP8]] +// CHECK3: .cont: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0 +// CHECK3-NEXT: [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i8, i8* 
[[ARRAYIDX7]], align 1 +// CHECK3-NEXT: [[CONV8:%.*]] = sext i8 [[TMP13]] to i32 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP10]] // CHECK3-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD]] to i8 // CHECK3-NEXT: store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1 // CHECK3-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] // CHECK3: omp.par.region.parallel.after: -// CHECK3-NEXT: br label [[OMP_PAR_PRE_FINALIZE]] -// CHECK3: omp.par.region.if: +// CHECK3-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +// CHECK3: omp.par.pre_finalize: +// CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] +// CHECK3: 14: +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1) +// CHECK3-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0 +// CHECK3-NEXT: br i1 [[TMP16]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]] +// CHECK3: .cncl: // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]], i32 1) -// CHECK3-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[OMP_PAR_REGION_IF_CONT:%.*]], label [[OMP_PAR_REGION_IF_CNCL_FINI:%.*]] -// CHECK3: omp.par.region.if.cncl.fini: -// CHECK3-NEXT: br label [[OMP_PAR_REGION_IF_CNCL:%.*]] -// CHECK3: omp.par.region.if.cncl: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) -// CHECK3-NEXT: br label [[OMP_PAR_PRE_FINALIZE]] -// CHECK3: omp.par.region.if.cont: -// CHECK3-NEXT: br label [[OMP_PAR_REGION1]] +// CHECK3-NEXT: [[TMP17:%.*]] = call i32 
@__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +// CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] +// CHECK3: .split: +// CHECK3-NEXT: br label [[TMP4]] // CHECK3: omp.par.outlined.exit.exitStub: // CHECK3-NEXT: ret void // @@ -1734,14 +1716,14 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.sections.case: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK3-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 -// CHECK3-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] -// CHECK3: .cancel.exit: -// CHECK3-NEXT: br label [[CANCEL_EXIT:%.*]] -// CHECK3: .cancel.continue: +// CHECK3-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 +// CHECK3-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] +// CHECK3: .omp.sections.case.split: // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] +// CHECK3: .omp.sections.case.cncl: +// CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: .omp.sections.exit: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: @@ -1750,13 +1732,13 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) // CHECK3-NEXT: call void 
@__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) -// CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] +// CHECK3-NEXT: br label [[CANCEL_CONT]] // CHECK3: cancel.cont: // CHECK3-NEXT: ret void // CHECK3: cancel.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19]]) // CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK3-NEXT: br label [[CANCEL_CONT]] // @@ -1777,7 +1759,7 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 1 @@ -1798,23 +1780,25 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.sections.case: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* 
@[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK3-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 -// CHECK3-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] -// CHECK3: .cancel.exit: -// CHECK3-NEXT: br label [[CANCEL_EXIT:%.*]] -// CHECK3: .cancel.continue: +// CHECK3-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 +// CHECK3-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] +// CHECK3: .omp.sections.case.split: // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] +// CHECK3: .omp.sections.case.cncl: +// CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: .omp.sections.case2: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 3) -// CHECK3-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK3-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT4:%.*]], label [[DOTCANCEL_CONTINUE5:%.*]] -// CHECK3: .cancel.exit4: -// CHECK3-NEXT: br label [[CANCEL_EXIT]] -// CHECK3: .cancel.continue5: +// CHECK3-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0 +// CHECK3-NEXT: br i1 [[TMP10]], label [[DOTOMP_SECTIONS_CASE2_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE2_CNCL:%.*]] +// CHECK3: .omp.sections.case2.split: +// CHECK3-NEXT: br label [[DOTOMP_SECTIONS_CASE2_SECTION_AFTER:%.*]] +// CHECK3: .omp.sections.case2.section.after: // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] +// CHECK3: .omp.sections.case2.cncl: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_END]] // CHECK3: .omp.sections.exit: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: @@ -1823,14 +1807,14 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], 
align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM7]]) -// CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM5]]) +// CHECK3-NEXT: br label [[CANCEL_CONT]] // CHECK3: cancel.cont: // CHECK3-NEXT: ret void // CHECK3: cancel.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) // CHECK3-NEXT: br label [[CANCEL_CONT]] // // @@ -1877,7 +1861,7 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: store i32 0, i32* [[R3]], align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB31:[0-9]+]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: 
[[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 @@ -1905,7 +1889,7 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB33:[0-9]+]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) // CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM7]], i32 2) // CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 // CHECK3-NEXT: br i1 [[TMP15]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] @@ -1927,14 +1911,14 @@ for (int i = 0; i < argc; ++i) { // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35:[0-9]+]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK3-NEXT: [[TMP20:%.*]] = bitcast i32* [[R3]] to i8* // CHECK3-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) // CHECK3-NEXT: [[TMP21:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB36:[0-9]+]], i32 
[[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB30:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] @@ -1944,10 +1928,10 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[R3]], align 4 // CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: store i32 [[ADD13]], i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB30]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: cancel.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) // CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) // CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: .omp.reduction.case2: @@ -2004,13 +1988,13 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTCAPTURE_EXPR_37:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: 
[[DOTCAPTURE_EXPR_34:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[I39:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[I36:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK4-NEXT: [[R:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store i32 0, i32* [[RETVAL]], align 4 @@ -2057,12 +2041,8 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 3) // CHECK4-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK4-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL_FINI:%.*]] -// CHECK4: omp_section_loop.body.case.cncl.fini: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] -// CHECK4: omp_section_loop.body.case.cncl: -// CHECK4-NEXT: br label [[SECTION_FINI:%.*]] -// CHECK4: omp_section_loop.body.case.cont: +// CHECK4-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] +// CHECK4: omp_section_loop.body.case.split: // CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_SECTION_AFTER:%.*]] // CHECK4: omp_section_loop.body.case.section.after: // CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_SECTIONS_AFTER]] @@ -2077,14 +2057,10 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) // CHECK4-NEXT: br label [[OMP_SECTION_LOOP_AFTER:%.*]] // CHECK4: omp_section_loop.after: -// CHECK4-NEXT: br label 
[[SECTION_FINISH:%.*]] -// CHECK4: section_finish: -// CHECK4-NEXT: br label [[DOTOMPFINALIZE13:%.*]] -// CHECK4: .ompfinalize13: -// CHECK4-NEXT: br label [[SECTION_FINI]] -// CHECK4: section_fini: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER14:%.*]] -// CHECK4: omp_section_loop.preheader14: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_AFTERSECTIONS_FINI:%.*]] +// CHECK4: omp_section_loop.aftersections.fini: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]] +// CHECK4: omp_section_loop.preheader13: // CHECK4-NEXT: store i32 0, i32* [[P_LOWERBOUND29]], align 4 // CHECK4-NEXT: store i32 1, i32* [[P_UPPERBOUND30]], align 4 // CHECK4-NEXT: store i32 1, i32* [[P_STRIDE31]], align 4 @@ -2094,91 +2070,79 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[P_UPPERBOUND30]], align 4 // CHECK4-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], [[TMP9]] // CHECK4-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 1 -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_HEADER15:%.*]] -// CHECK4: omp_section_loop.header15: -// CHECK4-NEXT: [[OMP_SECTION_LOOP_IV21:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER14]] ], [ [[OMP_SECTION_LOOP_NEXT23:%.*]], [[OMP_SECTION_LOOP_INC18:%.*]] ] -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_COND16:%.*]] -// CHECK4: omp_section_loop.cond16: -// CHECK4-NEXT: [[OMP_SECTION_LOOP_CMP22:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV21]], [[TMP12]] -// CHECK4-NEXT: br i1 [[OMP_SECTION_LOOP_CMP22]], label [[OMP_SECTION_LOOP_BODY17:%.*]], label [[OMP_SECTION_LOOP_EXIT19:%.*]] -// CHECK4: omp_section_loop.body17: -// CHECK4-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV21]], [[TMP9]] +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_HEADER14:%.*]] +// CHECK4: omp_section_loop.header14: +// CHECK4-NEXT: [[OMP_SECTION_LOOP_IV20:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER13]] ], [ [[OMP_SECTION_LOOP_NEXT22:%.*]], [[OMP_SECTION_LOOP_INC17:%.*]] ] +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_COND15:%.*]] +// CHECK4: 
omp_section_loop.cond15: +// CHECK4-NEXT: [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP12]] +// CHECK4-NEXT: br i1 [[OMP_SECTION_LOOP_CMP21]], label [[OMP_SECTION_LOOP_BODY16:%.*]], label [[OMP_SECTION_LOOP_EXIT18:%.*]] +// CHECK4: omp_section_loop.body16: +// CHECK4-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP9]] // CHECK4-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 1 // CHECK4-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], 0 -// CHECK4-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER:%.*]] [ -// CHECK4-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE24:%.*]] -// CHECK4-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE26:%.*]] +// CHECK4-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER:%.*]] [ +// CHECK4-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE23:%.*]] +// CHECK4-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE25:%.*]] // CHECK4-NEXT: ] -// CHECK4: omp_section_loop.body.case24: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM25:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM25]], i32 3) +// CHECK4: omp_section_loop.body.case23: +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM24]], i32 3) // CHECK4-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0 -// CHECK4-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE24_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE24_CNCL_FINI:%.*]] -// CHECK4: omp_section_loop.body.case24.cncl.fini: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE24_CNCL:%.*]] -// CHECK4: omp_section_loop.body.case24.cncl: -// CHECK4-NEXT: br label [[SECTION_FINI35:%.*]] -// CHECK4: omp_section_loop.body.case24.cont: -// CHECK4-NEXT: br label 
[[OMP_SECTION_LOOP_BODY_CASE24_SECTION_AFTER:%.*]] -// CHECK4: omp_section_loop.body.case24.section.after: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER]] -// CHECK4: omp_section_loop.body.case26: +// CHECK4-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE23_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE23_CNCL:%.*]] +// CHECK4: omp_section_loop.body.case23.split: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE23_SECTION_AFTER:%.*]] +// CHECK4: omp_section_loop.body.case23.section.after: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] +// CHECK4: omp_section_loop.body.case25: // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM27:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM27]], i32 3) // CHECK4-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 0 -// CHECK4-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE26_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL_FINI:%.*]] -// CHECK4: omp_section_loop.body.case26.cncl.fini: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL:%.*]] -// CHECK4: omp_section_loop.body.case26.cncl: -// CHECK4-NEXT: br label [[SECTION_FINI35]] -// CHECK4: omp_section_loop.body.case26.cont: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTIONFINI:%.*]] -// CHECK4: omp_section_loop.body.case26.sectionfini: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTION_AFTER:%.*]] -// CHECK4: omp_section_loop.body.case26.section.after: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER]] -// CHECK4: omp_section_loop.body17.sections.after: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_INC18]] -// CHECK4: omp_section_loop.inc18: -// CHECK4-NEXT: [[OMP_SECTION_LOOP_NEXT23]] = add nuw i32 [[OMP_SECTION_LOOP_IV21]], 1 -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_HEADER15]] -// CHECK4: omp_section_loop.exit19: +// 
CHECK4-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]] +// CHECK4: omp_section_loop.body.case25.split: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER26:%.*]] +// CHECK4: omp_section_loop.body.case25.section.after26: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER:%.*]] +// CHECK4: omp_section_loop.body.case25.section.after: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] +// CHECK4: omp_section_loop.body16.sections.after: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_INC17]] +// CHECK4: omp_section_loop.inc17: +// CHECK4-NEXT: [[OMP_SECTION_LOOP_NEXT22]] = add nuw i32 [[OMP_SECTION_LOOP_IV20]], 1 +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_HEADER14]] +// CHECK4: omp_section_loop.exit18: // CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]]) // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM33:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM33]]) -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_AFTER20:%.*]] -// CHECK4: omp_section_loop.after20: -// CHECK4-NEXT: br label [[SECTION_FINISH34:%.*]] -// CHECK4: section_finish34: -// CHECK4-NEXT: br label [[DOTOMPFINALIZE36:%.*]] -// CHECK4: .ompfinalize36: -// CHECK4-NEXT: br label [[SECTION_FINI35]] -// CHECK4: section_fini35: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_AFTER19:%.*]] +// CHECK4: omp_section_loop.after19: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_AFTER19SECTIONS_FINI:%.*]] +// CHECK4: omp_section_loop.after19sections.fini: // CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP21]], 0 // 
CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK4-NEXT: [[SUB38:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK4-NEXT: store i32 [[SUB38]], i32* [[DOTCAPTURE_EXPR_37]], align 4 +// CHECK4-NEXT: [[SUB35:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK4-NEXT: store i32 [[SUB35]], i32* [[DOTCAPTURE_EXPR_34]], align 4 // CHECK4-NEXT: store i32 0, i32* [[I]], align 4 // CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP22]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK4: omp.precond.then: // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 // CHECK4-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM40:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM40]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM37:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM37]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 -// CHECK4-NEXT: [[CMP41:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] -// CHECK4-NEXT: br i1 [[CMP41]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK4-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +// CHECK4-NEXT: br i1 [[CMP38]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: -// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: // CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 @@ -2192,23 +2156,29 @@ for (int i = 0; i < argc; ++i) { // CHECK4: omp.inner.for.cond: // CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP42:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] -// CHECK4-NEXT: br i1 [[CMP42]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4-NEXT: [[CMP39:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK4-NEXT: br i1 [[CMP39]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: // CHECK4-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 -// CHECK4-NEXT: [[ADD43:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD43]], i32* [[I39]], align 4 +// CHECK4-NEXT: [[ADD40:%.*]] = add nsw i32 0, [[MUL]] +// CHECK4-NEXT: store i32 [[ADD40]], i32* [[I36]], align 4 // CHECK4-NEXT: [[TMP32:%.*]] = load float, float* @flag, align 4 -// CHECK4-NEXT: [[TOBOOL44:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 -// CHECK4-NEXT: br i1 [[TOBOOL44]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK4-NEXT: [[TOBOOL41:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 +// CHECK4-NEXT: br i1 [[TOBOOL41]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then: -// CHECK4-NEXT: 
[[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) -// CHECK4-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM45]], i32 2) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) +// CHECK4-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 2) // CHECK4-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 // CHECK4-NEXT: br i1 [[TMP34]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK4: .cancel.exit: // CHECK4-NEXT: br label [[CANCEL_EXIT:%.*]] +// CHECK4: omp_section_loop.body.case.cncl: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_EXIT]] +// CHECK4: omp_section_loop.body.case23.cncl: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] +// CHECK4: omp_section_loop.body.case25.cncl: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] // CHECK4: .cancel.continue: // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] // CHECK4: omp_if.else: @@ -2219,30 +2189,30 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: // CHECK4-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD46:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK4-NEXT: store i32 [[ADD46]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK4-NEXT: store i32 [[ADD43]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM48]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call 
i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM45]]) // CHECK4-NEXT: br label [[OMP_PRECOND_END]] // CHECK4: cancel.exit: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM47]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM44:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM44]]) // CHECK4-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK4: omp.precond.end: // CHECK4-NEXT: br label [[CANCEL_CONT]] // CHECK4: cancel.cont: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM49:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM49]]) -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM50:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) -// CHECK4-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM50]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM46:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM46]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) +// CHECK4-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) // CHECK4-NEXT: [[TMP37:%.*]] = bitcast i8* [[TMP36]] to %struct.kmp_task_t_with_privates* // CHECK4-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 0 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM51:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) -// CHECK4-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM51]], i8* [[TMP36]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) +// CHECK4-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM48]], i8* [[TMP36]]) // CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) // CHECK4-NEXT: store i32 0, i32* [[R]], align 4 @@ -2266,54 +2236,50 @@ for (int i = 0; i < argc; ++i) { // CHECK4: omp.par.region: // CHECK4-NEXT: [[TMP2:%.*]] = load float, float* @flag, align 4 // CHECK4-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00 -// CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_PAR_REGION_IF:%.*]], label [[OMP_PAR_REGION1:%.*]] -// CHECK4: omp.par.region1: -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK4-NEXT: [[CONV:%.*]] = trunc i32 [[TMP3]] to i8 -// CHECK4-NEXT: [[TMP4:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP4]], i64 0 -// CHECK4-NEXT: [[TMP5:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK4-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i64 0 -// CHECK4-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX4]], align 1 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM5]]) -// CHECK4-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0 -// CHECK4-NEXT: br i1 [[TMP7]], label [[OMP_PAR_REGION1_CONT:%.*]], label [[OMP_PAR_REGION1_CNCL_FINI:%.*]] -// CHECK4: omp.par.region1.cncl.fini: -// CHECK4-NEXT: br label [[OMP_PAR_REGION1_CNCL:%.*]] -// CHECK4: omp.par.region1.cncl: -// CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] -// CHECK4: omp.par.pre_finalize: -// CHECK4-NEXT: br label [[DOTOMPFINALIZE:%.*]] -// CHECK4: .ompfinalize: +// CHECK4-NEXT: br i1 [[TOBOOL]], label [[TMP14:%.*]], label [[TMP3:%.*]] +// CHECK4: 3: +// CHECK4-NEXT: br label [[TMP4:%.*]] +// CHECK4: 4: +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// 
CHECK4-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8 +// CHECK4-NEXT: [[TMP6:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP6]], i64 0 +// CHECK4-NEXT: [[TMP7:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK4-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 0 +// CHECK4-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1 +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +// CHECK4-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 +// CHECK4-NEXT: br i1 [[TMP9]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]] +// CHECK4: .cncl5: // CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] -// CHECK4: omp.par.region1.cont: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK4-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP9]], i64 0 -// CHECK4-NEXT: [[TMP10:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 -// CHECK4-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP10]], i64 0 -// CHECK4-NEXT: [[TMP11:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1 -// CHECK4-NEXT: [[CONV8:%.*]] = sext i8 [[TMP11]] to i32 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP8]] +// CHECK4: .cont: +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK4-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0 +// CHECK4-NEXT: [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 +// CHECK4-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0 +// CHECK4-NEXT: [[TMP13:%.*]] = load i8, i8* 
[[ARRAYIDX7]], align 1 +// CHECK4-NEXT: [[CONV8:%.*]] = sext i8 [[TMP13]] to i32 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP10]] // CHECK4-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD]] to i8 // CHECK4-NEXT: store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1 // CHECK4-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] // CHECK4: omp.par.region.parallel.after: -// CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE]] -// CHECK4: omp.par.region.if: +// CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +// CHECK4: omp.par.pre_finalize: +// CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] +// CHECK4: 14: +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1) +// CHECK4-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0 +// CHECK4-NEXT: br i1 [[TMP16]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]] +// CHECK4: .cncl: // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]], i32 1) -// CHECK4-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 0 -// CHECK4-NEXT: br i1 [[TMP13]], label [[OMP_PAR_REGION_IF_CONT:%.*]], label [[OMP_PAR_REGION_IF_CNCL_FINI:%.*]] -// CHECK4: omp.par.region.if.cncl.fini: -// CHECK4-NEXT: br label [[OMP_PAR_REGION_IF_CNCL:%.*]] -// CHECK4: omp.par.region.if.cncl: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) -// CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE]] -// CHECK4: omp.par.region.if.cont: -// CHECK4-NEXT: br label [[OMP_PAR_REGION1]] +// CHECK4-NEXT: [[TMP17:%.*]] = call i32 
@__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +// CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] +// CHECK4: .split: +// CHECK4-NEXT: br label [[TMP4]] // CHECK4: omp.par.outlined.exit.exitStub: // CHECK4-NEXT: ret void // @@ -2402,14 +2368,14 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK4-NEXT: ] // CHECK4: .omp.sections.case: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK4-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 -// CHECK4-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] -// CHECK4: .cancel.exit: -// CHECK4-NEXT: br label [[CANCEL_EXIT:%.*]] -// CHECK4: .cancel.continue: +// CHECK4-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 +// CHECK4-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] +// CHECK4: .omp.sections.case.split: // CHECK4-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] +// CHECK4: .omp.sections.case.cncl: +// CHECK4-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK4: .omp.sections.exit: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: @@ -2418,13 +2384,13 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) // CHECK4-NEXT: call void 
@__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) -// CHECK4-NEXT: br label [[CANCEL_CONT:%.*]] +// CHECK4-NEXT: br label [[CANCEL_CONT]] // CHECK4: cancel.cont: // CHECK4-NEXT: ret void // CHECK4: cancel.exit: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19]]) // CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK4-NEXT: br label [[CANCEL_CONT]] // @@ -2445,7 +2411,7 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) // CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) // CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK4-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 1 @@ -2466,23 +2432,25 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]] // CHECK4-NEXT: ] // CHECK4: .omp.sections.case: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* 
@[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK4-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 -// CHECK4-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] -// CHECK4: .cancel.exit: -// CHECK4-NEXT: br label [[CANCEL_EXIT:%.*]] -// CHECK4: .cancel.continue: +// CHECK4-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 +// CHECK4-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] +// CHECK4: .omp.sections.case.split: // CHECK4-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] +// CHECK4: .omp.sections.case.cncl: +// CHECK4-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK4: .omp.sections.case2: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 3) -// CHECK4-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK4-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT4:%.*]], label [[DOTCANCEL_CONTINUE5:%.*]] -// CHECK4: .cancel.exit4: -// CHECK4-NEXT: br label [[CANCEL_EXIT]] -// CHECK4: .cancel.continue5: +// CHECK4-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0 +// CHECK4-NEXT: br i1 [[TMP10]], label [[DOTOMP_SECTIONS_CASE2_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE2_CNCL:%.*]] +// CHECK4: .omp.sections.case2.split: +// CHECK4-NEXT: br label [[DOTOMP_SECTIONS_CASE2_SECTION_AFTER:%.*]] +// CHECK4: .omp.sections.case2.section.after: // CHECK4-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] +// CHECK4: .omp.sections.case2.cncl: +// CHECK4-NEXT: br label [[OMP_INNER_FOR_END]] // CHECK4: .omp.sections.exit: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: @@ -2491,14 +2459,14 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], 
align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM7]]) -// CHECK4-NEXT: br label [[CANCEL_CONT:%.*]] +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM5]]) +// CHECK4-NEXT: br label [[CANCEL_CONT]] // CHECK4: cancel.cont: // CHECK4-NEXT: ret void // CHECK4: cancel.exit: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23]]) +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) // CHECK4-NEXT: br label [[CANCEL_CONT]] // // @@ -2545,7 +2513,7 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK4-NEXT: store i32 0, i32* [[R3]], align 4 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB31:[0-9]+]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) // CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: 
[[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 @@ -2573,7 +2541,7 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB33:[0-9]+]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) // CHECK4-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM7]], i32 2) // CHECK4-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 // CHECK4-NEXT: br i1 [[TMP15]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] @@ -2595,14 +2563,14 @@ for (int i = 0; i < argc; ++i) { // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35:[0-9]+]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) // CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) // CHECK4-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK4-NEXT: [[TMP20:%.*]] = bitcast i32* [[R3]] to i8* // CHECK4-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) // CHECK4-NEXT: [[TMP21:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK4-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB36:[0-9]+]], i32 
[[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB30:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK4-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK4-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK4-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] @@ -2612,10 +2580,10 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[R3]], align 4 // CHECK4-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK4-NEXT: store i32 [[ADD13]], i32* [[TMP1]], align 4 -// CHECK4-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB30]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK4: cancel.exit: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) // CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) // CHECK4-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK4: .omp.reduction.case2: @@ -3912,13 +3880,13 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_37:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: 
[[DOTCAPTURE_EXPR_34:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I39:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I36:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK9-NEXT: [[R:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32 0, i32* [[RETVAL]], align 4 @@ -3965,12 +3933,8 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 3) // CHECK9-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK9-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL_FINI:%.*]] -// CHECK9: omp_section_loop.body.case.cncl.fini: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] -// CHECK9: omp_section_loop.body.case.cncl: -// CHECK9-NEXT: br label [[SECTION_FINI:%.*]] -// CHECK9: omp_section_loop.body.case.cont: +// CHECK9-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] +// CHECK9: omp_section_loop.body.case.split: // CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_SECTION_AFTER:%.*]] // CHECK9: omp_section_loop.body.case.section.after: // CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_SECTIONS_AFTER]] @@ -3985,14 +3949,10 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) // CHECK9-NEXT: br label [[OMP_SECTION_LOOP_AFTER:%.*]] // CHECK9: omp_section_loop.after: -// CHECK9-NEXT: br label 
[[SECTION_FINISH:%.*]] -// CHECK9: section_finish: -// CHECK9-NEXT: br label [[DOTOMPFINALIZE13:%.*]] -// CHECK9: .ompfinalize13: -// CHECK9-NEXT: br label [[SECTION_FINI]] -// CHECK9: section_fini: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER14:%.*]] -// CHECK9: omp_section_loop.preheader14: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_AFTERSECTIONS_FINI:%.*]] +// CHECK9: omp_section_loop.aftersections.fini: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]] +// CHECK9: omp_section_loop.preheader13: // CHECK9-NEXT: store i32 0, i32* [[P_LOWERBOUND29]], align 4 // CHECK9-NEXT: store i32 1, i32* [[P_UPPERBOUND30]], align 4 // CHECK9-NEXT: store i32 1, i32* [[P_STRIDE31]], align 4 @@ -4002,91 +3962,79 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[P_UPPERBOUND30]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], [[TMP9]] // CHECK9-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 1 -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_HEADER15:%.*]] -// CHECK9: omp_section_loop.header15: -// CHECK9-NEXT: [[OMP_SECTION_LOOP_IV21:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER14]] ], [ [[OMP_SECTION_LOOP_NEXT23:%.*]], [[OMP_SECTION_LOOP_INC18:%.*]] ] -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_COND16:%.*]] -// CHECK9: omp_section_loop.cond16: -// CHECK9-NEXT: [[OMP_SECTION_LOOP_CMP22:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV21]], [[TMP12]] -// CHECK9-NEXT: br i1 [[OMP_SECTION_LOOP_CMP22]], label [[OMP_SECTION_LOOP_BODY17:%.*]], label [[OMP_SECTION_LOOP_EXIT19:%.*]] -// CHECK9: omp_section_loop.body17: -// CHECK9-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV21]], [[TMP9]] +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_HEADER14:%.*]] +// CHECK9: omp_section_loop.header14: +// CHECK9-NEXT: [[OMP_SECTION_LOOP_IV20:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER13]] ], [ [[OMP_SECTION_LOOP_NEXT22:%.*]], [[OMP_SECTION_LOOP_INC17:%.*]] ] +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_COND15:%.*]] +// CHECK9: 
omp_section_loop.cond15: +// CHECK9-NEXT: [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP12]] +// CHECK9-NEXT: br i1 [[OMP_SECTION_LOOP_CMP21]], label [[OMP_SECTION_LOOP_BODY16:%.*]], label [[OMP_SECTION_LOOP_EXIT18:%.*]] +// CHECK9: omp_section_loop.body16: +// CHECK9-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP9]] // CHECK9-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 1 // CHECK9-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], 0 -// CHECK9-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER:%.*]] [ -// CHECK9-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE24:%.*]] -// CHECK9-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE26:%.*]] +// CHECK9-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER:%.*]] [ +// CHECK9-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE23:%.*]] +// CHECK9-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE25:%.*]] // CHECK9-NEXT: ] -// CHECK9: omp_section_loop.body.case24: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM25:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM25]], i32 3) +// CHECK9: omp_section_loop.body.case23: +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM24]], i32 3) // CHECK9-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0 -// CHECK9-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE24_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE24_CNCL_FINI:%.*]] -// CHECK9: omp_section_loop.body.case24.cncl.fini: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE24_CNCL:%.*]] -// CHECK9: omp_section_loop.body.case24.cncl: -// CHECK9-NEXT: br label [[SECTION_FINI35:%.*]] -// CHECK9: omp_section_loop.body.case24.cont: -// CHECK9-NEXT: br label 
[[OMP_SECTION_LOOP_BODY_CASE24_SECTION_AFTER:%.*]] -// CHECK9: omp_section_loop.body.case24.section.after: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER]] -// CHECK9: omp_section_loop.body.case26: +// CHECK9-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE23_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE23_CNCL:%.*]] +// CHECK9: omp_section_loop.body.case23.split: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE23_SECTION_AFTER:%.*]] +// CHECK9: omp_section_loop.body.case23.section.after: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] +// CHECK9: omp_section_loop.body.case25: // CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM27:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM27]], i32 3) // CHECK9-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 0 -// CHECK9-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE26_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL_FINI:%.*]] -// CHECK9: omp_section_loop.body.case26.cncl.fini: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL:%.*]] -// CHECK9: omp_section_loop.body.case26.cncl: -// CHECK9-NEXT: br label [[SECTION_FINI35]] -// CHECK9: omp_section_loop.body.case26.cont: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTIONFINI:%.*]] -// CHECK9: omp_section_loop.body.case26.sectionfini: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTION_AFTER:%.*]] -// CHECK9: omp_section_loop.body.case26.section.after: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER]] -// CHECK9: omp_section_loop.body17.sections.after: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_INC18]] -// CHECK9: omp_section_loop.inc18: -// CHECK9-NEXT: [[OMP_SECTION_LOOP_NEXT23]] = add nuw i32 [[OMP_SECTION_LOOP_IV21]], 1 -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_HEADER15]] -// CHECK9: omp_section_loop.exit19: +// 
CHECK9-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]] +// CHECK9: omp_section_loop.body.case25.split: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER26:%.*]] +// CHECK9: omp_section_loop.body.case25.section.after26: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER:%.*]] +// CHECK9: omp_section_loop.body.case25.section.after: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] +// CHECK9: omp_section_loop.body16.sections.after: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_INC17]] +// CHECK9: omp_section_loop.inc17: +// CHECK9-NEXT: [[OMP_SECTION_LOOP_NEXT22]] = add nuw i32 [[OMP_SECTION_LOOP_IV20]], 1 +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_HEADER14]] +// CHECK9: omp_section_loop.exit18: // CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]]) // CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM33:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM33]]) -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_AFTER20:%.*]] -// CHECK9: omp_section_loop.after20: -// CHECK9-NEXT: br label [[SECTION_FINISH34:%.*]] -// CHECK9: section_finish34: -// CHECK9-NEXT: br label [[DOTOMPFINALIZE36:%.*]] -// CHECK9: .ompfinalize36: -// CHECK9-NEXT: br label [[SECTION_FINI35]] -// CHECK9: section_fini35: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_AFTER19:%.*]] +// CHECK9: omp_section_loop.after19: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_AFTER19SECTIONS_FINI:%.*]] +// CHECK9: omp_section_loop.after19sections.fini: // CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP21]], 0 // 
CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[SUB38:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK9-NEXT: store i32 [[SUB38]], i32* [[DOTCAPTURE_EXPR_37]], align 4 +// CHECK9-NEXT: [[SUB35:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK9-NEXT: store i32 [[SUB35]], i32* [[DOTCAPTURE_EXPR_34]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 // CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP22]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 // CHECK9-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM40:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM40]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM37:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM37]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 -// CHECK9-NEXT: [[CMP41:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] -// CHECK9-NEXT: br i1 [[CMP41]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK9-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +// CHECK9-NEXT: br i1 [[CMP38]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: // CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 @@ -4100,23 +4048,29 @@ for (int i = 0; i < argc; ++i) { // CHECK9: omp.inner.for.cond: // CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP42:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] -// CHECK9-NEXT: br i1 [[CMP42]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[CMP39:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK9-NEXT: br i1 [[CMP39]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: // CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 -// CHECK9-NEXT: [[ADD43:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD43]], i32* [[I39]], align 4 +// CHECK9-NEXT: [[ADD40:%.*]] = add nsw i32 0, [[MUL]] +// CHECK9-NEXT: store i32 [[ADD40]], i32* [[I36]], align 4 // CHECK9-NEXT: [[TMP32:%.*]] = load float, float* @flag, align 4 -// CHECK9-NEXT: [[TOBOOL44:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 -// CHECK9-NEXT: br i1 [[TOBOOL44]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK9-NEXT: [[TOBOOL41:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 +// CHECK9-NEXT: br i1 [[TOBOOL41]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: 
[[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) -// CHECK9-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM45]], i32 2) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) +// CHECK9-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 2) // CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 // CHECK9-NEXT: br i1 [[TMP34]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK9: .cancel.exit: // CHECK9-NEXT: br label [[CANCEL_EXIT:%.*]] +// CHECK9: omp_section_loop.body.case.cncl: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_EXIT]] +// CHECK9: omp_section_loop.body.case23.cncl: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] +// CHECK9: omp_section_loop.body.case25.cncl: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] // CHECK9: .cancel.continue: // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: @@ -4127,30 +4081,30 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: // CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD46:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK9-NEXT: store i32 [[ADD46]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK9-NEXT: store i32 [[ADD43]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM48]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call 
i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM45]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: cancel.exit: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM47]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM44:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM44]]) // CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK9: omp.precond.end: // CHECK9-NEXT: br label [[CANCEL_CONT]] // CHECK9: cancel.cont: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM49:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM49]]) -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM50:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) -// CHECK9-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM50]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM46:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM46]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) +// CHECK9-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) // CHECK9-NEXT: [[TMP37:%.*]] = bitcast i8* [[TMP36]] to %struct.kmp_task_t_with_privates* // CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 0 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM51:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) -// CHECK9-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM51]], i8* [[TMP36]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) +// CHECK9-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM48]], i8* [[TMP36]]) // CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) // CHECK9-NEXT: store i32 0, i32* [[R]], align 4 @@ -4174,54 +4128,50 @@ for (int i = 0; i < argc; ++i) { // CHECK9: omp.par.region: // CHECK9-NEXT: [[TMP2:%.*]] = load float, float* @flag, align 4 // CHECK9-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00 -// CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_PAR_REGION_IF:%.*]], label [[OMP_PAR_REGION1:%.*]] -// CHECK9: omp.par.region1: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i32 [[TMP3]] to i8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP4]], i64 0 -// CHECK9-NEXT: [[TMP5:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK9-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i64 0 -// CHECK9-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX4]], align 1 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM5]]) -// CHECK9-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0 -// CHECK9-NEXT: br i1 [[TMP7]], label [[OMP_PAR_REGION1_CONT:%.*]], label [[OMP_PAR_REGION1_CNCL_FINI:%.*]] -// CHECK9: omp.par.region1.cncl.fini: -// CHECK9-NEXT: br label [[OMP_PAR_REGION1_CNCL:%.*]] -// CHECK9: omp.par.region1.cncl: -// CHECK9-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] -// CHECK9: omp.par.pre_finalize: -// CHECK9-NEXT: br label [[DOTOMPFINALIZE:%.*]] -// CHECK9: .ompfinalize: +// CHECK9-NEXT: br i1 [[TOBOOL]], label [[TMP14:%.*]], label [[TMP3:%.*]] +// CHECK9: 3: +// CHECK9-NEXT: br label [[TMP4:%.*]] +// CHECK9: 4: +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// 
CHECK9-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP6]], i64 0 +// CHECK9-NEXT: [[TMP7:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK9-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 0 +// CHECK9-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1 +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +// CHECK9-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 +// CHECK9-NEXT: br i1 [[TMP9]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]] +// CHECK9: .cncl5: // CHECK9-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] -// CHECK9: omp.par.region1.cont: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP9]], i64 0 -// CHECK9-NEXT: [[TMP10:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 -// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP10]], i64 0 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i8 [[TMP11]] to i32 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP8]] +// CHECK9: .cont: +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0 +// CHECK9-NEXT: [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0 +// CHECK9-NEXT: [[TMP13:%.*]] = load i8, i8* 
[[ARRAYIDX7]], align 1 +// CHECK9-NEXT: [[CONV8:%.*]] = sext i8 [[TMP13]] to i32 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP10]] // CHECK9-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD]] to i8 // CHECK9-NEXT: store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1 // CHECK9-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] // CHECK9: omp.par.region.parallel.after: -// CHECK9-NEXT: br label [[OMP_PAR_PRE_FINALIZE]] -// CHECK9: omp.par.region.if: +// CHECK9-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +// CHECK9: omp.par.pre_finalize: +// CHECK9-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] +// CHECK9: 14: +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1) +// CHECK9-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0 +// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]] +// CHECK9: .cncl: // CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]], i32 1) -// CHECK9-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 0 -// CHECK9-NEXT: br i1 [[TMP13]], label [[OMP_PAR_REGION_IF_CONT:%.*]], label [[OMP_PAR_REGION_IF_CNCL_FINI:%.*]] -// CHECK9: omp.par.region.if.cncl.fini: -// CHECK9-NEXT: br label [[OMP_PAR_REGION_IF_CNCL:%.*]] -// CHECK9: omp.par.region.if.cncl: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) -// CHECK9-NEXT: br label [[OMP_PAR_PRE_FINALIZE]] -// CHECK9: omp.par.region.if.cont: -// CHECK9-NEXT: br label [[OMP_PAR_REGION1]] +// CHECK9-NEXT: [[TMP17:%.*]] = call i32 
@__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +// CHECK9-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] +// CHECK9: .split: +// CHECK9-NEXT: br label [[TMP4]] // CHECK9: omp.par.outlined.exit.exitStub: // CHECK9-NEXT: ret void // @@ -4310,14 +4260,14 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.sections.case: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK9-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 -// CHECK9-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] -// CHECK9: .cancel.exit: -// CHECK9-NEXT: br label [[CANCEL_EXIT:%.*]] -// CHECK9: .cancel.continue: +// CHECK9-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 +// CHECK9-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] +// CHECK9: .omp.sections.case.split: // CHECK9-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] +// CHECK9: .omp.sections.case.cncl: +// CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK9: .omp.sections.exit: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: @@ -4326,13 +4276,13 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) // CHECK9-NEXT: call void 
@__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) -// CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] +// CHECK9-NEXT: br label [[CANCEL_CONT]] // CHECK9: cancel.cont: // CHECK9-NEXT: ret void // CHECK9: cancel.exit: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19]]) // CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK9-NEXT: br label [[CANCEL_CONT]] // @@ -4353,7 +4303,7 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) // CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 1 @@ -4374,23 +4324,25 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.sections.case: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* 
@[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK9-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 -// CHECK9-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] -// CHECK9: .cancel.exit: -// CHECK9-NEXT: br label [[CANCEL_EXIT:%.*]] -// CHECK9: .cancel.continue: +// CHECK9-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 +// CHECK9-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] +// CHECK9: .omp.sections.case.split: // CHECK9-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] +// CHECK9: .omp.sections.case.cncl: +// CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK9: .omp.sections.case2: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 3) -// CHECK9-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK9-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT4:%.*]], label [[DOTCANCEL_CONTINUE5:%.*]] -// CHECK9: .cancel.exit4: -// CHECK9-NEXT: br label [[CANCEL_EXIT]] -// CHECK9: .cancel.continue5: +// CHECK9-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0 +// CHECK9-NEXT: br i1 [[TMP10]], label [[DOTOMP_SECTIONS_CASE2_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE2_CNCL:%.*]] +// CHECK9: .omp.sections.case2.split: +// CHECK9-NEXT: br label [[DOTOMP_SECTIONS_CASE2_SECTION_AFTER:%.*]] +// CHECK9: .omp.sections.case2.section.after: // CHECK9-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] +// CHECK9: .omp.sections.case2.cncl: +// CHECK9-NEXT: br label [[OMP_INNER_FOR_END]] // CHECK9: .omp.sections.exit: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: @@ -4399,14 +4351,14 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], 
align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM7]]) -// CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM5]]) +// CHECK9-NEXT: br label [[CANCEL_CONT]] // CHECK9: cancel.cont: // CHECK9-NEXT: ret void // CHECK9: cancel.exit: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) // CHECK9-NEXT: br label [[CANCEL_CONT]] // // @@ -4453,7 +4405,7 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK9-NEXT: store i32 0, i32* [[R3]], align 4 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB31:[0-9]+]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: 
[[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 @@ -4481,7 +4433,7 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB33:[0-9]+]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) // CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM7]], i32 2) // CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 // CHECK9-NEXT: br i1 [[TMP15]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] @@ -4503,14 +4455,14 @@ for (int i = 0; i < argc; ++i) { // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35:[0-9]+]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) // CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) // CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK9-NEXT: [[TMP20:%.*]] = bitcast i32* [[R3]] to i8* // CHECK9-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) // CHECK9-NEXT: [[TMP21:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK9-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB36:[0-9]+]], i32 
[[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB30:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] @@ -4520,10 +4472,10 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[R3]], align 4 // CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: store i32 [[ADD13]], i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB30]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: cancel.exit: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) // CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) // CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK9: .omp.reduction.case2: @@ -4580,13 +4532,13 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[DOTCAPTURE_EXPR_37:%.*]] = alloca i32, align 4 +// 
CHECK10-NEXT: [[DOTCAPTURE_EXPR_34:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[I39:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[I36:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK10-NEXT: [[R:%.*]] = alloca i32, align 4 // CHECK10-NEXT: store i32 0, i32* [[RETVAL]], align 4 @@ -4633,12 +4585,8 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK10-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 3) // CHECK10-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK10-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL_FINI:%.*]] -// CHECK10: omp_section_loop.body.case.cncl.fini: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] -// CHECK10: omp_section_loop.body.case.cncl: -// CHECK10-NEXT: br label [[SECTION_FINI:%.*]] -// CHECK10: omp_section_loop.body.case.cont: +// CHECK10-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] +// CHECK10: omp_section_loop.body.case.split: // CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_SECTION_AFTER:%.*]] // CHECK10: omp_section_loop.body.case.section.after: // CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_SECTIONS_AFTER]] @@ -4653,14 +4601,10 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) // CHECK10-NEXT: br label [[OMP_SECTION_LOOP_AFTER:%.*]] // CHECK10: 
omp_section_loop.after: -// CHECK10-NEXT: br label [[SECTION_FINISH:%.*]] -// CHECK10: section_finish: -// CHECK10-NEXT: br label [[DOTOMPFINALIZE13:%.*]] -// CHECK10: .ompfinalize13: -// CHECK10-NEXT: br label [[SECTION_FINI]] -// CHECK10: section_fini: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER14:%.*]] -// CHECK10: omp_section_loop.preheader14: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_AFTERSECTIONS_FINI:%.*]] +// CHECK10: omp_section_loop.aftersections.fini: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]] +// CHECK10: omp_section_loop.preheader13: // CHECK10-NEXT: store i32 0, i32* [[P_LOWERBOUND29]], align 4 // CHECK10-NEXT: store i32 1, i32* [[P_UPPERBOUND30]], align 4 // CHECK10-NEXT: store i32 1, i32* [[P_STRIDE31]], align 4 @@ -4670,91 +4614,79 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[P_UPPERBOUND30]], align 4 // CHECK10-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], [[TMP9]] // CHECK10-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 1 -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_HEADER15:%.*]] -// CHECK10: omp_section_loop.header15: -// CHECK10-NEXT: [[OMP_SECTION_LOOP_IV21:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER14]] ], [ [[OMP_SECTION_LOOP_NEXT23:%.*]], [[OMP_SECTION_LOOP_INC18:%.*]] ] -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_COND16:%.*]] -// CHECK10: omp_section_loop.cond16: -// CHECK10-NEXT: [[OMP_SECTION_LOOP_CMP22:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV21]], [[TMP12]] -// CHECK10-NEXT: br i1 [[OMP_SECTION_LOOP_CMP22]], label [[OMP_SECTION_LOOP_BODY17:%.*]], label [[OMP_SECTION_LOOP_EXIT19:%.*]] -// CHECK10: omp_section_loop.body17: -// CHECK10-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV21]], [[TMP9]] +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_HEADER14:%.*]] +// CHECK10: omp_section_loop.header14: +// CHECK10-NEXT: [[OMP_SECTION_LOOP_IV20:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER13]] ], [ [[OMP_SECTION_LOOP_NEXT22:%.*]], [[OMP_SECTION_LOOP_INC17:%.*]] 
] +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_COND15:%.*]] +// CHECK10: omp_section_loop.cond15: +// CHECK10-NEXT: [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP12]] +// CHECK10-NEXT: br i1 [[OMP_SECTION_LOOP_CMP21]], label [[OMP_SECTION_LOOP_BODY16:%.*]], label [[OMP_SECTION_LOOP_EXIT18:%.*]] +// CHECK10: omp_section_loop.body16: +// CHECK10-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP9]] // CHECK10-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 1 // CHECK10-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], 0 -// CHECK10-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER:%.*]] [ -// CHECK10-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE24:%.*]] -// CHECK10-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE26:%.*]] +// CHECK10-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER:%.*]] [ +// CHECK10-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE23:%.*]] +// CHECK10-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE25:%.*]] // CHECK10-NEXT: ] -// CHECK10: omp_section_loop.body.case24: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM25:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM25]], i32 3) +// CHECK10: omp_section_loop.body.case23: +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM24]], i32 3) // CHECK10-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0 -// CHECK10-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE24_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE24_CNCL_FINI:%.*]] -// CHECK10: omp_section_loop.body.case24.cncl.fini: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE24_CNCL:%.*]] -// CHECK10: omp_section_loop.body.case24.cncl: -// CHECK10-NEXT: br label 
[[SECTION_FINI35:%.*]] -// CHECK10: omp_section_loop.body.case24.cont: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE24_SECTION_AFTER:%.*]] -// CHECK10: omp_section_loop.body.case24.section.after: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER]] -// CHECK10: omp_section_loop.body.case26: +// CHECK10-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE23_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE23_CNCL:%.*]] +// CHECK10: omp_section_loop.body.case23.split: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE23_SECTION_AFTER:%.*]] +// CHECK10: omp_section_loop.body.case23.section.after: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] +// CHECK10: omp_section_loop.body.case25: // CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM27:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK10-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM27]], i32 3) // CHECK10-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 0 -// CHECK10-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE26_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL_FINI:%.*]] -// CHECK10: omp_section_loop.body.case26.cncl.fini: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL:%.*]] -// CHECK10: omp_section_loop.body.case26.cncl: -// CHECK10-NEXT: br label [[SECTION_FINI35]] -// CHECK10: omp_section_loop.body.case26.cont: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTIONFINI:%.*]] -// CHECK10: omp_section_loop.body.case26.sectionfini: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTION_AFTER:%.*]] -// CHECK10: omp_section_loop.body.case26.section.after: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY17_SECTIONS_AFTER]] -// CHECK10: omp_section_loop.body17.sections.after: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_INC18]] -// CHECK10: omp_section_loop.inc18: -// CHECK10-NEXT: [[OMP_SECTION_LOOP_NEXT23]] = add nuw i32 
[[OMP_SECTION_LOOP_IV21]], 1 -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_HEADER15]] -// CHECK10: omp_section_loop.exit19: +// CHECK10-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]] +// CHECK10: omp_section_loop.body.case25.split: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER26:%.*]] +// CHECK10: omp_section_loop.body.case25.section.after26: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER:%.*]] +// CHECK10: omp_section_loop.body.case25.section.after: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] +// CHECK10: omp_section_loop.body16.sections.after: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_INC17]] +// CHECK10: omp_section_loop.inc17: +// CHECK10-NEXT: [[OMP_SECTION_LOOP_NEXT22]] = add nuw i32 [[OMP_SECTION_LOOP_IV20]], 1 +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_HEADER14]] +// CHECK10: omp_section_loop.exit18: // CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]]) // CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM33:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK10-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM33]]) -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_AFTER20:%.*]] -// CHECK10: omp_section_loop.after20: -// CHECK10-NEXT: br label [[SECTION_FINISH34:%.*]] -// CHECK10: section_finish34: -// CHECK10-NEXT: br label [[DOTOMPFINALIZE36:%.*]] -// CHECK10: .ompfinalize36: -// CHECK10-NEXT: br label [[SECTION_FINI35]] -// CHECK10: section_fini35: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_AFTER19:%.*]] +// CHECK10: omp_section_loop.after19: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_AFTER19SECTIONS_FINI:%.*]] +// CHECK10: omp_section_loop.after19sections.fini: // CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK10-NEXT: store i32 [[TMP20]], i32* 
[[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP21]], 0 // CHECK10-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK10-NEXT: [[SUB38:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK10-NEXT: store i32 [[SUB38]], i32* [[DOTCAPTURE_EXPR_37]], align 4 +// CHECK10-NEXT: [[SUB35:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK10-NEXT: store i32 [[SUB35]], i32* [[DOTCAPTURE_EXPR_34]], align 4 // CHECK10-NEXT: store i32 0, i32* [[I]], align 4 // CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP22]] // CHECK10-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK10: omp.precond.then: // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 // CHECK10-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM40:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) -// CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM40]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM37:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM37]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// 
CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 -// CHECK10-NEXT: [[CMP41:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] -// CHECK10-NEXT: br i1 [[CMP41]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK10-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +// CHECK10-NEXT: br i1 [[CMP38]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: -// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: // CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 @@ -4768,23 +4700,29 @@ for (int i = 0; i < argc; ++i) { // CHECK10: omp.inner.for.cond: // CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP42:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] -// CHECK10-NEXT: br i1 [[CMP42]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK10-NEXT: [[CMP39:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK10-NEXT: br i1 [[CMP39]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: // CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 -// CHECK10-NEXT: [[ADD43:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD43]], i32* [[I39]], align 4 +// CHECK10-NEXT: [[ADD40:%.*]] = add nsw i32 0, [[MUL]] +// CHECK10-NEXT: store i32 [[ADD40]], i32* [[I36]], align 4 // CHECK10-NEXT: [[TMP32:%.*]] = load float, float* @flag, align 4 -// CHECK10-NEXT: [[TOBOOL44:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 -// CHECK10-NEXT: br i1 [[TOBOOL44]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// 
CHECK10-NEXT: [[TOBOOL41:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 +// CHECK10-NEXT: br i1 [[TOBOOL41]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) -// CHECK10-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM45]], i32 2) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) +// CHECK10-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 2) // CHECK10-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 // CHECK10-NEXT: br i1 [[TMP34]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK10: .cancel.exit: // CHECK10-NEXT: br label [[CANCEL_EXIT:%.*]] +// CHECK10: omp_section_loop.body.case.cncl: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_EXIT]] +// CHECK10: omp_section_loop.body.case23.cncl: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] +// CHECK10: omp_section_loop.body.case25.cncl: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] // CHECK10: .cancel.continue: // CHECK10-NEXT: br label [[OMP_IF_END:%.*]] // CHECK10: omp_if.else: @@ -4795,30 +4733,30 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: // CHECK10-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[ADD46:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK10-NEXT: store i32 [[ADD46]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK10-NEXT: store i32 [[ADD43]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM48]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM45]]) // CHECK10-NEXT: br label [[OMP_PRECOND_END]] // CHECK10: cancel.exit: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM47]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM44:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM44]]) // CHECK10-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK10: omp.precond.end: // CHECK10-NEXT: br label [[CANCEL_CONT]] // CHECK10: cancel.cont: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM49:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM49]]) -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM50:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) -// CHECK10-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM50]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM46:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM46]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) +// CHECK10-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) // CHECK10-NEXT: [[TMP37:%.*]] = bitcast i8* [[TMP36]] to %struct.kmp_task_t_with_privates* // CHECK10-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 0 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM51:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) -// CHECK10-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM51]], i8* [[TMP36]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) +// CHECK10-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM48]], i8* [[TMP36]]) // CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) // CHECK10-NEXT: store i32 0, i32* [[R]], align 4 @@ -4842,54 +4780,50 @@ for (int i = 0; i < argc; ++i) { // CHECK10: omp.par.region: // CHECK10-NEXT: [[TMP2:%.*]] = load float, float* @flag, align 4 // CHECK10-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00 -// CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_PAR_REGION_IF:%.*]], label [[OMP_PAR_REGION1:%.*]] -// CHECK10: omp.par.region1: -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK10-NEXT: [[CONV:%.*]] = trunc i32 [[TMP3]] to i8 -// CHECK10-NEXT: [[TMP4:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP4]], i64 0 -// CHECK10-NEXT: [[TMP5:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK10-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i64 0 -// CHECK10-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX4]], align 1 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM5]]) -// CHECK10-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0 -// CHECK10-NEXT: br i1 [[TMP7]], label [[OMP_PAR_REGION1_CONT:%.*]], label [[OMP_PAR_REGION1_CNCL_FINI:%.*]] -// CHECK10: omp.par.region1.cncl.fini: -// CHECK10-NEXT: br label [[OMP_PAR_REGION1_CNCL:%.*]] -// CHECK10: omp.par.region1.cncl: -// CHECK10-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] -// CHECK10: omp.par.pre_finalize: -// CHECK10-NEXT: br label [[DOTOMPFINALIZE:%.*]] -// CHECK10: .ompfinalize: +// CHECK10-NEXT: br i1 [[TOBOOL]], label [[TMP14:%.*]], label [[TMP3:%.*]] +// CHECK10: 3: +// CHECK10-NEXT: br label [[TMP4:%.*]] +// CHECK10: 4: +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* 
[[LOADGEP_ARGC_ADDR]], align 4 +// CHECK10-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8 +// CHECK10-NEXT: [[TMP6:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP6]], i64 0 +// CHECK10-NEXT: [[TMP7:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK10-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 0 +// CHECK10-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1 +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +// CHECK10-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 +// CHECK10-NEXT: br i1 [[TMP9]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]] +// CHECK10: .cncl5: // CHECK10-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] -// CHECK10: omp.par.region1.cont: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK10-NEXT: [[TMP9:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK10-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP9]], i64 0 -// CHECK10-NEXT: [[TMP10:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 -// CHECK10-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP10]], i64 0 -// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1 -// CHECK10-NEXT: [[CONV8:%.*]] = sext i8 [[TMP11]] to i32 -// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP8]] +// CHECK10: .cont: +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK10-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0 +// CHECK10-NEXT: [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 +// CHECK10-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* 
[[TMP12]], i64 0 +// CHECK10-NEXT: [[TMP13:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1 +// CHECK10-NEXT: [[CONV8:%.*]] = sext i8 [[TMP13]] to i32 +// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP10]] // CHECK10-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD]] to i8 // CHECK10-NEXT: store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1 // CHECK10-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] // CHECK10: omp.par.region.parallel.after: -// CHECK10-NEXT: br label [[OMP_PAR_PRE_FINALIZE]] -// CHECK10: omp.par.region.if: +// CHECK10-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +// CHECK10: omp.par.pre_finalize: +// CHECK10-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] +// CHECK10: 14: +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1) +// CHECK10-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0 +// CHECK10-NEXT: br i1 [[TMP16]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]] +// CHECK10: .cncl: // CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]], i32 1) -// CHECK10-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 0 -// CHECK10-NEXT: br i1 [[TMP13]], label [[OMP_PAR_REGION_IF_CONT:%.*]], label [[OMP_PAR_REGION_IF_CNCL_FINI:%.*]] -// CHECK10: omp.par.region.if.cncl.fini: -// CHECK10-NEXT: br label [[OMP_PAR_REGION_IF_CNCL:%.*]] -// CHECK10: omp.par.region.if.cncl: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) -// CHECK10-NEXT: br label [[OMP_PAR_PRE_FINALIZE]] -// CHECK10: omp.par.region.if.cont: -// CHECK10-NEXT: br 
label [[OMP_PAR_REGION1]] +// CHECK10-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +// CHECK10-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] +// CHECK10: .split: +// CHECK10-NEXT: br label [[TMP4]] // CHECK10: omp.par.outlined.exit.exitStub: // CHECK10-NEXT: ret void // @@ -4978,14 +4912,14 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK10-NEXT: ] // CHECK10: .omp.sections.case: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK10-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK10-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 -// CHECK10-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] -// CHECK10: .cancel.exit: -// CHECK10-NEXT: br label [[CANCEL_EXIT:%.*]] -// CHECK10: .cancel.continue: +// CHECK10-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 +// CHECK10-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] +// CHECK10: .omp.sections.case.split: // CHECK10-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] +// CHECK10: .omp.sections.case.cncl: +// CHECK10-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK10: .omp.sections.exit: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: @@ -4994,13 +4928,13 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) // CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) -// CHECK10-NEXT: br label [[CANCEL_CONT:%.*]] +// CHECK10-NEXT: br label [[CANCEL_CONT]] // CHECK10: cancel.cont: // CHECK10-NEXT: ret void // CHECK10: cancel.exit: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19]]) // CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK10-NEXT: br label [[CANCEL_CONT]] // @@ -5021,7 +4955,7 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) // CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 1 @@ -5042,23 +4976,25 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]] // CHECK10-NEXT: ] // CHECK10: .omp.sections.case: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK10-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK10-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 -// CHECK10-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] -// CHECK10: .cancel.exit: -// CHECK10-NEXT: br label [[CANCEL_EXIT:%.*]] -// CHECK10: .cancel.continue: +// CHECK10-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 +// CHECK10-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] +// CHECK10: .omp.sections.case.split: // CHECK10-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] +// CHECK10: .omp.sections.case.cncl: +// CHECK10-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK10: .omp.sections.case2: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK10-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 3) -// CHECK10-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK10-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT4:%.*]], label [[DOTCANCEL_CONTINUE5:%.*]] -// CHECK10: .cancel.exit4: -// CHECK10-NEXT: br label [[CANCEL_EXIT]] -// CHECK10: .cancel.continue5: +// CHECK10-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0 +// CHECK10-NEXT: br i1 [[TMP10]], label [[DOTOMP_SECTIONS_CASE2_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE2_CNCL:%.*]] +// CHECK10: .omp.sections.case2.split: +// CHECK10-NEXT: br label [[DOTOMP_SECTIONS_CASE2_SECTION_AFTER:%.*]] +// CHECK10: .omp.sections.case2.section.after: // CHECK10-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] +// CHECK10: .omp.sections.case2.cncl: +// CHECK10-NEXT: br label [[OMP_INNER_FOR_END]] // CHECK10: .omp.sections.exit: // CHECK10-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: @@ -5067,14 +5003,14 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM7]]) -// CHECK10-NEXT: br label [[CANCEL_CONT:%.*]] +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM5]]) +// CHECK10-NEXT: br label [[CANCEL_CONT]] // CHECK10: cancel.cont: // CHECK10-NEXT: ret void // CHECK10: cancel.exit: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23]]) +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) // CHECK10-NEXT: br label [[CANCEL_CONT]] // // @@ -5121,7 +5057,7 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK10-NEXT: store i32 0, i32* [[R3]], align 4 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB31:[0-9]+]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 
[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 @@ -5149,7 +5085,7 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB33:[0-9]+]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) // CHECK10-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM7]], i32 2) // CHECK10-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 // CHECK10-NEXT: br i1 [[TMP15]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] @@ -5171,14 +5107,14 @@ for (int i = 0; i < argc; ++i) { // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35:[0-9]+]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) // CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) // CHECK10-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK10-NEXT: [[TMP20:%.*]] = bitcast i32* [[R3]] to i8* // CHECK10-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) // CHECK10-NEXT: [[TMP21:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK10-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB36:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK10-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB30:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK10-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK10-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK10-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] @@ -5188,10 +5124,10 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[R3]], align 4 // CHECK10-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK10-NEXT: store i32 [[ADD13]], i32* [[TMP1]], align 4 -// CHECK10-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK10-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB30]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK10-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK10: cancel.exit: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) // CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) // CHECK10-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK10: .omp.reduction.case2: diff --git 
a/clang/test/OpenMP/masked_codegen.cpp b/clang/test/OpenMP/masked_codegen.cpp index 4c3fd546f8f00..26d34b5695801 100644 --- a/clang/test/OpenMP/masked_codegen.cpp +++ b/clang/test/OpenMP/masked_codegen.cpp @@ -33,8 +33,10 @@ int main() { // ALL-NEXT: br i1 [[IS_MASKED]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]] // ALL: [[THEN]] // ALL-NEXT: store i8 2, i8* [[A_ADDR]] -// IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] -// IRBUILDER: [[AFTER]] +// IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] +// IRBUILDER: [[AFTER]] +// IRBUILDER-NEXT: br label %[[FINALIZE:[^ ,]+]] +// IRBUILDER: [[FINALIZE]] // ALL-NEXT: call {{.*}}void @__kmpc_end_masked([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) // ALL-NEXT: br label {{%?}}[[EXIT]] // ALL: [[EXIT]] diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 5f255a1a0f24f..e5333c5e8d1fa 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -111,7 +111,7 @@ class OpenMPIRBuilder { enum class RegionKind { /// Sentinel object so we don't always have to check whether the stack is /// empty. - Toplevel, + Function, /// Actions on loop-associated directives are deferred until all applyXYZ /// actions have been applied to them. @@ -169,12 +169,7 @@ class OpenMPIRBuilder { /// Register an irregular exit to this region. void addBreak(BasicBlock *BB, omp::Directive Reason, - OMPRegionInfo *Target) { - assert(IsCancellable && - "Only cancellable region may have irregular exits"); - assert(!BB->getTerminator() && "Irregular exit must not rejoin the cfg"); - Breaks.emplace_back(BB, Reason, Target); - } + OMPRegionInfo *Target) ; /// Consistency self-check. void assertOK() const; @@ -205,21 +200,8 @@ class OpenMPIRBuilder { /// @} - // using RegionBreakCallbackTy = function_ref; - //using RegionBreakCallbackTy = function_ref; -#if 0 - /// Pop a region from the region stack. 
Exits are handled the following way: - /// - /// 1. For the regular region exit, \p FinCB is used by the caller to emit - /// finalization code somehwere on the control path exiting the region. exitRegion itself does nothing. - /// - /// 2. For irregular region exits that rejoing with the control flow after - /// this region, exitRegion emits a branch to \p FinBB containing the finalization code. This is typically that same code as for case 1 avoiding emitting the same finialization code multiple times. - /// - /// 3. For irregular region exits that rejoin a surrounding region, exitRegion - /// calls \p FinCB to insert the finalization code into the exiting control path. The irregular exit is then added as an irregular exit of the sourrounding loop that, opon its exit, can add its own finialization code and/or rejoin the control flow there. -#endif + /// Pop a region from the region stack. Net yet rejoined irregular exits fall through the outer surrounding region. void exitRegion(OMPRegionInfo *R); public: diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index eeb784836ccd0..f6e3dc97c26ef 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -517,12 +517,12 @@ void OpenMPIRBuilder::finalize(Function *Fn) { OpenMPIRBuilder::OpenMPIRBuilder(Module &M) : M(M), Builder(M.getContext()) { RegionStack.emplace_back(new OMPRegionInfo( - RegionKind::Toplevel, omp::OMPD_unknown, /*IsCancellable*/false )); + RegionKind::Function, omp::OMPD_unknown, /*IsCancellable*/false )); } OpenMPIRBuilder::~OpenMPIRBuilder() { assert(RegionStack.size() == 1 && - RegionStack.back()->Kind == RegionKind::Toplevel && + RegionStack.back()->Kind == RegionKind::Function && "OMPRegion push/pop must be balanced"); assert(OutlineInfos.empty() && "There must be no outstanding outlinings"); } @@ -663,7 +663,7 @@ void OpenMPIRBuilder::OMPRegionBreakInfo::assertOK() const { #ifndef NDEBUG 
assert(!BB->getTerminator() && "Pending irregular exit must be amendable"); - assert(Target); + assert(Target && "Irregular exit requires a target"); switch (Target->DK) { case OMPD_parallel: switch (Reason) { @@ -697,12 +697,22 @@ OpenMPIRBuilder::OMPRegionInfo::OMPRegionInfo(RegionKind Kind, assertOK(); } +void OpenMPIRBuilder::OMPRegionInfo::addBreak(BasicBlock *BB, omp::Directive Reason, + OMPRegionInfo *Target) { + assert(IsCancellable && + "Only cancellable region may have irregular exits"); + assert(!BB->getTerminator() && "Irregular exit must not rejoin the cfg"); + Breaks.emplace_back(BB, Reason, Target); + assertOK(); +} + void OpenMPIRBuilder::OMPRegionInfo::assertOK() const { #ifndef NDEBUG switch (Kind) { - case RegionKind::Toplevel: + case RegionKind::Function: assert(DK == OMPD_unknown && "toplevel region is not a specific kind"); assert(!IsCancellable && "top-level is not cancellable"); + assert(Breaks.empty() && "Topmost region cannot have irregular exits"); break; case RegionKind::CanonicalLoop: // TODO @@ -3406,6 +3416,18 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion( /* CodeGenIP */ Builder.saveAndClearIP()); + + // Exits are handled the following way: + // + // 1. For the regular region exit, \p FinCB is used by the caller to emit + // finalization code somehwere on the control path exiting the region. exitRegion itself does nothing. + // + // 2. For irregular region exits that rejoing with the control flow after + // this region, exitRegion emits a branch to \p FinBB containing the finalization code. This is typically that same code as for case 1 avoiding emitting the same finialization code multiple times. + // + // 3. For irregular region exits that rejoin a surrounding region, exitRegion + // calls \p FinCB to insert the finalization code into the exiting control path. 
The irregular exit is then added as an irregular exit of the sourrounding loop that, opon its exit, can add its own finialization code and/or rejoin the control flow there. + BasicBlock *FiniStartBB = FiniBB; if (FiniCB ) { Builder.SetInsertPoint(FiniBB, FiniBB->begin()); From 053a5430cef593d397e9e40f5d479a4b4947ec94 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 2 May 2022 17:10:51 -0500 Subject: [PATCH 36/50] codegen_ordered fix WIP --- clang/test/OpenMP/ordered_codegen.cpp | 200 +++++++++++------- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 2 +- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 18 +- 3 files changed, 134 insertions(+), 86 deletions(-) diff --git a/clang/test/OpenMP/ordered_codegen.cpp b/clang/test/OpenMP/ordered_codegen.cpp index 931629f40bb5b..af2d320dbcdd2 100644 --- a/clang/test/OpenMP/ordered_codegen.cpp +++ b/clang/test/OpenMP/ordered_codegen.cpp @@ -1386,6 +1386,8 @@ void foo_simd(int low, int up) { // CHECK1-IRBUILDER-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4 // CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK1-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK1-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK1-IRBUILDER: .ompfinalize: // CHECK1-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK1-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1-IRBUILDER: omp.body.continue: @@ -1474,6 +1476,8 @@ void foo_simd(int low, int up) { // CHECK1-IRBUILDER-NEXT: store float [[MUL7]], float* [[ARRAYIDX8]], align 4 // CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK1-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK1-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK1-IRBUILDER: .ompfinalize: // CHECK1-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) // CHECK1-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 
// CHECK1-IRBUILDER: omp.body.continue: @@ -1610,6 +1614,8 @@ void foo_simd(int low, int up) { // CHECK1-IRBUILDER-NEXT: store float [[MUL29]], float* [[ARRAYIDX31]], align 4 // CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK1-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK1-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK1-IRBUILDER: .ompfinalize: // CHECK1-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM23]]) // CHECK1-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1-IRBUILDER: omp.body.continue: @@ -1717,6 +1723,8 @@ void foo_simd(int low, int up) { // CHECK1-IRBUILDER-NEXT: store float [[MUL14]], float* [[ARRAYIDX16]], align 4 // CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK1-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK1-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK1-IRBUILDER: .ompfinalize: // CHECK1-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM8]]) // CHECK1-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1-IRBUILDER: omp.body.continue: @@ -1803,6 +1811,8 @@ void foo_simd(int low, int up) { // CHECK1-IRBUILDER-NEXT: call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !3 // CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK1-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK1-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK1-IRBUILDER: .ompfinalize: // CHECK1-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1-IRBUILDER: omp.body.continue: // CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1866,7 +1876,7 @@ void foo_simd(int low, int up) { // CHECK1-IRBUILDER-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7 // CHECK1-IRBUILDER-NEXT: [[ADD31:%.*]] = add i32 [[TMP28]], 1 // CHECK1-IRBUILDER-NEXT: [[CMP32:%.*]] = icmp 
ult i32 [[TMP27]], [[ADD31]] -// CHECK1-IRBUILDER-NEXT: br i1 [[CMP32]], label [[OMP_INNER_FOR_BODY33:%.*]], label [[OMP_INNER_FOR_END42:%.*]] +// CHECK1-IRBUILDER-NEXT: br i1 [[CMP32]], label [[OMP_INNER_FOR_BODY33:%.*]], label [[OMP_INNER_FOR_END43:%.*]] // CHECK1-IRBUILDER: omp.inner.for.body33: // CHECK1-IRBUILDER-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7 // CHECK1-IRBUILDER-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 @@ -1880,17 +1890,19 @@ void foo_simd(int low, int up) { // CHECK1-IRBUILDER-NEXT: call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7 // CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY33_ORDERED_AFTER:%.*]] // CHECK1-IRBUILDER: omp.inner.for.body33.ordered.after: -// CHECK1-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE38:%.*]] -// CHECK1-IRBUILDER: omp.body.continue38: -// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC39:%.*]] -// CHECK1-IRBUILDER: omp.inner.for.inc39: +// CHECK1-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE38:%.*]] +// CHECK1-IRBUILDER: .ompfinalize38: +// CHECK1-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE39:%.*]] +// CHECK1-IRBUILDER: omp.body.continue39: +// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC40:%.*]] +// CHECK1-IRBUILDER: omp.inner.for.inc40: // CHECK1-IRBUILDER-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 -// CHECK1-IRBUILDER-NEXT: [[ADD40:%.*]] = add i32 [[TMP32]], 1 -// CHECK1-IRBUILDER-NEXT: store i32 [[ADD40]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 -// CHECK1-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM41:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) -// CHECK1-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM41]]), !llvm.access.group !7 +// CHECK1-IRBUILDER-NEXT: [[ADD41:%.*]] = add i32 [[TMP32]], 1 +// CHECK1-IRBUILDER-NEXT: store i32 [[ADD41]], i32* 
[[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK1-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) +// CHECK1-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]]), !llvm.access.group !7 // CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND30]], !llvm.loop [[LOOP8:![0-9]+]] -// CHECK1-IRBUILDER: omp.inner.for.end42: +// CHECK1-IRBUILDER: omp.inner.for.end43: // CHECK1-IRBUILDER-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1-IRBUILDER: omp.dispatch.inc: // CHECK1-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND]] @@ -1902,19 +1914,19 @@ void foo_simd(int low, int up) { // CHECK1-IRBUILDER-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4 // CHECK1-IRBUILDER-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_19]], align 4 // CHECK1-IRBUILDER-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4 -// CHECK1-IRBUILDER-NEXT: [[SUB43:%.*]] = sub i32 [[TMP36]], [[TMP37]] -// CHECK1-IRBUILDER-NEXT: [[SUB44:%.*]] = sub i32 [[SUB43]], 1 -// CHECK1-IRBUILDER-NEXT: [[ADD45:%.*]] = add i32 [[SUB44]], 1 -// CHECK1-IRBUILDER-NEXT: [[DIV46:%.*]] = udiv i32 [[ADD45]], 1 -// CHECK1-IRBUILDER-NEXT: [[MUL47:%.*]] = mul i32 [[DIV46]], 1 -// CHECK1-IRBUILDER-NEXT: [[ADD48:%.*]] = add i32 [[TMP35]], [[MUL47]] -// CHECK1-IRBUILDER-NEXT: store i32 [[ADD48]], i32* [[I28]], align 4 +// CHECK1-IRBUILDER-NEXT: [[SUB44:%.*]] = sub i32 [[TMP36]], [[TMP37]] +// CHECK1-IRBUILDER-NEXT: [[SUB45:%.*]] = sub i32 [[SUB44]], 1 +// CHECK1-IRBUILDER-NEXT: [[ADD46:%.*]] = add i32 [[SUB45]], 1 +// CHECK1-IRBUILDER-NEXT: [[DIV47:%.*]] = udiv i32 [[ADD46]], 1 +// CHECK1-IRBUILDER-NEXT: [[MUL48:%.*]] = mul i32 [[DIV47]], 1 +// CHECK1-IRBUILDER-NEXT: [[ADD49:%.*]] = add i32 [[TMP35]], [[MUL48]] +// CHECK1-IRBUILDER-NEXT: store i32 [[ADD49]], i32* [[I28]], align 4 // CHECK1-IRBUILDER-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1-IRBUILDER: 
.omp.final.done: // CHECK1-IRBUILDER-NEXT: br label [[OMP_PRECOND_END]] // CHECK1-IRBUILDER: omp.precond.end: -// CHECK1-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM49:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK1-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM49]]) +// CHECK1-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM50:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM50]]) // CHECK1-IRBUILDER-NEXT: ret void // // @@ -2014,6 +2026,8 @@ void foo_simd(int low, int up) { // CHECK2-IRBUILDER-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4 // CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK2-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK2-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK2-IRBUILDER: .ompfinalize: // CHECK2-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK2-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2-IRBUILDER: omp.body.continue: @@ -2102,6 +2116,8 @@ void foo_simd(int low, int up) { // CHECK2-IRBUILDER-NEXT: store float [[MUL7]], float* [[ARRAYIDX8]], align 4 // CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK2-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK2-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK2-IRBUILDER: .ompfinalize: // CHECK2-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) // CHECK2-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2-IRBUILDER: omp.body.continue: @@ -2238,6 +2254,8 @@ void foo_simd(int low, int up) { // CHECK2-IRBUILDER-NEXT: store float [[MUL29]], float* [[ARRAYIDX31]], align 4 // CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // 
CHECK2-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK2-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK2-IRBUILDER: .ompfinalize: // CHECK2-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM23]]) // CHECK2-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2-IRBUILDER: omp.body.continue: @@ -2345,6 +2363,8 @@ void foo_simd(int low, int up) { // CHECK2-IRBUILDER-NEXT: store float [[MUL14]], float* [[ARRAYIDX16]], align 4 // CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK2-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK2-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK2-IRBUILDER: .ompfinalize: // CHECK2-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM8]]) // CHECK2-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2-IRBUILDER: omp.body.continue: @@ -2431,6 +2451,8 @@ void foo_simd(int low, int up) { // CHECK2-IRBUILDER-NEXT: call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !3 // CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK2-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK2-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK2-IRBUILDER: .ompfinalize: // CHECK2-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2-IRBUILDER: omp.body.continue: // CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2494,7 +2516,7 @@ void foo_simd(int low, int up) { // CHECK2-IRBUILDER-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7 // CHECK2-IRBUILDER-NEXT: [[ADD31:%.*]] = add i32 [[TMP28]], 1 // CHECK2-IRBUILDER-NEXT: [[CMP32:%.*]] = icmp ult i32 [[TMP27]], [[ADD31]] -// CHECK2-IRBUILDER-NEXT: br i1 [[CMP32]], label [[OMP_INNER_FOR_BODY33:%.*]], label [[OMP_INNER_FOR_END42:%.*]] +// CHECK2-IRBUILDER-NEXT: br i1 [[CMP32]], label [[OMP_INNER_FOR_BODY33:%.*]], label 
[[OMP_INNER_FOR_END43:%.*]] // CHECK2-IRBUILDER: omp.inner.for.body33: // CHECK2-IRBUILDER-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7 // CHECK2-IRBUILDER-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 @@ -2508,17 +2530,19 @@ void foo_simd(int low, int up) { // CHECK2-IRBUILDER-NEXT: call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7 // CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY33_ORDERED_AFTER:%.*]] // CHECK2-IRBUILDER: omp.inner.for.body33.ordered.after: -// CHECK2-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE38:%.*]] -// CHECK2-IRBUILDER: omp.body.continue38: -// CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC39:%.*]] -// CHECK2-IRBUILDER: omp.inner.for.inc39: +// CHECK2-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE38:%.*]] +// CHECK2-IRBUILDER: .ompfinalize38: +// CHECK2-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE39:%.*]] +// CHECK2-IRBUILDER: omp.body.continue39: +// CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC40:%.*]] +// CHECK2-IRBUILDER: omp.inner.for.inc40: // CHECK2-IRBUILDER-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 -// CHECK2-IRBUILDER-NEXT: [[ADD40:%.*]] = add i32 [[TMP32]], 1 -// CHECK2-IRBUILDER-NEXT: store i32 [[ADD40]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 -// CHECK2-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM41:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) -// CHECK2-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM41]]), !llvm.access.group !7 +// CHECK2-IRBUILDER-NEXT: [[ADD41:%.*]] = add i32 [[TMP32]], 1 +// CHECK2-IRBUILDER-NEXT: store i32 [[ADD41]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK2-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) +// CHECK2-IRBUILDER-NEXT: call void 
@__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]]), !llvm.access.group !7 // CHECK2-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND30]], !llvm.loop [[LOOP8:![0-9]+]] -// CHECK2-IRBUILDER: omp.inner.for.end42: +// CHECK2-IRBUILDER: omp.inner.for.end43: // CHECK2-IRBUILDER-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2-IRBUILDER: omp.dispatch.inc: // CHECK2-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND]] @@ -2530,19 +2554,19 @@ void foo_simd(int low, int up) { // CHECK2-IRBUILDER-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4 // CHECK2-IRBUILDER-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_19]], align 4 // CHECK2-IRBUILDER-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4 -// CHECK2-IRBUILDER-NEXT: [[SUB43:%.*]] = sub i32 [[TMP36]], [[TMP37]] -// CHECK2-IRBUILDER-NEXT: [[SUB44:%.*]] = sub i32 [[SUB43]], 1 -// CHECK2-IRBUILDER-NEXT: [[ADD45:%.*]] = add i32 [[SUB44]], 1 -// CHECK2-IRBUILDER-NEXT: [[DIV46:%.*]] = udiv i32 [[ADD45]], 1 -// CHECK2-IRBUILDER-NEXT: [[MUL47:%.*]] = mul i32 [[DIV46]], 1 -// CHECK2-IRBUILDER-NEXT: [[ADD48:%.*]] = add i32 [[TMP35]], [[MUL47]] -// CHECK2-IRBUILDER-NEXT: store i32 [[ADD48]], i32* [[I28]], align 4 +// CHECK2-IRBUILDER-NEXT: [[SUB44:%.*]] = sub i32 [[TMP36]], [[TMP37]] +// CHECK2-IRBUILDER-NEXT: [[SUB45:%.*]] = sub i32 [[SUB44]], 1 +// CHECK2-IRBUILDER-NEXT: [[ADD46:%.*]] = add i32 [[SUB45]], 1 +// CHECK2-IRBUILDER-NEXT: [[DIV47:%.*]] = udiv i32 [[ADD46]], 1 +// CHECK2-IRBUILDER-NEXT: [[MUL48:%.*]] = mul i32 [[DIV47]], 1 +// CHECK2-IRBUILDER-NEXT: [[ADD49:%.*]] = add i32 [[TMP35]], [[MUL48]] +// CHECK2-IRBUILDER-NEXT: store i32 [[ADD49]], i32* [[I28]], align 4 // CHECK2-IRBUILDER-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK2-IRBUILDER: .omp.final.done: // CHECK2-IRBUILDER-NEXT: br label [[OMP_PRECOND_END]] // CHECK2-IRBUILDER: omp.precond.end: -// CHECK2-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM49:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK2-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM49]]) +// CHECK2-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM50:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM50]]) // CHECK2-IRBUILDER-NEXT: ret void // // @@ -3836,6 +3860,8 @@ void foo_simd(int low, int up) { // CHECK3-IRBUILDER-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4 // CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK3-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK3-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK3-IRBUILDER: .ompfinalize: // CHECK3-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK3-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3-IRBUILDER: omp.body.continue: @@ -3924,6 +3950,8 @@ void foo_simd(int low, int up) { // CHECK3-IRBUILDER-NEXT: store float [[MUL7]], float* [[ARRAYIDX8]], align 4 // CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK3-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK3-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK3-IRBUILDER: .ompfinalize: // CHECK3-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) // CHECK3-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3-IRBUILDER: omp.body.continue: @@ -4060,6 +4088,8 @@ void foo_simd(int low, int up) { // CHECK3-IRBUILDER-NEXT: store float [[MUL29]], float* [[ARRAYIDX31]], align 4 // CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK3-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK3-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK3-IRBUILDER: .ompfinalize: // CHECK3-IRBUILDER-NEXT: call 
void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM23]]) // CHECK3-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3-IRBUILDER: omp.body.continue: @@ -4167,6 +4197,8 @@ void foo_simd(int low, int up) { // CHECK3-IRBUILDER-NEXT: store float [[MUL14]], float* [[ARRAYIDX16]], align 4 // CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK3-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK3-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK3-IRBUILDER: .ompfinalize: // CHECK3-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM8]]) // CHECK3-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3-IRBUILDER: omp.body.continue: @@ -4253,6 +4285,8 @@ void foo_simd(int low, int up) { // CHECK3-IRBUILDER-NEXT: call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !3 // CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK3-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK3-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK3-IRBUILDER: .ompfinalize: // CHECK3-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3-IRBUILDER: omp.body.continue: // CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -4316,7 +4350,7 @@ void foo_simd(int low, int up) { // CHECK3-IRBUILDER-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7 // CHECK3-IRBUILDER-NEXT: [[ADD31:%.*]] = add i32 [[TMP28]], 1 // CHECK3-IRBUILDER-NEXT: [[CMP32:%.*]] = icmp ult i32 [[TMP27]], [[ADD31]] -// CHECK3-IRBUILDER-NEXT: br i1 [[CMP32]], label [[OMP_INNER_FOR_BODY33:%.*]], label [[OMP_INNER_FOR_END42:%.*]] +// CHECK3-IRBUILDER-NEXT: br i1 [[CMP32]], label [[OMP_INNER_FOR_BODY33:%.*]], label [[OMP_INNER_FOR_END43:%.*]] // CHECK3-IRBUILDER: omp.inner.for.body33: // CHECK3-IRBUILDER-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7 // 
CHECK3-IRBUILDER-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 @@ -4330,17 +4364,19 @@ void foo_simd(int low, int up) { // CHECK3-IRBUILDER-NEXT: call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7 // CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY33_ORDERED_AFTER:%.*]] // CHECK3-IRBUILDER: omp.inner.for.body33.ordered.after: -// CHECK3-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE38:%.*]] -// CHECK3-IRBUILDER: omp.body.continue38: -// CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC39:%.*]] -// CHECK3-IRBUILDER: omp.inner.for.inc39: +// CHECK3-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE38:%.*]] +// CHECK3-IRBUILDER: .ompfinalize38: +// CHECK3-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE39:%.*]] +// CHECK3-IRBUILDER: omp.body.continue39: +// CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC40:%.*]] +// CHECK3-IRBUILDER: omp.inner.for.inc40: // CHECK3-IRBUILDER-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 -// CHECK3-IRBUILDER-NEXT: [[ADD40:%.*]] = add i32 [[TMP32]], 1 -// CHECK3-IRBUILDER-NEXT: store i32 [[ADD40]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 -// CHECK3-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM41:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) -// CHECK3-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM41]]), !llvm.access.group !7 +// CHECK3-IRBUILDER-NEXT: [[ADD41:%.*]] = add i32 [[TMP32]], 1 +// CHECK3-IRBUILDER-NEXT: store i32 [[ADD41]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK3-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) +// CHECK3-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]]), !llvm.access.group !7 // CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND30]], !llvm.loop [[LOOP8:![0-9]+]] -// 
CHECK3-IRBUILDER: omp.inner.for.end42: +// CHECK3-IRBUILDER: omp.inner.for.end43: // CHECK3-IRBUILDER-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3-IRBUILDER: omp.dispatch.inc: // CHECK3-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND]] @@ -4352,19 +4388,19 @@ void foo_simd(int low, int up) { // CHECK3-IRBUILDER-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4 // CHECK3-IRBUILDER-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_19]], align 4 // CHECK3-IRBUILDER-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4 -// CHECK3-IRBUILDER-NEXT: [[SUB43:%.*]] = sub i32 [[TMP36]], [[TMP37]] -// CHECK3-IRBUILDER-NEXT: [[SUB44:%.*]] = sub i32 [[SUB43]], 1 -// CHECK3-IRBUILDER-NEXT: [[ADD45:%.*]] = add i32 [[SUB44]], 1 -// CHECK3-IRBUILDER-NEXT: [[DIV46:%.*]] = udiv i32 [[ADD45]], 1 -// CHECK3-IRBUILDER-NEXT: [[MUL47:%.*]] = mul i32 [[DIV46]], 1 -// CHECK3-IRBUILDER-NEXT: [[ADD48:%.*]] = add i32 [[TMP35]], [[MUL47]] -// CHECK3-IRBUILDER-NEXT: store i32 [[ADD48]], i32* [[I28]], align 4 +// CHECK3-IRBUILDER-NEXT: [[SUB44:%.*]] = sub i32 [[TMP36]], [[TMP37]] +// CHECK3-IRBUILDER-NEXT: [[SUB45:%.*]] = sub i32 [[SUB44]], 1 +// CHECK3-IRBUILDER-NEXT: [[ADD46:%.*]] = add i32 [[SUB45]], 1 +// CHECK3-IRBUILDER-NEXT: [[DIV47:%.*]] = udiv i32 [[ADD46]], 1 +// CHECK3-IRBUILDER-NEXT: [[MUL48:%.*]] = mul i32 [[DIV47]], 1 +// CHECK3-IRBUILDER-NEXT: [[ADD49:%.*]] = add i32 [[TMP35]], [[MUL48]] +// CHECK3-IRBUILDER-NEXT: store i32 [[ADD49]], i32* [[I28]], align 4 // CHECK3-IRBUILDER-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3-IRBUILDER: .omp.final.done: // CHECK3-IRBUILDER-NEXT: br label [[OMP_PRECOND_END]] // CHECK3-IRBUILDER: omp.precond.end: -// CHECK3-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM49:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM49]]) +// CHECK3-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM50:%.*]] = call 
i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM50]]) // CHECK3-IRBUILDER-NEXT: ret void // // @@ -4464,6 +4500,8 @@ void foo_simd(int low, int up) { // CHECK4-IRBUILDER-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4 // CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK4-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK4-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK4-IRBUILDER: .ompfinalize: // CHECK4-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK4-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4-IRBUILDER: omp.body.continue: @@ -4552,6 +4590,8 @@ void foo_simd(int low, int up) { // CHECK4-IRBUILDER-NEXT: store float [[MUL7]], float* [[ARRAYIDX8]], align 4 // CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK4-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK4-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK4-IRBUILDER: .ompfinalize: // CHECK4-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) // CHECK4-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4-IRBUILDER: omp.body.continue: @@ -4688,6 +4728,8 @@ void foo_simd(int low, int up) { // CHECK4-IRBUILDER-NEXT: store float [[MUL29]], float* [[ARRAYIDX31]], align 4 // CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK4-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK4-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK4-IRBUILDER: .ompfinalize: // CHECK4-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM23]]) // CHECK4-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4-IRBUILDER: omp.body.continue: @@ -4795,6 +4837,8 @@ void foo_simd(int low, int 
up) { // CHECK4-IRBUILDER-NEXT: store float [[MUL14]], float* [[ARRAYIDX16]], align 4 // CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK4-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK4-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK4-IRBUILDER: .ompfinalize: // CHECK4-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM8]]) // CHECK4-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4-IRBUILDER: omp.body.continue: @@ -4881,6 +4925,8 @@ void foo_simd(int low, int up) { // CHECK4-IRBUILDER-NEXT: call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !3 // CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK4-IRBUILDER: omp.inner.for.body.ordered.after: +// CHECK4-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK4-IRBUILDER: .ompfinalize: // CHECK4-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4-IRBUILDER: omp.body.continue: // CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -4944,7 +4990,7 @@ void foo_simd(int low, int up) { // CHECK4-IRBUILDER-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7 // CHECK4-IRBUILDER-NEXT: [[ADD31:%.*]] = add i32 [[TMP28]], 1 // CHECK4-IRBUILDER-NEXT: [[CMP32:%.*]] = icmp ult i32 [[TMP27]], [[ADD31]] -// CHECK4-IRBUILDER-NEXT: br i1 [[CMP32]], label [[OMP_INNER_FOR_BODY33:%.*]], label [[OMP_INNER_FOR_END42:%.*]] +// CHECK4-IRBUILDER-NEXT: br i1 [[CMP32]], label [[OMP_INNER_FOR_BODY33:%.*]], label [[OMP_INNER_FOR_END43:%.*]] // CHECK4-IRBUILDER: omp.inner.for.body33: // CHECK4-IRBUILDER-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7 // CHECK4-IRBUILDER-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 @@ -4958,17 +5004,19 @@ void foo_simd(int low, int up) { // CHECK4-IRBUILDER-NEXT: call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7 // 
CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY33_ORDERED_AFTER:%.*]] // CHECK4-IRBUILDER: omp.inner.for.body33.ordered.after: -// CHECK4-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE38:%.*]] -// CHECK4-IRBUILDER: omp.body.continue38: -// CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC39:%.*]] -// CHECK4-IRBUILDER: omp.inner.for.inc39: +// CHECK4-IRBUILDER-NEXT: br label [[DOTOMPFINALIZE38:%.*]] +// CHECK4-IRBUILDER: .ompfinalize38: +// CHECK4-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE39:%.*]] +// CHECK4-IRBUILDER: omp.body.continue39: +// CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC40:%.*]] +// CHECK4-IRBUILDER: omp.inner.for.inc40: // CHECK4-IRBUILDER-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 -// CHECK4-IRBUILDER-NEXT: [[ADD40:%.*]] = add i32 [[TMP32]], 1 -// CHECK4-IRBUILDER-NEXT: store i32 [[ADD40]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 -// CHECK4-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM41:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) -// CHECK4-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM41]]), !llvm.access.group !7 +// CHECK4-IRBUILDER-NEXT: [[ADD41:%.*]] = add i32 [[TMP32]], 1 +// CHECK4-IRBUILDER-NEXT: store i32 [[ADD41]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK4-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]]) +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]]), !llvm.access.group !7 // CHECK4-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND30]], !llvm.loop [[LOOP8:![0-9]+]] -// CHECK4-IRBUILDER: omp.inner.for.end42: +// CHECK4-IRBUILDER: omp.inner.for.end43: // CHECK4-IRBUILDER-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4-IRBUILDER: omp.dispatch.inc: // CHECK4-IRBUILDER-NEXT: br label [[OMP_DISPATCH_COND]] @@ -4980,19 +5028,19 @@ 
void foo_simd(int low, int up) { // CHECK4-IRBUILDER-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4 // CHECK4-IRBUILDER-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_19]], align 4 // CHECK4-IRBUILDER-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4 -// CHECK4-IRBUILDER-NEXT: [[SUB43:%.*]] = sub i32 [[TMP36]], [[TMP37]] -// CHECK4-IRBUILDER-NEXT: [[SUB44:%.*]] = sub i32 [[SUB43]], 1 -// CHECK4-IRBUILDER-NEXT: [[ADD45:%.*]] = add i32 [[SUB44]], 1 -// CHECK4-IRBUILDER-NEXT: [[DIV46:%.*]] = udiv i32 [[ADD45]], 1 -// CHECK4-IRBUILDER-NEXT: [[MUL47:%.*]] = mul i32 [[DIV46]], 1 -// CHECK4-IRBUILDER-NEXT: [[ADD48:%.*]] = add i32 [[TMP35]], [[MUL47]] -// CHECK4-IRBUILDER-NEXT: store i32 [[ADD48]], i32* [[I28]], align 4 +// CHECK4-IRBUILDER-NEXT: [[SUB44:%.*]] = sub i32 [[TMP36]], [[TMP37]] +// CHECK4-IRBUILDER-NEXT: [[SUB45:%.*]] = sub i32 [[SUB44]], 1 +// CHECK4-IRBUILDER-NEXT: [[ADD46:%.*]] = add i32 [[SUB45]], 1 +// CHECK4-IRBUILDER-NEXT: [[DIV47:%.*]] = udiv i32 [[ADD46]], 1 +// CHECK4-IRBUILDER-NEXT: [[MUL48:%.*]] = mul i32 [[DIV47]], 1 +// CHECK4-IRBUILDER-NEXT: [[ADD49:%.*]] = add i32 [[TMP35]], [[MUL48]] +// CHECK4-IRBUILDER-NEXT: store i32 [[ADD49]], i32* [[I28]], align 4 // CHECK4-IRBUILDER-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK4-IRBUILDER: .omp.final.done: // CHECK4-IRBUILDER-NEXT: br label [[OMP_PRECOND_END]] // CHECK4-IRBUILDER: omp.precond.end: -// CHECK4-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM49:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM49]]) +// CHECK4-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM50:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM50]]) // CHECK4-IRBUILDER-NEXT: ret void // // diff --git 
a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index e5333c5e8d1fa..eb5232d2d8aa7 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -1328,7 +1328,7 @@ class OpenMPIRBuilder { EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall, Instruction *ExitCall, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional = false, - bool HasFinalize = true, bool IsCancellable = false); // TODO: remove HasFinalize + bool IsCancellable = false); /// Get the platform-specific name separator. /// \param Parts different parts of the final name that needs separation diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index f6e3dc97c26ef..4fc2cebc3fb36 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -1642,7 +1642,7 @@ OpenMPIRBuilder::createSection(const LocationDescription &Loc, // Since we are using Finalization Callback here, HasFinalize // and IsCancellable have to be true return EmitOMPInlinedRegion(OMPD, nullptr, nullptr, BodyGenCB, {}, - /*Conditional*/ false, /*hasFinalize*/ true, + /*Conditional*/ false, /*IsCancellable*/ true); } @@ -1843,7 +1843,7 @@ OpenMPIRBuilder::createMaster(const LocationDescription &Loc, Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args); return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB, - /*Conditional*/ true, /*hasFinalize*/ true); + /*Conditional*/ true); } OpenMPIRBuilder::InsertPointTy @@ -1868,7 +1868,7 @@ OpenMPIRBuilder::createMasked(const LocationDescription &Loc, Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, ArgsEnd); return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB, - /*Conditional*/ true, /*hasFinalize*/ true); + /*Conditional*/ true); } CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton( @@ -3271,8 +3271,7 @@ 
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSingle( // __kmpc_barrier EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB, - /*Conditional*/ true, - /*hasFinalize*/ true); + /*Conditional*/ true); if (!IsNowait) createBarrier(LocationDescription(Builder.saveIP(), Loc.DL), omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false, @@ -3311,7 +3310,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCritical( Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args); return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB, - /*Conditional*/ false, /*hasFinalize*/ true); + /*Conditional*/ false); } OpenMPIRBuilder::InsertPointTy @@ -3386,13 +3385,13 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createOrderedThreadsSimd( } return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB, - /*Conditional*/ false, /*hasFinalize*/ true); + /*Conditional*/ false); } OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion( Directive OMPD, Instruction *EntryCall, Instruction *ExitCall, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional, - bool HasFinalize, bool IsCancellable) { + bool IsCancellable) { OMPRegionInfo* Region = enterRegion(OMPD, IsCancellable); @@ -3440,7 +3439,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion( assert(FiniBB->getTerminator()->getNumSuccessors() == 1 && FiniBB->getTerminator()->getSuccessor(0) == ExitBB && "Unexpected control flow graph state!!"); - emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize); + emitCommonDirectiveExit(OMPD, FinIP, ExitCall); for (OMPRegionBreakInfo& Break : Region->Breaks) { @@ -3455,6 +3454,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion( } } + exitRegion(Region); From 1817688c14c335ae2287d7ae2c7c62253943438a Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 2 May 2022 21:41:57 -0500 Subject: [PATCH 37/50] codegen_critical fix --- 
clang/test/OpenMP/critical_codegen.cpp | 6 ++++++ clang/test/OpenMP/critical_codegen_attr.cpp | 14 ++++++++++---- clang/test/OpenMP/masked_codegen.cpp | 8 ++++---- clang/test/OpenMP/master_codegen.cpp | 8 ++++---- 4 files changed, 24 insertions(+), 12 deletions(-) diff --git a/clang/test/OpenMP/critical_codegen.cpp b/clang/test/OpenMP/critical_codegen.cpp index 41454b6dd1b48..f14177227a97e 100644 --- a/clang/test/OpenMP/critical_codegen.cpp +++ b/clang/test/OpenMP/critical_codegen.cpp @@ -35,6 +35,8 @@ int main() { // ALL-NEXT: store i8 2, i8* [[A_ADDR]] // IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] // IRBUILDER: [[AFTER]] +// IRBUILDER-NEXT: br label %[[FINALIZE:[^ ,]+]] +// IRBUILDER: [[FINALIZE]] // ALL-NEXT: call {{.*}}void @__kmpc_end_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[UNNAMED_LOCK]]) #pragma omp critical a = 2; @@ -44,6 +46,8 @@ int main() { // NORMAL-NEXT: invoke {{.*}}void [[FOO]]() // IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] // IRBUILDER: [[AFTER]] +// IRBUILDER-NEXT: br label %[[FINALIZE:[^ ,]+]] +// IRBUILDER: [[FINALIZE]] // ALL: call {{.*}}void @__kmpc_end_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK]]) #pragma omp critical(the_name) foo(); @@ -53,6 +57,8 @@ int main() { // NORMAL-NEXT: invoke {{.*}}void [[FOO]]() // IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] // IRBUILDER: [[AFTER]] +// IRBUILDER-NEXT: br label %[[FINALIZE:[^ ,]+]] +// IRBUILDER: [[FINALIZE]] // ALL: call {{.*}}void @__kmpc_end_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK1]]) #pragma omp critical(the_name1) hint(23) foo(); diff --git a/clang/test/OpenMP/critical_codegen_attr.cpp b/clang/test/OpenMP/critical_codegen_attr.cpp index 768d64bf7975d..2b73a7a06f9b2 100644 --- a/clang/test/OpenMP/critical_codegen_attr.cpp +++ b/clang/test/OpenMP/critical_codegen_attr.cpp @@ -33,8 +33,10 @@ int main() { // ALL: [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* 
[[DEFAULT_LOC:@.+]]) // ALL: call {{.*}}void @__kmpc_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[UNNAMED_LOCK]]) // ALL-NEXT: store i8 2, i8* [[A_ADDR]] -// IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] -// IRBUILDER: [[AFTER]] +// IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] +// IRBUILDER: [[AFTER]] +// IRBUILDER-NEXT: br label %[[FINALIZE:[^ ,]+]] +// IRBUILDER: [[FINALIZE]] // ALL-NEXT: call {{.*}}void @__kmpc_end_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[UNNAMED_LOCK]]) [[omp::directive(critical)]] a = 2; @@ -42,8 +44,10 @@ int main() { // ALL: call {{.*}}void @__kmpc_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK]]) // IRBUILDER-NEXT: call {{.*}}void [[FOO]]() // NORMAL-NEXT: invoke {{.*}}void [[FOO]]() -// IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] -// IRBUILDER: [[AFTER]] +// IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] +// IRBUILDER: [[AFTER]] +// IRBUILDER-NEXT: br label %[[FINALIZE:[^ ,]+]] +// IRBUILDER: [[FINALIZE]] // ALL: call {{.*}}void @__kmpc_end_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK]]) [[omp::directive(critical(the_name))]] foo(); @@ -53,6 +57,8 @@ int main() { // NORMAL-NEXT: invoke {{.*}}void [[FOO]]() // IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] // IRBUILDER: [[AFTER]] +// IRBUILDER-NEXT: br label %[[FINALIZE:[^ ,]+]] +// IRBUILDER: [[FINALIZE]] // ALL: call {{.*}}void @__kmpc_end_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK1]]) [[omp::directive(critical(the_name1) hint(23))]] foo(); diff --git a/clang/test/OpenMP/masked_codegen.cpp b/clang/test/OpenMP/masked_codegen.cpp index 26d34b5695801..992d5658a5f3b 100644 --- a/clang/test/OpenMP/masked_codegen.cpp +++ b/clang/test/OpenMP/masked_codegen.cpp @@ -33,10 +33,10 @@ int main() { // ALL-NEXT: br i1 [[IS_MASKED]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]] // ALL: [[THEN]] // ALL-NEXT: store i8 2, i8* [[A_ADDR]] -// 
IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] -// IRBUILDER: [[AFTER]] -// IRBUILDER-NEXT: br label %[[FINALIZE:[^ ,]+]] -// IRBUILDER: [[FINALIZE]] +// IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] +// IRBUILDER: [[AFTER]] +// IRBUILDER-NEXT: br label %[[FINALIZE:[^ ,]+]] +// IRBUILDER: [[FINALIZE]] // ALL-NEXT: call {{.*}}void @__kmpc_end_masked([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) // ALL-NEXT: br label {{%?}}[[EXIT]] // ALL: [[EXIT]] diff --git a/clang/test/OpenMP/master_codegen.cpp b/clang/test/OpenMP/master_codegen.cpp index 79543121449b7..cb202ffc0a175 100644 --- a/clang/test/OpenMP/master_codegen.cpp +++ b/clang/test/OpenMP/master_codegen.cpp @@ -33,10 +33,10 @@ int main() { // ALL-NEXT: br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]] // ALL: [[THEN]] // ALL-NEXT: store i8 2, i8* [[A_ADDR]] -// IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] -// IRBUILDER: [[AFTER]] -// IRBUILDER-NEXT: br label %[[FINALIZE:[^ ,]+]] -// IRBUILDER: [[FINALIZE]] +// IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] +// IRBUILDER: [[AFTER]] +// IRBUILDER-NEXT: br label %[[FINALIZE:[^ ,]+]] +// IRBUILDER: [[FINALIZE]] // ALL-NEXT: call {{.*}}void @__kmpc_end_master([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) // ALL-NEXT: br label {{%?}}[[EXIT]] // ALL: [[EXIT]] From 3537e894b6000ae5005edd19c705e2828c2b2283 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 2 May 2022 21:45:27 -0500 Subject: [PATCH 38/50] codegen_cancel fix --- clang/test/OpenMP/cancel_codegen.cpp | 1448 +++++++++++++------------- 1 file changed, 752 insertions(+), 696 deletions(-) diff --git a/clang/test/OpenMP/cancel_codegen.cpp b/clang/test/OpenMP/cancel_codegen.cpp index 7cfcd563cf576..0503018295152 100644 --- a/clang/test/OpenMP/cancel_codegen.cpp +++ b/clang/test/OpenMP/cancel_codegen.cpp @@ -1329,20 +1329,20 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 // CHECK3-NEXT: 
[[P_STRIDE:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[P_LASTITER28:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[P_LOWERBOUND29:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[P_UPPERBOUND30:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[P_STRIDE31:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[P_LASTITER30:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[P_LOWERBOUND31:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[P_UPPERBOUND32:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[P_STRIDE33:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_34:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_39:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I36:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I41:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK3-NEXT: [[R:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 0, i32* [[RETVAL]], align 4 @@ -1365,8 +1365,8 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: store i32 0, i32* [[P_LOWERBOUND]], align 4 // CHECK3-NEXT: store i32 0, i32* [[P_UPPERBOUND]], align 4 // CHECK3-NEXT: store i32 1, i32* [[P_STRIDE]], align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 0) +// CHECK3-NEXT: 
[[OMP_GLOBAL_THREAD_NUM13:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM13]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 0) // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[P_LOWERBOUND]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[P_UPPERBOUND]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], [[TMP0]] @@ -1386,11 +1386,13 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE:%.*]] // CHECK3-NEXT: ] // CHECK3: omp_section_loop.body.case: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 3) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 3) // CHECK3-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK3-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] -// CHECK3: omp_section_loop.body.case.split: +// CHECK3-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] +// CHECK3: omp_section_loop.body.case.cncl: +// CHECK3-NEXT: br label [[SECTION_FINI:%.*]] +// CHECK3: omp_section_loop.body.case.cont: // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_SECTION_AFTER:%.*]] // CHECK3: omp_section_loop.body.case.section.after: // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_SECTIONS_AFTER]] @@ -1400,97 +1402,109 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: [[OMP_SECTION_LOOP_NEXT]] = add nuw i32 
[[OMP_SECTION_LOOP_IV]], 1 // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_HEADER]] // CHECK3: omp_section_loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM13]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM14:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM14]]) // CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTER:%.*]] // CHECK3: omp_section_loop.after: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTERSECTIONS_FINI:%.*]] -// CHECK3: omp_section_loop.aftersections.fini: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]] -// CHECK3: omp_section_loop.preheader13: -// CHECK3-NEXT: store i32 0, i32* [[P_LOWERBOUND29]], align 4 -// CHECK3-NEXT: store i32 1, i32* [[P_UPPERBOUND30]], align 4 -// CHECK3-NEXT: store i32 1, i32* [[P_STRIDE31]], align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM32:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]], i32 34, i32* [[P_LASTITER28]], i32* [[P_LOWERBOUND29]], i32* [[P_UPPERBOUND30]], i32* [[P_STRIDE31]], i32 1, i32 0) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[P_LOWERBOUND29]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[P_UPPERBOUND30]], align 4 +// CHECK3-NEXT: br label [[SECTION_FINISH:%.*]] +// CHECK3: section_finish: +// CHECK3-NEXT: br label [[DOTOMPFINALIZE15:%.*]] +// CHECK3: .ompfinalize15: +// CHECK3-NEXT: br label [[SECTION_FINI]] +// 
CHECK3: section_fini: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER16:%.*]] +// CHECK3: omp_section_loop.preheader16: +// CHECK3-NEXT: store i32 0, i32* [[P_LOWERBOUND31]], align 4 +// CHECK3-NEXT: store i32 1, i32* [[P_UPPERBOUND32]], align 4 +// CHECK3-NEXT: store i32 1, i32* [[P_STRIDE33]], align 4 +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM34:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM34]], i32 34, i32* [[P_LASTITER30]], i32* [[P_LOWERBOUND31]], i32* [[P_UPPERBOUND32]], i32* [[P_STRIDE33]], i32 1, i32 0) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[P_LOWERBOUND31]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[P_UPPERBOUND32]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], [[TMP9]] // CHECK3-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 1 -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_HEADER14:%.*]] -// CHECK3: omp_section_loop.header14: -// CHECK3-NEXT: [[OMP_SECTION_LOOP_IV20:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER13]] ], [ [[OMP_SECTION_LOOP_NEXT22:%.*]], [[OMP_SECTION_LOOP_INC17:%.*]] ] -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_COND15:%.*]] -// CHECK3: omp_section_loop.cond15: -// CHECK3-NEXT: [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP12]] -// CHECK3-NEXT: br i1 [[OMP_SECTION_LOOP_CMP21]], label [[OMP_SECTION_LOOP_BODY16:%.*]], label [[OMP_SECTION_LOOP_EXIT18:%.*]] -// CHECK3: omp_section_loop.body16: -// CHECK3-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP9]] +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_HEADER17:%.*]] +// CHECK3: omp_section_loop.header17: +// CHECK3-NEXT: [[OMP_SECTION_LOOP_IV23:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER16]] ], [ [[OMP_SECTION_LOOP_NEXT25:%.*]], [[OMP_SECTION_LOOP_INC20:%.*]] ] +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_COND18:%.*]] +// CHECK3: omp_section_loop.cond18: +// 
CHECK3-NEXT: [[OMP_SECTION_LOOP_CMP24:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV23]], [[TMP12]] +// CHECK3-NEXT: br i1 [[OMP_SECTION_LOOP_CMP24]], label [[OMP_SECTION_LOOP_BODY19:%.*]], label [[OMP_SECTION_LOOP_EXIT21:%.*]] +// CHECK3: omp_section_loop.body19: +// CHECK3-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV23]], [[TMP9]] // CHECK3-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 1 // CHECK3-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], 0 -// CHECK3-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER:%.*]] [ -// CHECK3-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE23:%.*]] -// CHECK3-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE25:%.*]] +// CHECK3-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY19_SECTIONS_AFTER:%.*]] [ +// CHECK3-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE26:%.*]] +// CHECK3-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE28:%.*]] // CHECK3-NEXT: ] -// CHECK3: omp_section_loop.body.case23: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM24]], i32 3) -// CHECK3-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE23_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE23_CNCL:%.*]] -// CHECK3: omp_section_loop.body.case23.split: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE23_SECTION_AFTER:%.*]] -// CHECK3: omp_section_loop.body.case23.section.after: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] -// CHECK3: omp_section_loop.body.case25: +// CHECK3: omp_section_loop.body.case26: // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM27:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM27]], i32 3) +// CHECK3-NEXT: [[TMP16:%.*]] = 
call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM27]], i32 3) +// CHECK3-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE26_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL:%.*]] +// CHECK3: omp_section_loop.body.case26.cncl: +// CHECK3-NEXT: br label [[SECTION_FINI37:%.*]] +// CHECK3: omp_section_loop.body.case26.cont: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTION_AFTER:%.*]] +// CHECK3: omp_section_loop.body.case26.section.after: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY19_SECTIONS_AFTER]] +// CHECK3: omp_section_loop.body.case28: +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM29:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM29]], i32 3) // CHECK3-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]] -// CHECK3: omp_section_loop.body.case25.split: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER26:%.*]] -// CHECK3: omp_section_loop.body.case25.section.after26: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER:%.*]] -// CHECK3: omp_section_loop.body.case25.section.after: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] -// CHECK3: omp_section_loop.body16.sections.after: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_INC17]] -// CHECK3: omp_section_loop.inc17: -// CHECK3-NEXT: [[OMP_SECTION_LOOP_NEXT22]] = add nuw i32 [[OMP_SECTION_LOOP_IV20]], 1 -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_HEADER14]] -// CHECK3: omp_section_loop.exit18: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]]) -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM33:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM33]]) -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTER19:%.*]] -// CHECK3: omp_section_loop.after19: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTER19SECTIONS_FINI:%.*]] -// CHECK3: omp_section_loop.after19sections.fini: +// CHECK3-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE28_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE28_CNCL:%.*]] +// CHECK3: omp_section_loop.body.case28.cncl: +// CHECK3-NEXT: br label [[SECTION_FINI37]] +// CHECK3: omp_section_loop.body.case28.cont: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE28_SECTIONFINI:%.*]] +// CHECK3: omp_section_loop.body.case28.sectionfini: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE28_SECTION_AFTER:%.*]] +// CHECK3: omp_section_loop.body.case28.section.after: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_BODY19_SECTIONS_AFTER]] +// CHECK3: omp_section_loop.body19.sections.after: +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_INC20]] +// CHECK3: omp_section_loop.inc20: +// CHECK3-NEXT: [[OMP_SECTION_LOOP_NEXT25]] = add nuw i32 [[OMP_SECTION_LOOP_IV23]], 1 +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_HEADER17]] +// CHECK3: omp_section_loop.exit21: +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM34]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM35:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM35]]) +// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_AFTER22:%.*]] +// CHECK3: omp_section_loop.after22: +// CHECK3-NEXT: br label [[SECTION_FINISH36:%.*]] +// CHECK3: section_finish36: +// CHECK3-NEXT: br label [[DOTOMPFINALIZE38:%.*]] +// CHECK3: .ompfinalize38: +// CHECK3-NEXT: br label [[SECTION_FINI37]] +// CHECK3: section_fini37: // CHECK3-NEXT: [[TMP20:%.*]] = 
load i32, i32* [[ARGC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP21]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK3-NEXT: [[SUB35:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK3-NEXT: store i32 [[SUB35]], i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK3-NEXT: [[SUB40:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK3-NEXT: store i32 [[SUB40]], i32* [[DOTCAPTURE_EXPR_39]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 // CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP22]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4 // CHECK3-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM37:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM37]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK3-NEXT: 
[[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 -// CHECK3-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] -// CHECK3-NEXT: br i1 [[CMP38]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4 +// CHECK3-NEXT: [[CMP43:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: br i1 [[CMP43]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: // CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 @@ -1504,29 +1518,23 @@ for (int i = 0; i < argc; ++i) { // CHECK3: omp.inner.for.cond: // CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP39:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] -// CHECK3-NEXT: br i1 [[CMP39]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[CMP44:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK3-NEXT: br i1 [[CMP44]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 -// CHECK3-NEXT: [[ADD40:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD40]], i32* [[I36]], align 4 +// CHECK3-NEXT: [[ADD45:%.*]] = add nsw i32 0, [[MUL]] +// CHECK3-NEXT: store i32 [[ADD45]], i32* [[I41]], align 4 // CHECK3-NEXT: [[TMP32:%.*]] = load float, float* @flag, align 4 -// CHECK3-NEXT: [[TOBOOL41:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 -// CHECK3-NEXT: br i1 [[TOBOOL41]], label [[OMP_IF_THEN:%.*]], label 
[[OMP_IF_ELSE:%.*]] +// CHECK3-NEXT: [[TOBOOL46:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 +// CHECK3-NEXT: br i1 [[TOBOOL46]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) -// CHECK3-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 2) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) +// CHECK3-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 2) // CHECK3-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 // CHECK3-NEXT: br i1 [[TMP34]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK3: .cancel.exit: // CHECK3-NEXT: br label [[CANCEL_EXIT:%.*]] -// CHECK3: omp_section_loop.body.case.cncl: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_EXIT]] -// CHECK3: omp_section_loop.body.case23.cncl: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] -// CHECK3: omp_section_loop.body.case25.cncl: -// CHECK3-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] // CHECK3: .cancel.continue: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: @@ -1537,30 +1545,30 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: // CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK3-NEXT: store i32 [[ADD43]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD48:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK3-NEXT: store i32 [[ADD48]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM45]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM50:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM50]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: cancel.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM44:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM44]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM49:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM49]]) // CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: omp.precond.end: // CHECK3-NEXT: br label [[CANCEL_CONT]] // CHECK3: cancel.cont: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM46:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM46]]) -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) -// CHECK3-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM51:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM51]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM52:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) +// CHECK3-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM52]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) // CHECK3-NEXT: [[TMP37:%.*]] = bitcast i8* [[TMP36]] to %struct.kmp_task_t_with_privates* // CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 0 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) -// CHECK3-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM48]], i8* [[TMP36]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM53:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) +// CHECK3-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM53]], i8* [[TMP36]]) // CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) // CHECK3-NEXT: store i32 0, i32* [[R]], align 4 @@ -1584,50 +1592,58 @@ for (int i = 0; i < argc; ++i) { // CHECK3: omp.par.region: // CHECK3-NEXT: [[TMP2:%.*]] = load float, float* @flag, align 4 // CHECK3-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00 -// CHECK3-NEXT: br i1 [[TOBOOL]], label [[TMP14:%.*]], label [[TMP3:%.*]] -// CHECK3: 3: -// CHECK3-NEXT: br label [[TMP4:%.*]] -// CHECK3: 4: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8 -// CHECK3-NEXT: [[TMP6:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP6]], i64 0 -// CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 0 +// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_PAR_REGION_IF:%.*]], label [[OMP_PAR_REGION1:%.*]] +// CHECK3: omp.par.region1: +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i32 [[TMP3]] to i8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP4]], i64 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i64 0 // CHECK3-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1 // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) -// CHECK3-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 -// CHECK3-NEXT: br i1 [[TMP9]], 
label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]] -// CHECK3: .cncl5: +// CHECK3-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +// CHECK3-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0 +// CHECK3-NEXT: br i1 [[TMP7]], label [[OMP_PAR_REGION1_CONT:%.*]], label [[OMP_PAR_REGION1_CNCL:%.*]] +// CHECK3: omp.par.region1.cncl: +// CHECK3-NEXT: br label [[DOTOMPFINALIZE11:%.*]] +// CHECK3: .ompfinalize11: +// CHECK3-NEXT: br label [[OMP_PAR_REGION1_CNCL_FINISPLIT:%.*]] +// CHECK3: omp.par.region1.cncl.finisplit: // CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] -// CHECK3: .cont: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0 -// CHECK3-NEXT: [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1 -// CHECK3-NEXT: [[CONV8:%.*]] = sext i8 [[TMP13]] to i32 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP10]] -// CHECK3-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD]] to i8 -// CHECK3-NEXT: store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1 +// CHECK3: omp.par.region1.cont: +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i8*, i8** [[TMP9]], i64 0 +// CHECK3-NEXT: [[TMP10:%.*]] = load i8*, i8** [[ARRAYIDX5]], align 8 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8, i8* [[TMP10]], i64 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i8, i8* [[ARRAYIDX6]], align 1 +// CHECK3-NEXT: [[CONV7:%.*]] = sext i8 [[TMP11]] to i32 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV7]], 
[[TMP8]] +// CHECK3-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD]] to i8 +// CHECK3-NEXT: store i8 [[CONV8]], i8* [[ARRAYIDX6]], align 1 // CHECK3-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] // CHECK3: omp.par.region.parallel.after: // CHECK3-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK3: omp.par.pre_finalize: +// CHECK3-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK3: .ompfinalize: // CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] -// CHECK3: 14: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1) -// CHECK3-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0 -// CHECK3-NEXT: br i1 [[TMP16]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]] -// CHECK3: .cncl: +// CHECK3: omp.par.region.if: // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +// CHECK3-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]], i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 0 +// CHECK3-NEXT: br i1 [[TMP13]], label [[OMP_PAR_REGION_IF_CONT:%.*]], label [[OMP_PAR_REGION_IF_CNCL:%.*]] +// CHECK3: omp.par.region.if.cncl: +// CHECK3-NEXT: br label [[DOTOMPFINALIZE9:%.*]] +// CHECK3: .ompfinalize9: +// CHECK3-NEXT: br label [[OMP_PAR_REGION_IF_CNCL_FINISPLIT:%.*]] +// CHECK3: omp.par.region.if.cncl.finisplit: +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) // CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] -// CHECK3: .split: -// CHECK3-NEXT: br 
label [[TMP4]] +// CHECK3: omp.par.region.if.cont: +// CHECK3-NEXT: br label [[OMP_PAR_REGION1]] // CHECK3: omp.par.outlined.exit.exitStub: // CHECK3-NEXT: ret void // @@ -1716,14 +1732,14 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.sections.case: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) // CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK3-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK3-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] -// CHECK3: .omp.sections.case.split: +// CHECK3-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK3-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK3: .cancel.exit: +// CHECK3-NEXT: br label [[CANCEL_EXIT:%.*]] +// CHECK3: .cancel.continue: // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK3: .omp.sections.case.cncl: -// CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: .omp.sections.exit: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: @@ -1732,13 +1748,13 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) -// CHECK3-NEXT: br label 
[[CANCEL_CONT]] +// CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: cancel.cont: // CHECK3-NEXT: ret void // CHECK3: cancel.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) // CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK3-NEXT: br label [[CANCEL_CONT]] // @@ -1759,7 +1775,7 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 1 @@ -1780,25 +1796,23 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.sections.case: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) // CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK3-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// 
CHECK3-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] -// CHECK3: .omp.sections.case.split: +// CHECK3-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK3-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK3: .cancel.exit: +// CHECK3-NEXT: br label [[CANCEL_EXIT:%.*]] +// CHECK3: .cancel.continue: // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK3: .omp.sections.case.cncl: -// CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: .omp.sections.case2: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) // CHECK3-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 3) -// CHECK3-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0 -// CHECK3-NEXT: br i1 [[TMP10]], label [[DOTOMP_SECTIONS_CASE2_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE2_CNCL:%.*]] -// CHECK3: .omp.sections.case2.split: -// CHECK3-NEXT: br label [[DOTOMP_SECTIONS_CASE2_SECTION_AFTER:%.*]] -// CHECK3: .omp.sections.case2.section.after: +// CHECK3-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK3-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT4:%.*]], label [[DOTCANCEL_CONTINUE5:%.*]] +// CHECK3: .cancel.exit4: +// CHECK3-NEXT: br label [[CANCEL_EXIT]] +// CHECK3: .cancel.continue5: // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK3: .omp.sections.case2.cncl: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_END]] // CHECK3: .omp.sections.exit: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: @@ -1807,14 +1821,14 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: -// CHECK3-NEXT: 
[[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM5]]) -// CHECK3-NEXT: br label [[CANCEL_CONT]] +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM7]]) +// CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: cancel.cont: // CHECK3-NEXT: ret void // CHECK3: cancel.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23]]) -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) // CHECK3-NEXT: br label [[CANCEL_CONT]] // // @@ -1861,7 +1875,7 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: store i32 0, i32* [[R3]], align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB31:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 @@ -1889,7 +1903,7 @@ for (int i = 0; i < 
argc; ++i) { // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB33:[0-9]+]]) // CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM7]], i32 2) // CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 // CHECK3-NEXT: br i1 [[TMP15]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] @@ -1911,14 +1925,14 @@ for (int i = 0; i < argc; ++i) { // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK3-NEXT: [[TMP20:%.*]] = bitcast i32* [[R3]] to i8* // CHECK3-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) // CHECK3-NEXT: [[TMP21:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB30:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* 
@.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB36:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] @@ -1928,10 +1942,10 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[R3]], align 4 // CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: store i32 [[ADD13]], i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB30]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: cancel.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) // CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) // CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: .omp.reduction.case2: @@ -1981,20 +1995,20 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[P_LASTITER28:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[P_LOWERBOUND29:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[P_UPPERBOUND30:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: 
[[P_STRIDE31:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[P_LASTITER30:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[P_LOWERBOUND31:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[P_UPPERBOUND32:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[P_STRIDE33:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTCAPTURE_EXPR_34:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_39:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[I36:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[I41:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK4-NEXT: [[R:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store i32 0, i32* [[RETVAL]], align 4 @@ -2017,8 +2031,8 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: store i32 0, i32* [[P_LOWERBOUND]], align 4 // CHECK4-NEXT: store i32 0, i32* [[P_UPPERBOUND]], align 4 // CHECK4-NEXT: store i32 1, i32* [[P_STRIDE]], align 4 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 0) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM13:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM13]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], 
i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 0) // CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[P_LOWERBOUND]], align 4 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[P_UPPERBOUND]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], [[TMP0]] @@ -2038,11 +2052,13 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE:%.*]] // CHECK4-NEXT: ] // CHECK4: omp_section_loop.body.case: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 3) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 3) // CHECK4-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK4-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] -// CHECK4: omp_section_loop.body.case.split: +// CHECK4-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] +// CHECK4: omp_section_loop.body.case.cncl: +// CHECK4-NEXT: br label [[SECTION_FINI:%.*]] +// CHECK4: omp_section_loop.body.case.cont: // CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_SECTION_AFTER:%.*]] // CHECK4: omp_section_loop.body.case.section.after: // CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_SECTIONS_AFTER]] @@ -2052,97 +2068,109 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: [[OMP_SECTION_LOOP_NEXT]] = add nuw i32 [[OMP_SECTION_LOOP_IV]], 1 // CHECK4-NEXT: br label [[OMP_SECTION_LOOP_HEADER]] // CHECK4: omp_section_loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call 
i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM13]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM14:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM14]]) // CHECK4-NEXT: br label [[OMP_SECTION_LOOP_AFTER:%.*]] // CHECK4: omp_section_loop.after: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_AFTERSECTIONS_FINI:%.*]] -// CHECK4: omp_section_loop.aftersections.fini: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]] -// CHECK4: omp_section_loop.preheader13: -// CHECK4-NEXT: store i32 0, i32* [[P_LOWERBOUND29]], align 4 -// CHECK4-NEXT: store i32 1, i32* [[P_UPPERBOUND30]], align 4 -// CHECK4-NEXT: store i32 1, i32* [[P_STRIDE31]], align 4 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM32:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]], i32 34, i32* [[P_LASTITER28]], i32* [[P_LOWERBOUND29]], i32* [[P_UPPERBOUND30]], i32* [[P_STRIDE31]], i32 1, i32 0) -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[P_LOWERBOUND29]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[P_UPPERBOUND30]], align 4 +// CHECK4-NEXT: br label [[SECTION_FINISH:%.*]] +// CHECK4: section_finish: +// CHECK4-NEXT: br label [[DOTOMPFINALIZE15:%.*]] +// CHECK4: .ompfinalize15: +// CHECK4-NEXT: br label [[SECTION_FINI]] +// CHECK4: section_fini: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER16:%.*]] +// CHECK4: omp_section_loop.preheader16: +// CHECK4-NEXT: store i32 0, i32* [[P_LOWERBOUND31]], align 4 +// CHECK4-NEXT: store i32 1, i32* [[P_UPPERBOUND32]], align 4 +// CHECK4-NEXT: store i32 1, i32* 
[[P_STRIDE33]], align 4 +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM34:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM34]], i32 34, i32* [[P_LASTITER30]], i32* [[P_LOWERBOUND31]], i32* [[P_UPPERBOUND32]], i32* [[P_STRIDE33]], i32 1, i32 0) +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[P_LOWERBOUND31]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[P_UPPERBOUND32]], align 4 // CHECK4-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], [[TMP9]] // CHECK4-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 1 -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_HEADER14:%.*]] -// CHECK4: omp_section_loop.header14: -// CHECK4-NEXT: [[OMP_SECTION_LOOP_IV20:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER13]] ], [ [[OMP_SECTION_LOOP_NEXT22:%.*]], [[OMP_SECTION_LOOP_INC17:%.*]] ] -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_COND15:%.*]] -// CHECK4: omp_section_loop.cond15: -// CHECK4-NEXT: [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP12]] -// CHECK4-NEXT: br i1 [[OMP_SECTION_LOOP_CMP21]], label [[OMP_SECTION_LOOP_BODY16:%.*]], label [[OMP_SECTION_LOOP_EXIT18:%.*]] -// CHECK4: omp_section_loop.body16: -// CHECK4-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP9]] +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_HEADER17:%.*]] +// CHECK4: omp_section_loop.header17: +// CHECK4-NEXT: [[OMP_SECTION_LOOP_IV23:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER16]] ], [ [[OMP_SECTION_LOOP_NEXT25:%.*]], [[OMP_SECTION_LOOP_INC20:%.*]] ] +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_COND18:%.*]] +// CHECK4: omp_section_loop.cond18: +// CHECK4-NEXT: [[OMP_SECTION_LOOP_CMP24:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV23]], [[TMP12]] +// CHECK4-NEXT: br i1 [[OMP_SECTION_LOOP_CMP24]], label [[OMP_SECTION_LOOP_BODY19:%.*]], label [[OMP_SECTION_LOOP_EXIT21:%.*]] +// CHECK4: omp_section_loop.body19: +// CHECK4-NEXT: [[TMP13:%.*]] = add 
i32 [[OMP_SECTION_LOOP_IV23]], [[TMP9]] // CHECK4-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 1 // CHECK4-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], 0 -// CHECK4-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER:%.*]] [ -// CHECK4-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE23:%.*]] -// CHECK4-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE25:%.*]] +// CHECK4-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY19_SECTIONS_AFTER:%.*]] [ +// CHECK4-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE26:%.*]] +// CHECK4-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE28:%.*]] // CHECK4-NEXT: ] -// CHECK4: omp_section_loop.body.case23: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM24]], i32 3) -// CHECK4-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0 -// CHECK4-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE23_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE23_CNCL:%.*]] -// CHECK4: omp_section_loop.body.case23.split: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE23_SECTION_AFTER:%.*]] -// CHECK4: omp_section_loop.body.case23.section.after: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] -// CHECK4: omp_section_loop.body.case25: +// CHECK4: omp_section_loop.body.case26: // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM27:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM27]], i32 3) +// CHECK4-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM27]], i32 3) +// CHECK4-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0 +// CHECK4-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE26_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL:%.*]] +// CHECK4: 
omp_section_loop.body.case26.cncl: +// CHECK4-NEXT: br label [[SECTION_FINI37:%.*]] +// CHECK4: omp_section_loop.body.case26.cont: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTION_AFTER:%.*]] +// CHECK4: omp_section_loop.body.case26.section.after: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY19_SECTIONS_AFTER]] +// CHECK4: omp_section_loop.body.case28: +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM29:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM29]], i32 3) // CHECK4-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 0 -// CHECK4-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]] -// CHECK4: omp_section_loop.body.case25.split: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER26:%.*]] -// CHECK4: omp_section_loop.body.case25.section.after26: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER:%.*]] -// CHECK4: omp_section_loop.body.case25.section.after: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] -// CHECK4: omp_section_loop.body16.sections.after: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_INC17]] -// CHECK4: omp_section_loop.inc17: -// CHECK4-NEXT: [[OMP_SECTION_LOOP_NEXT22]] = add nuw i32 [[OMP_SECTION_LOOP_IV20]], 1 -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_HEADER14]] -// CHECK4: omp_section_loop.exit18: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]]) -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM33:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM33]]) -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_AFTER19:%.*]] -// CHECK4: omp_section_loop.after19: -// CHECK4-NEXT: br label 
[[OMP_SECTION_LOOP_AFTER19SECTIONS_FINI:%.*]] -// CHECK4: omp_section_loop.after19sections.fini: +// CHECK4-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE28_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE28_CNCL:%.*]] +// CHECK4: omp_section_loop.body.case28.cncl: +// CHECK4-NEXT: br label [[SECTION_FINI37]] +// CHECK4: omp_section_loop.body.case28.cont: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE28_SECTIONFINI:%.*]] +// CHECK4: omp_section_loop.body.case28.sectionfini: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE28_SECTION_AFTER:%.*]] +// CHECK4: omp_section_loop.body.case28.section.after: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_BODY19_SECTIONS_AFTER]] +// CHECK4: omp_section_loop.body19.sections.after: +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_INC20]] +// CHECK4: omp_section_loop.inc20: +// CHECK4-NEXT: [[OMP_SECTION_LOOP_NEXT25]] = add nuw i32 [[OMP_SECTION_LOOP_IV23]], 1 +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_HEADER17]] +// CHECK4: omp_section_loop.exit21: +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM34]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM35:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM35]]) +// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_AFTER22:%.*]] +// CHECK4: omp_section_loop.after22: +// CHECK4-NEXT: br label [[SECTION_FINISH36:%.*]] +// CHECK4: section_finish36: +// CHECK4-NEXT: br label [[DOTOMPFINALIZE38:%.*]] +// CHECK4: .ompfinalize38: +// CHECK4-NEXT: br label [[SECTION_FINI37]] +// CHECK4: section_fini37: // CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP21]], 0 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 
[[SUB]], 1 -// CHECK4-NEXT: [[SUB35:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK4-NEXT: store i32 [[SUB35]], i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK4-NEXT: [[SUB40:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK4-NEXT: store i32 [[SUB40]], i32* [[DOTCAPTURE_EXPR_39]], align 4 // CHECK4-NEXT: store i32 0, i32* [[I]], align 4 // CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP22]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK4: omp.precond.then: // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4 // CHECK4-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM37:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM37]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 -// CHECK4-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] -// CHECK4-NEXT: br i1 [[CMP38]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// 
CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4 +// CHECK4-NEXT: [[CMP43:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +// CHECK4-NEXT: br i1 [[CMP43]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: -// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4 // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: // CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 @@ -2156,29 +2184,23 @@ for (int i = 0; i < argc; ++i) { // CHECK4: omp.inner.for.cond: // CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP39:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] -// CHECK4-NEXT: br i1 [[CMP39]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4-NEXT: [[CMP44:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK4-NEXT: br i1 [[CMP44]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: // CHECK4-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 -// CHECK4-NEXT: [[ADD40:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD40]], i32* [[I36]], align 4 +// CHECK4-NEXT: [[ADD45:%.*]] = add nsw i32 0, [[MUL]] +// CHECK4-NEXT: store i32 [[ADD45]], i32* [[I41]], align 4 // CHECK4-NEXT: [[TMP32:%.*]] = load float, float* @flag, align 4 -// CHECK4-NEXT: [[TOBOOL41:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 -// CHECK4-NEXT: br i1 [[TOBOOL41]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK4-NEXT: [[TOBOOL46:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 +// CHECK4-NEXT: br i1 [[TOBOOL46]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) -// CHECK4-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 2) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) +// CHECK4-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 2) // CHECK4-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 // CHECK4-NEXT: br i1 [[TMP34]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK4: .cancel.exit: // CHECK4-NEXT: br label [[CANCEL_EXIT:%.*]] -// CHECK4: omp_section_loop.body.case.cncl: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_EXIT]] -// CHECK4: omp_section_loop.body.case23.cncl: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] -// CHECK4: omp_section_loop.body.case25.cncl: -// CHECK4-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] // CHECK4: .cancel.continue: // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] // CHECK4: omp_if.else: @@ -2189,30 +2211,30 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: // CHECK4-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK4-NEXT: store i32 [[ADD43]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD48:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK4-NEXT: store i32 [[ADD48]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM45]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM50:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM50]]) // CHECK4-NEXT: br label [[OMP_PRECOND_END]] // CHECK4: cancel.exit: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM44:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM44]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM49:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM49]]) // CHECK4-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK4: omp.precond.end: // CHECK4-NEXT: br label [[CANCEL_CONT]] // CHECK4: cancel.cont: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM46:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM46]]) -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) -// CHECK4-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM51:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM51]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM52:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) +// CHECK4-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM52]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) // CHECK4-NEXT: [[TMP37:%.*]] = bitcast i8* [[TMP36]] to %struct.kmp_task_t_with_privates* // CHECK4-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 0 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) -// CHECK4-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM48]], i8* [[TMP36]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM53:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) +// CHECK4-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM53]], i8* [[TMP36]]) // CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) // CHECK4-NEXT: store i32 0, i32* [[R]], align 4 @@ -2236,50 +2258,58 @@ for (int i = 0; i < argc; ++i) { // CHECK4: omp.par.region: // CHECK4-NEXT: [[TMP2:%.*]] = load float, float* @flag, align 4 // CHECK4-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00 -// CHECK4-NEXT: br i1 [[TOBOOL]], label [[TMP14:%.*]], label [[TMP3:%.*]] -// CHECK4: 3: -// CHECK4-NEXT: br label [[TMP4:%.*]] -// CHECK4: 4: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK4-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8 -// CHECK4-NEXT: [[TMP6:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP6]], i64 0 -// CHECK4-NEXT: [[TMP7:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK4-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 0 +// CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_PAR_REGION_IF:%.*]], label [[OMP_PAR_REGION1:%.*]] +// CHECK4: omp.par.region1: +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK4-NEXT: [[CONV:%.*]] = trunc i32 [[TMP3]] to i8 +// CHECK4-NEXT: [[TMP4:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP4]], i64 0 +// CHECK4-NEXT: [[TMP5:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK4-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i64 0 // CHECK4-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1 // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) -// CHECK4-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 -// CHECK4-NEXT: br i1 [[TMP9]], 
label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]] -// CHECK4: .cncl5: +// CHECK4-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +// CHECK4-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0 +// CHECK4-NEXT: br i1 [[TMP7]], label [[OMP_PAR_REGION1_CONT:%.*]], label [[OMP_PAR_REGION1_CNCL:%.*]] +// CHECK4: omp.par.region1.cncl: +// CHECK4-NEXT: br label [[DOTOMPFINALIZE11:%.*]] +// CHECK4: .ompfinalize11: +// CHECK4-NEXT: br label [[OMP_PAR_REGION1_CNCL_FINISPLIT:%.*]] +// CHECK4: omp.par.region1.cncl.finisplit: // CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] -// CHECK4: .cont: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK4-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0 -// CHECK4-NEXT: [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 -// CHECK4-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0 -// CHECK4-NEXT: [[TMP13:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1 -// CHECK4-NEXT: [[CONV8:%.*]] = sext i8 [[TMP13]] to i32 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP10]] -// CHECK4-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD]] to i8 -// CHECK4-NEXT: store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1 +// CHECK4: omp.par.region1.cont: +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK4-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i8*, i8** [[TMP9]], i64 0 +// CHECK4-NEXT: [[TMP10:%.*]] = load i8*, i8** [[ARRAYIDX5]], align 8 +// CHECK4-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8, i8* [[TMP10]], i64 0 +// CHECK4-NEXT: [[TMP11:%.*]] = load i8, i8* [[ARRAYIDX6]], align 1 +// CHECK4-NEXT: [[CONV7:%.*]] = sext i8 [[TMP11]] to i32 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV7]], 
[[TMP8]] +// CHECK4-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD]] to i8 +// CHECK4-NEXT: store i8 [[CONV8]], i8* [[ARRAYIDX6]], align 1 // CHECK4-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] // CHECK4: omp.par.region.parallel.after: // CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK4: omp.par.pre_finalize: +// CHECK4-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK4: .ompfinalize: // CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] -// CHECK4: 14: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1) -// CHECK4-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0 -// CHECK4-NEXT: br i1 [[TMP16]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]] -// CHECK4: .cncl: +// CHECK4: omp.par.region.if: // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +// CHECK4-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]], i32 1) +// CHECK4-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 0 +// CHECK4-NEXT: br i1 [[TMP13]], label [[OMP_PAR_REGION_IF_CONT:%.*]], label [[OMP_PAR_REGION_IF_CNCL:%.*]] +// CHECK4: omp.par.region.if.cncl: +// CHECK4-NEXT: br label [[DOTOMPFINALIZE9:%.*]] +// CHECK4: .ompfinalize9: +// CHECK4-NEXT: br label [[OMP_PAR_REGION_IF_CNCL_FINISPLIT:%.*]] +// CHECK4: omp.par.region.if.cncl.finisplit: +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) // CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] -// CHECK4: .split: -// CHECK4-NEXT: br 
label [[TMP4]] +// CHECK4: omp.par.region.if.cont: +// CHECK4-NEXT: br label [[OMP_PAR_REGION1]] // CHECK4: omp.par.outlined.exit.exitStub: // CHECK4-NEXT: ret void // @@ -2368,14 +2398,14 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK4-NEXT: ] // CHECK4: .omp.sections.case: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) // CHECK4-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK4-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK4-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] -// CHECK4: .omp.sections.case.split: +// CHECK4-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK4-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK4: .cancel.exit: +// CHECK4-NEXT: br label [[CANCEL_EXIT:%.*]] +// CHECK4: .cancel.continue: // CHECK4-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK4: .omp.sections.case.cncl: -// CHECK4-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK4: .omp.sections.exit: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: @@ -2384,13 +2414,13 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) // CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) -// CHECK4-NEXT: br label 
[[CANCEL_CONT]] +// CHECK4-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK4: cancel.cont: // CHECK4-NEXT: ret void // CHECK4: cancel.exit: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) // CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK4-NEXT: br label [[CANCEL_CONT]] // @@ -2411,7 +2441,7 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) // CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) // CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK4-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 1 @@ -2432,25 +2462,23 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]] // CHECK4-NEXT: ] // CHECK4: .omp.sections.case: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) // CHECK4-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK4-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// 
CHECK4-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] -// CHECK4: .omp.sections.case.split: +// CHECK4-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK4-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK4: .cancel.exit: +// CHECK4-NEXT: br label [[CANCEL_EXIT:%.*]] +// CHECK4: .cancel.continue: // CHECK4-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK4: .omp.sections.case.cncl: -// CHECK4-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK4: .omp.sections.case2: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) // CHECK4-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 3) -// CHECK4-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0 -// CHECK4-NEXT: br i1 [[TMP10]], label [[DOTOMP_SECTIONS_CASE2_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE2_CNCL:%.*]] -// CHECK4: .omp.sections.case2.split: -// CHECK4-NEXT: br label [[DOTOMP_SECTIONS_CASE2_SECTION_AFTER:%.*]] -// CHECK4: .omp.sections.case2.section.after: +// CHECK4-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK4-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT4:%.*]], label [[DOTCANCEL_CONTINUE5:%.*]] +// CHECK4: .cancel.exit4: +// CHECK4-NEXT: br label [[CANCEL_EXIT]] +// CHECK4: .cancel.continue5: // CHECK4-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK4: .omp.sections.case2.cncl: -// CHECK4-NEXT: br label [[OMP_INNER_FOR_END]] // CHECK4: .omp.sections.exit: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: @@ -2459,14 +2487,14 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: -// CHECK4-NEXT: 
[[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM5]]) -// CHECK4-NEXT: br label [[CANCEL_CONT]] +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM7]]) +// CHECK4-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK4: cancel.cont: // CHECK4-NEXT: ret void // CHECK4: cancel.exit: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23]]) -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) // CHECK4-NEXT: br label [[CANCEL_CONT]] // // @@ -2513,7 +2541,7 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK4-NEXT: store i32 0, i32* [[R3]], align 4 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB31:[0-9]+]]) // CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 @@ -2541,7 +2569,7 @@ for (int i = 0; i < 
argc; ++i) { // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB33:[0-9]+]]) // CHECK4-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM7]], i32 2) // CHECK4-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 // CHECK4-NEXT: br i1 [[TMP15]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] @@ -2563,14 +2591,14 @@ for (int i = 0; i < argc; ++i) { // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35:[0-9]+]]) // CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) // CHECK4-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK4-NEXT: [[TMP20:%.*]] = bitcast i32* [[R3]] to i8* // CHECK4-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) // CHECK4-NEXT: [[TMP21:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK4-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB30:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* 
@.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB36:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK4-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK4-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK4-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] @@ -2580,10 +2608,10 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[R3]], align 4 // CHECK4-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK4-NEXT: store i32 [[ADD13]], i32* [[TMP1]], align 4 -// CHECK4-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB30]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK4: cancel.exit: -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) // CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) // CHECK4-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK4: .omp.reduction.case2: @@ -3873,20 +3901,20 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[P_LASTITER28:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[P_LOWERBOUND29:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[P_UPPERBOUND30:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: 
[[P_STRIDE31:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[P_LASTITER30:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[P_LOWERBOUND31:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[P_UPPERBOUND32:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[P_STRIDE33:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_34:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_39:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I36:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I41:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK9-NEXT: [[R:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32 0, i32* [[RETVAL]], align 4 @@ -3909,8 +3937,8 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: store i32 0, i32* [[P_LOWERBOUND]], align 4 // CHECK9-NEXT: store i32 0, i32* [[P_UPPERBOUND]], align 4 // CHECK9-NEXT: store i32 1, i32* [[P_STRIDE]], align 4 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 0) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM13:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM13]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], 
i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 0) // CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[P_LOWERBOUND]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[P_UPPERBOUND]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], [[TMP0]] @@ -3930,11 +3958,13 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE:%.*]] // CHECK9-NEXT: ] // CHECK9: omp_section_loop.body.case: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 3) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 3) // CHECK9-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK9-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] -// CHECK9: omp_section_loop.body.case.split: +// CHECK9-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] +// CHECK9: omp_section_loop.body.case.cncl: +// CHECK9-NEXT: br label [[SECTION_FINI:%.*]] +// CHECK9: omp_section_loop.body.case.cont: // CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_SECTION_AFTER:%.*]] // CHECK9: omp_section_loop.body.case.section.after: // CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_SECTIONS_AFTER]] @@ -3944,97 +3974,109 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: [[OMP_SECTION_LOOP_NEXT]] = add nuw i32 [[OMP_SECTION_LOOP_IV]], 1 // CHECK9-NEXT: br label [[OMP_SECTION_LOOP_HEADER]] // CHECK9: omp_section_loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call 
i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM13]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM14:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM14]]) // CHECK9-NEXT: br label [[OMP_SECTION_LOOP_AFTER:%.*]] // CHECK9: omp_section_loop.after: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_AFTERSECTIONS_FINI:%.*]] -// CHECK9: omp_section_loop.aftersections.fini: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]] -// CHECK9: omp_section_loop.preheader13: -// CHECK9-NEXT: store i32 0, i32* [[P_LOWERBOUND29]], align 4 -// CHECK9-NEXT: store i32 1, i32* [[P_UPPERBOUND30]], align 4 -// CHECK9-NEXT: store i32 1, i32* [[P_STRIDE31]], align 4 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM32:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]], i32 34, i32* [[P_LASTITER28]], i32* [[P_LOWERBOUND29]], i32* [[P_UPPERBOUND30]], i32* [[P_STRIDE31]], i32 1, i32 0) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[P_LOWERBOUND29]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[P_UPPERBOUND30]], align 4 +// CHECK9-NEXT: br label [[SECTION_FINISH:%.*]] +// CHECK9: section_finish: +// CHECK9-NEXT: br label [[DOTOMPFINALIZE15:%.*]] +// CHECK9: .ompfinalize15: +// CHECK9-NEXT: br label [[SECTION_FINI]] +// CHECK9: section_fini: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER16:%.*]] +// CHECK9: omp_section_loop.preheader16: +// CHECK9-NEXT: store i32 0, i32* [[P_LOWERBOUND31]], align 4 +// CHECK9-NEXT: store i32 1, i32* [[P_UPPERBOUND32]], align 4 +// CHECK9-NEXT: store i32 1, i32* 
[[P_STRIDE33]], align 4 +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM34:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM34]], i32 34, i32* [[P_LASTITER30]], i32* [[P_LOWERBOUND31]], i32* [[P_UPPERBOUND32]], i32* [[P_STRIDE33]], i32 1, i32 0) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[P_LOWERBOUND31]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[P_UPPERBOUND32]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], [[TMP9]] // CHECK9-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 1 -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_HEADER14:%.*]] -// CHECK9: omp_section_loop.header14: -// CHECK9-NEXT: [[OMP_SECTION_LOOP_IV20:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER13]] ], [ [[OMP_SECTION_LOOP_NEXT22:%.*]], [[OMP_SECTION_LOOP_INC17:%.*]] ] -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_COND15:%.*]] -// CHECK9: omp_section_loop.cond15: -// CHECK9-NEXT: [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP12]] -// CHECK9-NEXT: br i1 [[OMP_SECTION_LOOP_CMP21]], label [[OMP_SECTION_LOOP_BODY16:%.*]], label [[OMP_SECTION_LOOP_EXIT18:%.*]] -// CHECK9: omp_section_loop.body16: -// CHECK9-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP9]] +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_HEADER17:%.*]] +// CHECK9: omp_section_loop.header17: +// CHECK9-NEXT: [[OMP_SECTION_LOOP_IV23:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER16]] ], [ [[OMP_SECTION_LOOP_NEXT25:%.*]], [[OMP_SECTION_LOOP_INC20:%.*]] ] +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_COND18:%.*]] +// CHECK9: omp_section_loop.cond18: +// CHECK9-NEXT: [[OMP_SECTION_LOOP_CMP24:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV23]], [[TMP12]] +// CHECK9-NEXT: br i1 [[OMP_SECTION_LOOP_CMP24]], label [[OMP_SECTION_LOOP_BODY19:%.*]], label [[OMP_SECTION_LOOP_EXIT21:%.*]] +// CHECK9: omp_section_loop.body19: +// CHECK9-NEXT: [[TMP13:%.*]] = add 
i32 [[OMP_SECTION_LOOP_IV23]], [[TMP9]] // CHECK9-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 1 // CHECK9-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], 0 -// CHECK9-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER:%.*]] [ -// CHECK9-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE23:%.*]] -// CHECK9-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE25:%.*]] +// CHECK9-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY19_SECTIONS_AFTER:%.*]] [ +// CHECK9-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE26:%.*]] +// CHECK9-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE28:%.*]] // CHECK9-NEXT: ] -// CHECK9: omp_section_loop.body.case23: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM24]], i32 3) -// CHECK9-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0 -// CHECK9-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE23_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE23_CNCL:%.*]] -// CHECK9: omp_section_loop.body.case23.split: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE23_SECTION_AFTER:%.*]] -// CHECK9: omp_section_loop.body.case23.section.after: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] -// CHECK9: omp_section_loop.body.case25: +// CHECK9: omp_section_loop.body.case26: // CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM27:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM27]], i32 3) +// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM27]], i32 3) +// CHECK9-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE26_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL:%.*]] +// CHECK9: 
omp_section_loop.body.case26.cncl: +// CHECK9-NEXT: br label [[SECTION_FINI37:%.*]] +// CHECK9: omp_section_loop.body.case26.cont: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTION_AFTER:%.*]] +// CHECK9: omp_section_loop.body.case26.section.after: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY19_SECTIONS_AFTER]] +// CHECK9: omp_section_loop.body.case28: +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM29:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM29]], i32 3) // CHECK9-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 0 -// CHECK9-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]] -// CHECK9: omp_section_loop.body.case25.split: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER26:%.*]] -// CHECK9: omp_section_loop.body.case25.section.after26: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER:%.*]] -// CHECK9: omp_section_loop.body.case25.section.after: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] -// CHECK9: omp_section_loop.body16.sections.after: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_INC17]] -// CHECK9: omp_section_loop.inc17: -// CHECK9-NEXT: [[OMP_SECTION_LOOP_NEXT22]] = add nuw i32 [[OMP_SECTION_LOOP_IV20]], 1 -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_HEADER14]] -// CHECK9: omp_section_loop.exit18: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]]) -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM33:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM33]]) -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_AFTER19:%.*]] -// CHECK9: omp_section_loop.after19: -// CHECK9-NEXT: br label 
[[OMP_SECTION_LOOP_AFTER19SECTIONS_FINI:%.*]] -// CHECK9: omp_section_loop.after19sections.fini: +// CHECK9-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE28_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE28_CNCL:%.*]] +// CHECK9: omp_section_loop.body.case28.cncl: +// CHECK9-NEXT: br label [[SECTION_FINI37]] +// CHECK9: omp_section_loop.body.case28.cont: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE28_SECTIONFINI:%.*]] +// CHECK9: omp_section_loop.body.case28.sectionfini: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE28_SECTION_AFTER:%.*]] +// CHECK9: omp_section_loop.body.case28.section.after: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_BODY19_SECTIONS_AFTER]] +// CHECK9: omp_section_loop.body19.sections.after: +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_INC20]] +// CHECK9: omp_section_loop.inc20: +// CHECK9-NEXT: [[OMP_SECTION_LOOP_NEXT25]] = add nuw i32 [[OMP_SECTION_LOOP_IV23]], 1 +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_HEADER17]] +// CHECK9: omp_section_loop.exit21: +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM34]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM35:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM35]]) +// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_AFTER22:%.*]] +// CHECK9: omp_section_loop.after22: +// CHECK9-NEXT: br label [[SECTION_FINISH36:%.*]] +// CHECK9: section_finish36: +// CHECK9-NEXT: br label [[DOTOMPFINALIZE38:%.*]] +// CHECK9: .ompfinalize38: +// CHECK9-NEXT: br label [[SECTION_FINI37]] +// CHECK9: section_fini37: // CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP21]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 
[[SUB]], 1 -// CHECK9-NEXT: [[SUB35:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK9-NEXT: store i32 [[SUB35]], i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK9-NEXT: [[SUB40:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK9-NEXT: store i32 [[SUB40]], i32* [[DOTCAPTURE_EXPR_39]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 // CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP22]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4 // CHECK9-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM37:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM37]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 -// CHECK9-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] -// CHECK9-NEXT: br i1 [[CMP38]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// 
CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4 +// CHECK9-NEXT: [[CMP43:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +// CHECK9-NEXT: br i1 [[CMP43]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: // CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 @@ -4048,29 +4090,23 @@ for (int i = 0; i < argc; ++i) { // CHECK9: omp.inner.for.cond: // CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP39:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] -// CHECK9-NEXT: br i1 [[CMP39]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[CMP44:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK9-NEXT: br i1 [[CMP44]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: // CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 -// CHECK9-NEXT: [[ADD40:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD40]], i32* [[I36]], align 4 +// CHECK9-NEXT: [[ADD45:%.*]] = add nsw i32 0, [[MUL]] +// CHECK9-NEXT: store i32 [[ADD45]], i32* [[I41]], align 4 // CHECK9-NEXT: [[TMP32:%.*]] = load float, float* @flag, align 4 -// CHECK9-NEXT: [[TOBOOL41:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 -// CHECK9-NEXT: br i1 [[TOBOOL41]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK9-NEXT: [[TOBOOL46:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 +// CHECK9-NEXT: br i1 [[TOBOOL46]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) -// CHECK9-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 2) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) +// CHECK9-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 2) // CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 // CHECK9-NEXT: br i1 [[TMP34]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK9: .cancel.exit: // CHECK9-NEXT: br label [[CANCEL_EXIT:%.*]] -// CHECK9: omp_section_loop.body.case.cncl: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_EXIT]] -// CHECK9: omp_section_loop.body.case23.cncl: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] -// CHECK9: omp_section_loop.body.case25.cncl: -// CHECK9-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] // CHECK9: .cancel.continue: // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: @@ -4081,30 +4117,30 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: // CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK9-NEXT: store i32 [[ADD43]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD48:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK9-NEXT: store i32 [[ADD48]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM45]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM50:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM50]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: cancel.exit: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM44:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM44]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM49:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM49]]) // CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK9: omp.precond.end: // CHECK9-NEXT: br label [[CANCEL_CONT]] // CHECK9: cancel.cont: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM46:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM46]]) -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) -// CHECK9-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM51:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM51]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM52:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) +// CHECK9-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM52]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) // CHECK9-NEXT: [[TMP37:%.*]] = bitcast i8* [[TMP36]] to %struct.kmp_task_t_with_privates* // CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 0 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) -// CHECK9-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM48]], i8* [[TMP36]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM53:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) +// CHECK9-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM53]], i8* [[TMP36]]) // CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) // CHECK9-NEXT: store i32 0, i32* [[R]], align 4 @@ -4128,50 +4164,58 @@ for (int i = 0; i < argc; ++i) { // CHECK9: omp.par.region: // CHECK9-NEXT: [[TMP2:%.*]] = load float, float* @flag, align 4 // CHECK9-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00 -// CHECK9-NEXT: br i1 [[TOBOOL]], label [[TMP14:%.*]], label [[TMP3:%.*]] -// CHECK9: 3: -// CHECK9-NEXT: br label [[TMP4:%.*]] -// CHECK9: 4: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP6]], i64 0 -// CHECK9-NEXT: [[TMP7:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK9-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 0 +// CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_PAR_REGION_IF:%.*]], label [[OMP_PAR_REGION1:%.*]] +// CHECK9: omp.par.region1: +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i32 [[TMP3]] to i8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP4]], i64 0 +// CHECK9-NEXT: [[TMP5:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK9-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i64 0 // CHECK9-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1 // CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) -// CHECK9-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 -// CHECK9-NEXT: br i1 [[TMP9]], 
label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]] -// CHECK9: .cncl5: +// CHECK9-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +// CHECK9-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0 +// CHECK9-NEXT: br i1 [[TMP7]], label [[OMP_PAR_REGION1_CONT:%.*]], label [[OMP_PAR_REGION1_CNCL:%.*]] +// CHECK9: omp.par.region1.cncl: +// CHECK9-NEXT: br label [[DOTOMPFINALIZE11:%.*]] +// CHECK9: .ompfinalize11: +// CHECK9-NEXT: br label [[OMP_PAR_REGION1_CNCL_FINISPLIT:%.*]] +// CHECK9: omp.par.region1.cncl.finisplit: // CHECK9-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] -// CHECK9: .cont: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0 -// CHECK9-NEXT: [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 -// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0 -// CHECK9-NEXT: [[TMP13:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i8 [[TMP13]] to i32 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP10]] -// CHECK9-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD]] to i8 -// CHECK9-NEXT: store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1 +// CHECK9: omp.par.region1.cont: +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i8*, i8** [[TMP9]], i64 0 +// CHECK9-NEXT: [[TMP10:%.*]] = load i8*, i8** [[ARRAYIDX5]], align 8 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8, i8* [[TMP10]], i64 0 +// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[ARRAYIDX6]], align 1 +// CHECK9-NEXT: [[CONV7:%.*]] = sext i8 [[TMP11]] to i32 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV7]], 
[[TMP8]] +// CHECK9-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD]] to i8 +// CHECK9-NEXT: store i8 [[CONV8]], i8* [[ARRAYIDX6]], align 1 // CHECK9-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] // CHECK9: omp.par.region.parallel.after: // CHECK9-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK9: omp.par.pre_finalize: +// CHECK9-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK9: .ompfinalize: // CHECK9-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] -// CHECK9: 14: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1) -// CHECK9-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0 -// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]] -// CHECK9: .cncl: +// CHECK9: omp.par.region.if: // CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +// CHECK9-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]], i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 0 +// CHECK9-NEXT: br i1 [[TMP13]], label [[OMP_PAR_REGION_IF_CONT:%.*]], label [[OMP_PAR_REGION_IF_CNCL:%.*]] +// CHECK9: omp.par.region.if.cncl: +// CHECK9-NEXT: br label [[DOTOMPFINALIZE9:%.*]] +// CHECK9: .ompfinalize9: +// CHECK9-NEXT: br label [[OMP_PAR_REGION_IF_CNCL_FINISPLIT:%.*]] +// CHECK9: omp.par.region.if.cncl.finisplit: +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) // CHECK9-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] -// CHECK9: .split: -// CHECK9-NEXT: br 
label [[TMP4]] +// CHECK9: omp.par.region.if.cont: +// CHECK9-NEXT: br label [[OMP_PAR_REGION1]] // CHECK9: omp.par.outlined.exit.exitStub: // CHECK9-NEXT: ret void // @@ -4260,14 +4304,14 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.sections.case: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) // CHECK9-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK9-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK9-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] -// CHECK9: .omp.sections.case.split: +// CHECK9-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK9-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK9: .cancel.exit: +// CHECK9-NEXT: br label [[CANCEL_EXIT:%.*]] +// CHECK9: .cancel.continue: // CHECK9-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK9: .omp.sections.case.cncl: -// CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK9: .omp.sections.exit: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: @@ -4276,13 +4320,13 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) // CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) -// CHECK9-NEXT: br label 
[[CANCEL_CONT]] +// CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK9: cancel.cont: // CHECK9-NEXT: ret void // CHECK9: cancel.exit: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) // CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK9-NEXT: br label [[CANCEL_CONT]] // @@ -4303,7 +4347,7 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) // CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 1 @@ -4324,25 +4368,23 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.sections.case: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) // CHECK9-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK9-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// 
CHECK9-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] -// CHECK9: .omp.sections.case.split: +// CHECK9-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK9-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK9: .cancel.exit: +// CHECK9-NEXT: br label [[CANCEL_EXIT:%.*]] +// CHECK9: .cancel.continue: // CHECK9-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK9: .omp.sections.case.cncl: -// CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK9: .omp.sections.case2: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) // CHECK9-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 3) -// CHECK9-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0 -// CHECK9-NEXT: br i1 [[TMP10]], label [[DOTOMP_SECTIONS_CASE2_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE2_CNCL:%.*]] -// CHECK9: .omp.sections.case2.split: -// CHECK9-NEXT: br label [[DOTOMP_SECTIONS_CASE2_SECTION_AFTER:%.*]] -// CHECK9: .omp.sections.case2.section.after: +// CHECK9-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK9-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT4:%.*]], label [[DOTCANCEL_CONTINUE5:%.*]] +// CHECK9: .cancel.exit4: +// CHECK9-NEXT: br label [[CANCEL_EXIT]] +// CHECK9: .cancel.continue5: // CHECK9-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK9: .omp.sections.case2.cncl: -// CHECK9-NEXT: br label [[OMP_INNER_FOR_END]] // CHECK9: .omp.sections.exit: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: @@ -4351,14 +4393,14 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: -// CHECK9-NEXT: 
[[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM5]]) -// CHECK9-NEXT: br label [[CANCEL_CONT]] +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM7]]) +// CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK9: cancel.cont: // CHECK9-NEXT: ret void // CHECK9: cancel.exit: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23]]) -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) // CHECK9-NEXT: br label [[CANCEL_CONT]] // // @@ -4405,7 +4447,7 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK9-NEXT: store i32 0, i32* [[R3]], align 4 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB31:[0-9]+]]) // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 @@ -4433,7 +4475,7 @@ for (int i = 0; i < 
argc; ++i) { // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB33:[0-9]+]]) // CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM7]], i32 2) // CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 // CHECK9-NEXT: br i1 [[TMP15]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] @@ -4455,14 +4497,14 @@ for (int i = 0; i < argc; ++i) { // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35:[0-9]+]]) // CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) // CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK9-NEXT: [[TMP20:%.*]] = bitcast i32* [[R3]] to i8* // CHECK9-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) // CHECK9-NEXT: [[TMP21:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK9-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB30:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* 
@.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB36:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] @@ -4472,10 +4514,10 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[R3]], align 4 // CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: store i32 [[ADD13]], i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB30]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: cancel.exit: -// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) // CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) // CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK9: .omp.reduction.case2: @@ -4525,20 +4567,20 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[P_LASTITER28:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[P_LOWERBOUND29:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[P_UPPERBOUND30:%.*]] = alloca i32, align 4 -// 
CHECK10-NEXT: [[P_STRIDE31:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[P_LASTITER30:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[P_LOWERBOUND31:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[P_UPPERBOUND32:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[P_STRIDE33:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[DOTCAPTURE_EXPR_34:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTCAPTURE_EXPR_39:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[I36:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[I41:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK10-NEXT: [[R:%.*]] = alloca i32, align 4 // CHECK10-NEXT: store i32 0, i32* [[RETVAL]], align 4 @@ -4561,8 +4603,8 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: store i32 0, i32* [[P_LOWERBOUND]], align 4 // CHECK10-NEXT: store i32 0, i32* [[P_UPPERBOUND]], align 4 // CHECK10-NEXT: store i32 1, i32* [[P_STRIDE]], align 4 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM11]], i32 34, i32* [[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 0) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM13:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM13]], i32 34, i32* 
[[P_LASTITER]], i32* [[P_LOWERBOUND]], i32* [[P_UPPERBOUND]], i32* [[P_STRIDE]], i32 1, i32 0) // CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[P_LOWERBOUND]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[P_UPPERBOUND]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], [[TMP0]] @@ -4582,11 +4624,13 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE:%.*]] // CHECK10-NEXT: ] // CHECK10: omp_section_loop.body.case: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM10]], i32 3) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 3) // CHECK10-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK10-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] -// CHECK10: omp_section_loop.body.case.split: +// CHECK10-NEXT: br i1 [[TMP8]], label [[OMP_SECTION_LOOP_BODY_CASE_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE_CNCL:%.*]] +// CHECK10: omp_section_loop.body.case.cncl: +// CHECK10-NEXT: br label [[SECTION_FINI:%.*]] +// CHECK10: omp_section_loop.body.case.cont: // CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE_SECTION_AFTER:%.*]] // CHECK10: omp_section_loop.body.case.section.after: // CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_SECTIONS_AFTER]] @@ -4596,97 +4640,109 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: [[OMP_SECTION_LOOP_NEXT]] = add nuw i32 [[OMP_SECTION_LOOP_IV]], 1 // CHECK10-NEXT: br label [[OMP_SECTION_LOOP_HEADER]] // CHECK10: omp_section_loop.exit: -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 
[[OMP_GLOBAL_THREAD_NUM11]]) -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]]) +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM13]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM14:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM14]]) // CHECK10-NEXT: br label [[OMP_SECTION_LOOP_AFTER:%.*]] // CHECK10: omp_section_loop.after: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_AFTERSECTIONS_FINI:%.*]] -// CHECK10: omp_section_loop.aftersections.fini: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]] -// CHECK10: omp_section_loop.preheader13: -// CHECK10-NEXT: store i32 0, i32* [[P_LOWERBOUND29]], align 4 -// CHECK10-NEXT: store i32 1, i32* [[P_UPPERBOUND30]], align 4 -// CHECK10-NEXT: store i32 1, i32* [[P_STRIDE31]], align 4 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM32:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]], i32 34, i32* [[P_LASTITER28]], i32* [[P_LOWERBOUND29]], i32* [[P_UPPERBOUND30]], i32* [[P_STRIDE31]], i32 1, i32 0) -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[P_LOWERBOUND29]], align 4 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[P_UPPERBOUND30]], align 4 +// CHECK10-NEXT: br label [[SECTION_FINISH:%.*]] +// CHECK10: section_finish: +// CHECK10-NEXT: br label [[DOTOMPFINALIZE15:%.*]] +// CHECK10: .ompfinalize15: +// CHECK10-NEXT: br label [[SECTION_FINI]] +// CHECK10: section_fini: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_PREHEADER16:%.*]] +// CHECK10: omp_section_loop.preheader16: +// CHECK10-NEXT: store i32 0, i32* 
[[P_LOWERBOUND31]], align 4 +// CHECK10-NEXT: store i32 1, i32* [[P_UPPERBOUND32]], align 4 +// CHECK10-NEXT: store i32 1, i32* [[P_STRIDE33]], align 4 +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM34:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM34]], i32 34, i32* [[P_LASTITER30]], i32* [[P_LOWERBOUND31]], i32* [[P_UPPERBOUND32]], i32* [[P_STRIDE33]], i32 1, i32 0) +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[P_LOWERBOUND31]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[P_UPPERBOUND32]], align 4 // CHECK10-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], [[TMP9]] // CHECK10-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 1 -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_HEADER14:%.*]] -// CHECK10: omp_section_loop.header14: -// CHECK10-NEXT: [[OMP_SECTION_LOOP_IV20:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER13]] ], [ [[OMP_SECTION_LOOP_NEXT22:%.*]], [[OMP_SECTION_LOOP_INC17:%.*]] ] -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_COND15:%.*]] -// CHECK10: omp_section_loop.cond15: -// CHECK10-NEXT: [[OMP_SECTION_LOOP_CMP21:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV20]], [[TMP12]] -// CHECK10-NEXT: br i1 [[OMP_SECTION_LOOP_CMP21]], label [[OMP_SECTION_LOOP_BODY16:%.*]], label [[OMP_SECTION_LOOP_EXIT18:%.*]] -// CHECK10: omp_section_loop.body16: -// CHECK10-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV20]], [[TMP9]] +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_HEADER17:%.*]] +// CHECK10: omp_section_loop.header17: +// CHECK10-NEXT: [[OMP_SECTION_LOOP_IV23:%.*]] = phi i32 [ 0, [[OMP_SECTION_LOOP_PREHEADER16]] ], [ [[OMP_SECTION_LOOP_NEXT25:%.*]], [[OMP_SECTION_LOOP_INC20:%.*]] ] +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_COND18:%.*]] +// CHECK10: omp_section_loop.cond18: +// CHECK10-NEXT: [[OMP_SECTION_LOOP_CMP24:%.*]] = icmp ult i32 [[OMP_SECTION_LOOP_IV23]], [[TMP12]] +// CHECK10-NEXT: br i1 [[OMP_SECTION_LOOP_CMP24]], 
label [[OMP_SECTION_LOOP_BODY19:%.*]], label [[OMP_SECTION_LOOP_EXIT21:%.*]] +// CHECK10: omp_section_loop.body19: +// CHECK10-NEXT: [[TMP13:%.*]] = add i32 [[OMP_SECTION_LOOP_IV23]], [[TMP9]] // CHECK10-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 1 // CHECK10-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], 0 -// CHECK10-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER:%.*]] [ -// CHECK10-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE23:%.*]] -// CHECK10-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE25:%.*]] +// CHECK10-NEXT: switch i32 [[TMP15]], label [[OMP_SECTION_LOOP_BODY19_SECTIONS_AFTER:%.*]] [ +// CHECK10-NEXT: i32 0, label [[OMP_SECTION_LOOP_BODY_CASE26:%.*]] +// CHECK10-NEXT: i32 1, label [[OMP_SECTION_LOOP_BODY_CASE28:%.*]] // CHECK10-NEXT: ] -// CHECK10: omp_section_loop.body.case23: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM24:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM24]], i32 3) -// CHECK10-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0 -// CHECK10-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE23_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE23_CNCL:%.*]] -// CHECK10: omp_section_loop.body.case23.split: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE23_SECTION_AFTER:%.*]] -// CHECK10: omp_section_loop.body.case23.section.after: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] -// CHECK10: omp_section_loop.body.case25: +// CHECK10: omp_section_loop.body.case26: // CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM27:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM27]], i32 3) +// CHECK10-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM27]], i32 3) +// CHECK10-NEXT: 
[[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0 +// CHECK10-NEXT: br i1 [[TMP17]], label [[OMP_SECTION_LOOP_BODY_CASE26_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE26_CNCL:%.*]] +// CHECK10: omp_section_loop.body.case26.cncl: +// CHECK10-NEXT: br label [[SECTION_FINI37:%.*]] +// CHECK10: omp_section_loop.body.case26.cont: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE26_SECTION_AFTER:%.*]] +// CHECK10: omp_section_loop.body.case26.section.after: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY19_SECTIONS_AFTER]] +// CHECK10: omp_section_loop.body.case28: +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM29:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM29]], i32 3) // CHECK10-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 0 -// CHECK10-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]] -// CHECK10: omp_section_loop.body.case25.split: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER26:%.*]] -// CHECK10: omp_section_loop.body.case25.section.after26: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER:%.*]] -// CHECK10: omp_section_loop.body.case25.section.after: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]] -// CHECK10: omp_section_loop.body16.sections.after: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_INC17]] -// CHECK10: omp_section_loop.inc17: -// CHECK10-NEXT: [[OMP_SECTION_LOOP_NEXT22]] = add nuw i32 [[OMP_SECTION_LOOP_IV20]], 1 -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_HEADER14]] -// CHECK10: omp_section_loop.exit18: -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM32]]) -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM33:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: call void 
@__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM33]]) -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_AFTER19:%.*]] -// CHECK10: omp_section_loop.after19: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_AFTER19SECTIONS_FINI:%.*]] -// CHECK10: omp_section_loop.after19sections.fini: +// CHECK10-NEXT: br i1 [[TMP19]], label [[OMP_SECTION_LOOP_BODY_CASE28_CONT:%.*]], label [[OMP_SECTION_LOOP_BODY_CASE28_CNCL:%.*]] +// CHECK10: omp_section_loop.body.case28.cncl: +// CHECK10-NEXT: br label [[SECTION_FINI37]] +// CHECK10: omp_section_loop.body.case28.cont: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE28_SECTIONFINI:%.*]] +// CHECK10: omp_section_loop.body.case28.sectionfini: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY_CASE28_SECTION_AFTER:%.*]] +// CHECK10: omp_section_loop.body.case28.section.after: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_BODY19_SECTIONS_AFTER]] +// CHECK10: omp_section_loop.body19.sections.after: +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_INC20]] +// CHECK10: omp_section_loop.inc20: +// CHECK10-NEXT: [[OMP_SECTION_LOOP_NEXT25]] = add nuw i32 [[OMP_SECTION_LOOP_IV23]], 1 +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_HEADER17]] +// CHECK10: omp_section_loop.exit21: +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM34]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM35:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM35]]) +// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_AFTER22:%.*]] +// CHECK10: omp_section_loop.after22: +// CHECK10-NEXT: br label [[SECTION_FINISH36:%.*]] +// CHECK10: section_finish36: +// CHECK10-NEXT: br label [[DOTOMPFINALIZE38:%.*]] +// CHECK10: .ompfinalize38: +// CHECK10-NEXT: br label [[SECTION_FINI37]] +// CHECK10: section_fini37: // CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // 
CHECK10-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP21]], 0 // CHECK10-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK10-NEXT: [[SUB35:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK10-NEXT: store i32 [[SUB35]], i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK10-NEXT: [[SUB40:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK10-NEXT: store i32 [[SUB40]], i32* [[DOTCAPTURE_EXPR_39]], align 4 // CHECK10-NEXT: store i32 0, i32* [[I]], align 4 // CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP22]] // CHECK10-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK10: omp.precond.then: // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4 // CHECK10-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM37:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) -// CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM37]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK10-NEXT: [[TMP24:%.*]] = load i32, 
i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 -// CHECK10-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] -// CHECK10-NEXT: br i1 [[CMP38]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4 +// CHECK10-NEXT: [[CMP43:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +// CHECK10-NEXT: br i1 [[CMP43]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: -// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_34]], align 4 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4 // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: // CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 @@ -4700,29 +4756,23 @@ for (int i = 0; i < argc; ++i) { // CHECK10: omp.inner.for.cond: // CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP39:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] -// CHECK10-NEXT: br i1 [[CMP39]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK10-NEXT: [[CMP44:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK10-NEXT: br i1 [[CMP44]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: // CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 -// CHECK10-NEXT: [[ADD40:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD40]], i32* [[I36]], align 4 +// CHECK10-NEXT: [[ADD45:%.*]] = add nsw i32 0, [[MUL]] +// CHECK10-NEXT: store i32 [[ADD45]], i32* [[I41]], align 4 // CHECK10-NEXT: [[TMP32:%.*]] = load float, float* @flag, align 4 -// CHECK10-NEXT: [[TOBOOL41:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 -// CHECK10-NEXT: br i1 [[TOBOOL41]], label [[OMP_IF_THEN:%.*]], label 
[[OMP_IF_ELSE:%.*]] +// CHECK10-NEXT: [[TOBOOL46:%.*]] = fcmp une float [[TMP32]], 0.000000e+00 +// CHECK10-NEXT: br i1 [[TOBOOL46]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM42:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) -// CHECK10-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM42]], i32 2) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]]) +// CHECK10-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 2) // CHECK10-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 // CHECK10-NEXT: br i1 [[TMP34]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK10: .cancel.exit: // CHECK10-NEXT: br label [[CANCEL_EXIT:%.*]] -// CHECK10: omp_section_loop.body.case.cncl: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_EXIT]] -// CHECK10: omp_section_loop.body.case23.cncl: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] -// CHECK10: omp_section_loop.body.case25.cncl: -// CHECK10-NEXT: br label [[OMP_SECTION_LOOP_EXIT18]] // CHECK10: .cancel.continue: // CHECK10-NEXT: br label [[OMP_IF_END:%.*]] // CHECK10: omp_if.else: @@ -4733,30 +4783,30 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: // CHECK10-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK10-NEXT: store i32 [[ADD43]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[ADD48:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK10-NEXT: store i32 [[ADD48]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: 
[[OMP_GLOBAL_THREAD_NUM45:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM45]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM50:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]]) +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM50]]) // CHECK10-NEXT: br label [[OMP_PRECOND_END]] // CHECK10: cancel.exit: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM44:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM44]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM49:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]]) +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM49]]) // CHECK10-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK10: omp.precond.end: // CHECK10-NEXT: br label [[CANCEL_CONT]] // CHECK10: cancel.cont: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM46:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM46]]) -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) -// CHECK10-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM51:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM51]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM52:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14:[0-9]+]]) +// CHECK10-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM52]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) // CHECK10-NEXT: [[TMP37:%.*]] = bitcast i8* [[TMP36]] to %struct.kmp_task_t_with_privates* // CHECK10-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 0 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) -// CHECK10-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM48]], i8* [[TMP36]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM53:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) +// CHECK10-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM53]], i8* [[TMP36]]) // CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) // CHECK10-NEXT: store i32 0, i32* [[R]], align 4 @@ -4780,50 +4830,58 @@ for (int i = 0; i < argc; ++i) { // CHECK10: omp.par.region: // CHECK10-NEXT: [[TMP2:%.*]] = load float, float* @flag, align 4 // CHECK10-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00 -// CHECK10-NEXT: br i1 [[TOBOOL]], label [[TMP14:%.*]], label [[TMP3:%.*]] -// CHECK10: 3: -// CHECK10-NEXT: br label [[TMP4:%.*]] -// CHECK10: 4: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK10-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8 -// CHECK10-NEXT: [[TMP6:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP6]], i64 0 -// CHECK10-NEXT: [[TMP7:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK10-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 0 +// CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_PAR_REGION_IF:%.*]], label [[OMP_PAR_REGION1:%.*]] +// CHECK10: omp.par.region1: +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK10-NEXT: [[CONV:%.*]] = trunc i32 [[TMP3]] to i8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP4]], i64 0 +// CHECK10-NEXT: [[TMP5:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK10-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i64 0 // CHECK10-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1 // CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) -// CHECK10-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 -// 
CHECK10-NEXT: br i1 [[TMP9]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]] -// CHECK10: .cncl5: +// CHECK10-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +// CHECK10-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0 +// CHECK10-NEXT: br i1 [[TMP7]], label [[OMP_PAR_REGION1_CONT:%.*]], label [[OMP_PAR_REGION1_CNCL:%.*]] +// CHECK10: omp.par.region1.cncl: +// CHECK10-NEXT: br label [[DOTOMPFINALIZE11:%.*]] +// CHECK10: .ompfinalize11: +// CHECK10-NEXT: br label [[OMP_PAR_REGION1_CNCL_FINISPLIT:%.*]] +// CHECK10: omp.par.region1.cncl.finisplit: // CHECK10-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] -// CHECK10: .cont: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 -// CHECK10-NEXT: [[TMP11:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 -// CHECK10-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0 -// CHECK10-NEXT: [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 -// CHECK10-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0 -// CHECK10-NEXT: [[TMP13:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1 -// CHECK10-NEXT: [[CONV8:%.*]] = sext i8 [[TMP13]] to i32 -// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP10]] -// CHECK10-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD]] to i8 -// CHECK10-NEXT: store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1 +// CHECK10: omp.par.region1.cont: +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK10-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i8*, i8** [[TMP9]], i64 0 +// CHECK10-NEXT: [[TMP10:%.*]] = load i8*, i8** [[ARRAYIDX5]], align 8 +// CHECK10-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8, i8* [[TMP10]], i64 0 +// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[ARRAYIDX6]], align 1 +// CHECK10-NEXT: [[CONV7:%.*]] = sext i8 [[TMP11]] to i32 
+// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV7]], [[TMP8]] +// CHECK10-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD]] to i8 +// CHECK10-NEXT: store i8 [[CONV8]], i8* [[ARRAYIDX6]], align 1 // CHECK10-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] // CHECK10: omp.par.region.parallel.after: // CHECK10-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK10: omp.par.pre_finalize: +// CHECK10-NEXT: br label [[DOTOMPFINALIZE:%.*]] +// CHECK10: .ompfinalize: // CHECK10-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] -// CHECK10: 14: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1) -// CHECK10-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0 -// CHECK10-NEXT: br i1 [[TMP16]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]] -// CHECK10: .cncl: +// CHECK10: omp.par.region.if: // CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +// CHECK10-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2]], i32 1) +// CHECK10-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 0 +// CHECK10-NEXT: br i1 [[TMP13]], label [[OMP_PAR_REGION_IF_CONT:%.*]], label [[OMP_PAR_REGION_IF_CNCL:%.*]] +// CHECK10: omp.par.region.if.cncl: +// CHECK10-NEXT: br label [[DOTOMPFINALIZE9:%.*]] +// CHECK10: .ompfinalize9: +// CHECK10-NEXT: br label [[OMP_PAR_REGION_IF_CNCL_FINISPLIT:%.*]] +// CHECK10: omp.par.region.if.cncl.finisplit: +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) // CHECK10-NEXT: 
br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] -// CHECK10: .split: -// CHECK10-NEXT: br label [[TMP4]] +// CHECK10: omp.par.region.if.cont: +// CHECK10-NEXT: br label [[OMP_PAR_REGION1]] // CHECK10: omp.par.outlined.exit.exitStub: // CHECK10-NEXT: ret void // @@ -4912,14 +4970,14 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK10-NEXT: ] // CHECK10: .omp.sections.case: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) // CHECK10-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK10-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK10-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] -// CHECK10: .omp.sections.case.split: +// CHECK10-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK10-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK10: .cancel.exit: +// CHECK10-NEXT: br label [[CANCEL_EXIT:%.*]] +// CHECK10: .cancel.continue: // CHECK10-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK10: .omp.sections.case.cncl: -// CHECK10-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK10: .omp.sections.exit: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: @@ -4928,13 +4986,13 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19:[0-9]+]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) // CHECK10-NEXT: call void 
@__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) -// CHECK10-NEXT: br label [[CANCEL_CONT]] +// CHECK10-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK10: cancel.cont: // CHECK10-NEXT: ret void // CHECK10: cancel.exit: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB19]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) // CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK10-NEXT: br label [[CANCEL_CONT]] // @@ -4955,7 +5013,7 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) // CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 1 @@ -4976,25 +5034,23 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]] // CHECK10-NEXT: ] // CHECK10: .omp.sections.case: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) // CHECK10-NEXT: [[TMP7:%.*]] = call i32 
@__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK10-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK10-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] -// CHECK10: .omp.sections.case.split: +// CHECK10-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK10-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK10: .cancel.exit: +// CHECK10-NEXT: br label [[CANCEL_EXIT:%.*]] +// CHECK10: .cancel.continue: // CHECK10-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK10: .omp.sections.case.cncl: -// CHECK10-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK10: .omp.sections.case2: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) // CHECK10-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 3) -// CHECK10-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0 -// CHECK10-NEXT: br i1 [[TMP10]], label [[DOTOMP_SECTIONS_CASE2_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE2_CNCL:%.*]] -// CHECK10: .omp.sections.case2.split: -// CHECK10-NEXT: br label [[DOTOMP_SECTIONS_CASE2_SECTION_AFTER:%.*]] -// CHECK10: .omp.sections.case2.section.after: +// CHECK10-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK10-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT4:%.*]], label [[DOTCANCEL_CONTINUE5:%.*]] +// CHECK10: .cancel.exit4: +// CHECK10-NEXT: br label [[CANCEL_EXIT]] +// CHECK10: .cancel.continue5: // CHECK10-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK10: .omp.sections.case2.cncl: -// CHECK10-NEXT: br label [[OMP_INNER_FOR_END]] // CHECK10: .omp.sections.exit: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: @@ -5003,14 +5059,14 @@ for (int i = 0; i < argc; ++i) { // 
CHECK10-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM5]]) -// CHECK10-NEXT: br label [[CANCEL_CONT]] +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM7]]) +// CHECK10-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK10: cancel.cont: // CHECK10-NEXT: ret void // CHECK10: cancel.exit: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23]]) -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) // CHECK10-NEXT: br label [[CANCEL_CONT]] // // @@ -5057,7 +5113,7 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK10-NEXT: store i32 0, i32* [[R3]], align 4 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB31:[0-9]+]]) // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 
// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 @@ -5085,7 +5141,7 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB33:[0-9]+]]) // CHECK10-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM7]], i32 2) // CHECK10-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 // CHECK10-NEXT: br i1 [[TMP15]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] @@ -5107,14 +5163,14 @@ for (int i = 0; i < argc; ++i) { // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35:[0-9]+]]) // CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) // CHECK10-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK10-NEXT: [[TMP20:%.*]] = bitcast i32* [[R3]] to i8* // CHECK10-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) // CHECK10-NEXT: [[TMP21:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// 
CHECK10-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB30:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK10-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB36:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK10-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK10-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK10-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] @@ -5124,10 +5180,10 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[R3]], align 4 // CHECK10-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK10-NEXT: store i32 [[ADD13]], i32* [[TMP1]], align 4 -// CHECK10-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB30]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK10-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK10-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK10: cancel.exit: -// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB35]]) // CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) // CHECK10-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK10: .omp.reduction.case2: From 1710516604ccadff3068fd1bb5025cd9a64615e8 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 2 May 2022 21:48:07 -0500 Subject: [PATCH 39/50] clang-format --- 
clang/lib/CodeGen/CodeGenFunction.h | 3 +- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 19 +- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 233 +++++++++--------- .../Frontend/OpenMPIRBuilderTest.cpp | 11 +- 4 files changed, 121 insertions(+), 145 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 7925e68202949..462e3b1382c51 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -1802,7 +1802,8 @@ class CodeGenFunction : public CodeGenTypeCache { CGBuilderTy::InsertPointGuard IPG(CGF.Builder); // MK: needed? CGF.Builder.restoreIP(IP); - llvm::BasicBlock * DestBB = llvm::splitBB(CGF.Builder, false, ".ompfinalize"); + llvm::BasicBlock *DestBB = + llvm::splitBB(CGF.Builder, false, ".ompfinalize"); // llvm::BasicBlock *IPBB = IP.getBlock(); // llvm::BasicBlock *DestBB = IPBB->getUniqueSuccessor(); diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index eb5232d2d8aa7..51a74d59ed33c 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -105,8 +105,6 @@ class OpenMPIRBuilder { /// at the time, and location, the callback is invoked. using FinalizeCallbackTy = function_ref; - - private: enum class RegionKind { /// Sentinel object so we don't always have to check whether the stack is @@ -123,8 +121,6 @@ class OpenMPIRBuilder { struct OMPRegionInfo; - - /// An irregular exit out of a region, such as by cancellation. struct OMPRegionBreakInfo { /// The end of this basic block is current end of the path for breaking out @@ -147,8 +143,6 @@ class OpenMPIRBuilder { void assertOK() const; }; - - /// An OpenMP region with a single entry and single exit (unless containing a /// irregular exit) that may be associated with a construct. 
struct OMPRegionInfo { @@ -168,8 +162,7 @@ class OpenMPIRBuilder { OMPRegionInfo(RegionKind Kind, omp::Directive DK, bool IsCancellable); /// Register an irregular exit to this region. - void addBreak(BasicBlock *BB, omp::Directive Reason, - OMPRegionInfo *Target) ; + void addBreak(BasicBlock *BB, omp::Directive Reason, OMPRegionInfo *Target); /// Consistency self-check. void assertOK() const; @@ -186,9 +179,6 @@ class OpenMPIRBuilder { /// the toplevel region if not present. OMPRegionInfo *getInnermostRegion(omp::Directive DK); - - - /// @{ /// Push a new region to the region stack. Must eventually be popped again /// using exitRegion. @@ -199,9 +189,8 @@ class OpenMPIRBuilder { } /// @} - - - /// Pop a region from the region stack. Net yet rejoined irregular exits fall through the outer surrounding region. + /// Pop a region from the region stack. Net yet rejoined irregular exits fall + /// through the outer surrounding region. void exitRegion(OMPRegionInfo *R); public: @@ -1328,7 +1317,7 @@ class OpenMPIRBuilder { EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall, Instruction *ExitCall, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional = false, - bool IsCancellable = false); + bool IsCancellable = false); /// Get the platform-specific name separator. 
/// \param Parts different parts of the final name that needs separation diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 4fc2cebc3fb36..754b07b955925 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -517,7 +517,7 @@ void OpenMPIRBuilder::finalize(Function *Fn) { OpenMPIRBuilder::OpenMPIRBuilder(Module &M) : M(M), Builder(M.getContext()) { RegionStack.emplace_back(new OMPRegionInfo( - RegionKind::Function, omp::OMPD_unknown, /*IsCancellable*/false )); + RegionKind::Function, omp::OMPD_unknown, /*IsCancellable*/ false)); } OpenMPIRBuilder::~OpenMPIRBuilder() { @@ -697,13 +697,13 @@ OpenMPIRBuilder::OMPRegionInfo::OMPRegionInfo(RegionKind Kind, assertOK(); } -void OpenMPIRBuilder::OMPRegionInfo::addBreak(BasicBlock *BB, omp::Directive Reason, - OMPRegionInfo *Target) { - assert(IsCancellable && - "Only cancellable region may have irregular exits"); - assert(!BB->getTerminator() && "Irregular exit must not rejoin the cfg"); - Breaks.emplace_back(BB, Reason, Target); - assertOK(); +void OpenMPIRBuilder::OMPRegionInfo::addBreak(BasicBlock *BB, + omp::Directive Reason, + OMPRegionInfo *Target) { + assert(IsCancellable && "Only cancellable region may have irregular exits"); + assert(!BB->getTerminator() && "Irregular exit must not rejoin the cfg"); + Breaks.emplace_back(BB, Reason, Target); + assertOK(); } void OpenMPIRBuilder::OMPRegionInfo::assertOK() const { @@ -741,7 +741,7 @@ void OpenMPIRBuilder::OMPRegionInfo::assertOK() const { OpenMPIRBuilder::OMPRegionInfo * OpenMPIRBuilder::getInnermostRegion(omp::Directive DK) { - for ( const std::unique_ptr &R : reverse(RegionStack)) { + for (const std::unique_ptr &R : reverse(RegionStack)) { if (R->Kind == RegionKind::Directive && R->DK == DK) return R.get(); } @@ -760,10 +760,11 @@ OpenMPIRBuilder::enterRegion(OpenMPIRBuilder::RegionKind Kind, } void OpenMPIRBuilder::exitRegion(OMPRegionInfo *R) { - 
assert(RegionStack.size() >= 2 && "Expect at least two regions on the stack: toplevel and the one exiting"); + assert( + RegionStack.size() >= 2 && + "Expect at least two regions on the stack: toplevel and the one exiting"); assert(RegionStack.back().get() == R && "balanced region push/pop required"); - // Trickle down no yet handled breaks. OMPRegionInfo *Innermost = RegionStack.back().get(); OMPRegionInfo *NewInnermost = RegionStack.rbegin()[1].get(); @@ -794,11 +795,12 @@ void OpenMPIRBuilder::exitRegion(OMPRegionInfo *R) { #endif for (OMPRegionBreakInfo &Break : Innermost->Breaks) { - if (Break.Target == R) { - assert(!Break.BB && "Irregular exit must have been handled by this region"); - } else { - NewInnermost->addBreak(Break.BB, Break.Reason, Break.Target); - } + if (Break.Target == R) { + assert(!Break.BB && + "Irregular exit must have been handled by this region"); + } else { + NewInnermost->addBreak(Break.BB, Break.Reason, Break.Target); + } } Innermost->Breaks.clear(); @@ -873,22 +875,22 @@ OpenMPIRBuilder::createCancel(const LocationDescription &Loc, // Create condition for cancel if necessary. 
BasicBlock *ContBB = nullptr; if (IfCondition) { - // EntryBB - // | | - // | ThenBB (".if") - // | | - // ContBB - BasicBlock* EntryBB = Builder.GetInsertBlock(); - ContBB = splitBB(Builder, /*CreateBranch*/false); - BasicBlock *ThenBB = BasicBlock::Create( - Builder.getContext(), EntryBB->getName() + ".if", ContBB->getParent(), ContBB); + // EntryBB + // | | + // | ThenBB (".if") + // | | + // ContBB + BasicBlock *EntryBB = Builder.GetInsertBlock(); + ContBB = splitBB(Builder, /*CreateBranch*/ false); + BasicBlock *ThenBB = + BasicBlock::Create(Builder.getContext(), EntryBB->getName() + ".if", + ContBB->getParent(), ContBB); Builder.CreateCondBr(IfCondition, ThenBB, ContBB); Builder.SetInsertPoint(ThenBB); Builder.CreateBr(ContBB); Builder.SetInsertPoint(ThenBB->getTerminator()); } - Value *CancelKind = nullptr; switch (CancelledDirective) { #define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \ @@ -910,7 +912,6 @@ OpenMPIRBuilder::createCancel(const LocationDescription &Loc, // The actual cancel logic is shared with others, e.g., cancel_barriers. 
emitCancelationCheckImpl(Loc, Result, CancelledDirective, OMPD_cancel); - if (ContBB) return {ContBB, ContBB->begin()}; return Builder.saveIP(); @@ -956,7 +957,7 @@ void OpenMPIRBuilder::emitOffloadingEntry(Constant *Addr, StringRef Name, void OpenMPIRBuilder::emitCancelationCheckImpl( LocationDescription Loc, Value *CancelFlag, - omp::Directive CancelledDirective, omp::Directive CancelledBy) { + omp::Directive CancelledDirective, omp::Directive CancelledBy) { assert(isLastFinalizationInfoCancellable(CancelledDirective) && "Unexpected cancellation!"); @@ -966,36 +967,37 @@ void OpenMPIRBuilder::emitCancelationCheckImpl( // BB // br i1 CancelFlag - // | | + // | | // | PreCancellationBlock (".cncl.fini") // | | // | CancellationBlock (".cncl") // | | // NonCancellationBlock (".cont") - BasicBlock *NonCancellationBlock = splitBBWithSuffix(Builder, /*CreateBranch*/false, ".cont"); + BasicBlock *NonCancellationBlock = + splitBBWithSuffix(Builder, /*CreateBranch*/ false, ".cont"); - // BasicBlock *PreCancellationBlock = BasicBlock::Create(Ctx, BB->getName() + ".cncl.fini", BB->getParent(), NonCancellationBlock); - BasicBlock *CancellationBlock =BasicBlock::Create(Ctx, BB->getName() + ".cncl", BB->getParent(), NonCancellationBlock); + // BasicBlock *PreCancellationBlock = BasicBlock::Create(Ctx, BB->getName() + + // ".cncl.fini", BB->getParent(), NonCancellationBlock); + BasicBlock *CancellationBlock = BasicBlock::Create( + Ctx, BB->getName() + ".cncl", BB->getParent(), NonCancellationBlock); // Jump to them based on the return value. Value *Cmp = Builder.CreateIsNull(CancelFlag); Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock, /* TODO weight */ nullptr, nullptr); - - - // TODO: Clang's codegen emits finalization code only once and inserts a // switch to jump back to the target code path (CGF.EmitBranchThroughCleanup). 
// Currently in the OpenMPIRBuilder, we emit the finialization multiple times // for each path exiting the region (non-cancellation and each cancellation // check). - RegionStack.back()->addBreak(CancellationBlock, CancelledBy, getInnermostRegion(CancelledDirective)); + RegionStack.back()->addBreak(CancellationBlock, CancelledBy, + getInnermostRegion(CancelledDirective)); // The continuation block is where code generation continues. - Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin()); + Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin()); } IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( @@ -1077,15 +1079,10 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( BasicBlock *PRegExitBB = PRegPreFiniBB->splitBasicBlock(ThenTI, "omp.par.exit"); - - - - - - // FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable}); - // OMPRegionInfo *ParallelRegion = enterRegion(OMPD_parallel, IsCancellable , FiniCBWrapper ); - OMPRegionInfo *ParallelRegion = enterRegion(OMPD_parallel, IsCancellable ); + // OMPRegionInfo *ParallelRegion = enterRegion(OMPD_parallel, IsCancellable , + // FiniCBWrapper ); + OMPRegionInfo *ParallelRegion = enterRegion(OMPD_parallel, IsCancellable); // Generate the privatization allocas in the block that will become the entry // of the outlined function. @@ -1243,7 +1240,6 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( } #endif - #if 0 // Unless cancellation has been detected by a barrier itself, need to // synchronize between threads (after finalization). 
@@ -1286,32 +1282,34 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( //emitRegionExit(PreFiniIP, ParallelRegion); #endif - for (OMPRegionBreakInfo & Break : ParallelRegion->Breaks) { - Builder.SetInsertPoint(Break.BB); + for (OMPRegionBreakInfo &Break : ParallelRegion->Breaks) { + Builder.SetInsertPoint(Break.BB); - if (FiniCB) { - BasicBlock *AfterFini = splitBBWithSuffix(Builder, true, ".finisplit"); - FiniCB(Builder.saveAndClearIP() ); - Builder.SetInsertPoint(AfterFini); - } + if (FiniCB) { + BasicBlock *AfterFini = splitBBWithSuffix(Builder, true, ".finisplit"); + FiniCB(Builder.saveAndClearIP()); + Builder.SetInsertPoint(AfterFini); + } - // Unless cancellation has been detected by a barrier itself, need to - // synchronize between threads (after finalization). - if (Break.Reason != OMPD_barrier) { - Builder.restoreIP( emitBarrierImpl(Loc, Break.Reason, /*ForceSimpleCall*/ false, /*CheckCancelFlag*/ false)); - } + // Unless cancellation has been detected by a barrier itself, need to + // synchronize between threads (after finalization). + if (Break.Reason != OMPD_barrier) { + Builder.restoreIP(emitBarrierImpl(Loc, Break.Reason, + /*ForceSimpleCall*/ false, + /*CheckCancelFlag*/ false)); + } - // If the break was targeting this parallel region, rejoin after it. - if (Break.Target == ParallelRegion) { - Builder.CreateBr(PRegExitBB); - Builder.ClearInsertionPoint(); - } + // If the break was targeting this parallel region, rejoin after it. + if (Break.Target == ParallelRegion) { + Builder.CreateBr(PRegExitBB); + Builder.ClearInsertionPoint(); + } - Break.BB = Builder.GetInsertBlock() ; - assert(! 
Break.BB || !Break.BB->getTerminator()); + Break.BB = Builder.GetInsertBlock(); + assert(!Break.BB || !Break.BB->getTerminator()); } - // exitRegion(ParallelRegion, PRegPreFiniBB, FiniCB); + // exitRegion(ParallelRegion, PRegPreFiniBB, FiniCB); exitRegion(ParallelRegion); OI.OuterAllocaBB = OuterAllocaBlock; @@ -1508,8 +1506,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections( if (!updateToLocation(Loc)) return Loc.IP; - - OMPRegionInfo* SectionsRegion = enterRegion(OMPD_sections, IsCancellable); + OMPRegionInfo *SectionsRegion = enterRegion(OMPD_sections, IsCancellable); // Each section is emitted as a switch case // Each finalization callback is handled from clang.EmitOMPSectionDirective() @@ -1559,35 +1556,27 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections( InsertPointTy AfterIP = applyStaticWorkshareLoop(Loc.DL, LoopInfo, AllocaIP, !IsNowait); - - Builder.restoreIP(AfterIP); - BasicBlock* Finish = splitBB(Builder, true, "section_finish"); + BasicBlock *Finish = splitBB(Builder, true, "section_finish"); if (FiniCB) { - Builder.SetInsertPoint(Finish); - Finish = splitBB(Builder, true, "section_fini"); - FiniCB(Builder.saveAndClearIP()); + Builder.SetInsertPoint(Finish); + Finish = splitBB(Builder, true, "section_fini"); + FiniCB(Builder.saveAndClearIP()); } - - - for (OMPRegionBreakInfo& Break : SectionsRegion->Breaks) { - if (Break.Target == SectionsRegion) { - Builder.SetInsertPoint(Break.BB); - Builder.CreateBr(Finish); - Break.BB = nullptr; - } else if (FiniCB) { - Builder.SetInsertPoint(Break.BB); - Break.BB = splitBBWithSuffix(Builder, /* CreateBranch */ true, ".finisplit"); - FiniCB(Builder.saveAndClearIP()); - } + for (OMPRegionBreakInfo &Break : SectionsRegion->Breaks) { + if (Break.Target == SectionsRegion) { + Builder.SetInsertPoint(Break.BB); + Builder.CreateBr(Finish); + Break.BB = nullptr; + } else if (FiniCB) { + Builder.SetInsertPoint(Break.BB); + Break.BB = + splitBBWithSuffix(Builder, /* CreateBranch */ true, 
".finisplit"); + FiniCB(Builder.saveAndClearIP()); + } } - - - - - exitRegion(SectionsRegion); return {Finish, Finish->begin()}; @@ -1642,7 +1631,7 @@ OpenMPIRBuilder::createSection(const LocationDescription &Loc, // Since we are using Finalization Callback here, HasFinalize // and IsCancellable have to be true return EmitOMPInlinedRegion(OMPD, nullptr, nullptr, BodyGenCB, {}, - /*Conditional*/ false, + /*Conditional*/ false, /*IsCancellable*/ true); } @@ -1964,7 +1953,7 @@ OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc, // avoid that the callback encounters degenerate BBs. BodyGenCB(CL->getBodyIP(), CL->getIndVar()); - //exitRegion(LoopRegion, nullptr, {}); + // exitRegion(LoopRegion, nullptr, {}); exitRegion(LoopRegion); #ifndef NDEBUG @@ -3391,11 +3380,9 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createOrderedThreadsSimd( OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion( Directive OMPD, Instruction *EntryCall, Instruction *ExitCall, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional, - bool IsCancellable) { - - - OMPRegionInfo* Region = enterRegion(OMPD, IsCancellable); + bool IsCancellable) { + OMPRegionInfo *Region = enterRegion(OMPD, IsCancellable); // Create inlined region's entry and body blocks, in preparation // for conditional creation @@ -3414,53 +3401,55 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion( BodyGenCB(/* AllocaIP */ InsertPointTy(), /* CodeGenIP */ Builder.saveAndClearIP()); - - // Exits are handled the following way: // // 1. For the regular region exit, \p FinCB is used by the caller to emit - // finalization code somehwere on the control path exiting the region. exitRegion itself does nothing. + // finalization code somehwere on the control path exiting the region. + // exitRegion itself does nothing. // // 2. 
For irregular region exits that rejoing with the control flow after - // this region, exitRegion emits a branch to \p FinBB containing the finalization code. This is typically that same code as for case 1 avoiding emitting the same finialization code multiple times. + // this region, exitRegion emits a branch to \p FinBB containing the + // finalization code. This is typically that same code as for case 1 + // avoiding emitting the same finialization code multiple times. // // 3. For irregular region exits that rejoin a surrounding region, exitRegion - // calls \p FinCB to insert the finalization code into the exiting control path. The irregular exit is then added as an irregular exit of the sourrounding loop that, opon its exit, can add its own finialization code and/or rejoin the control flow there. + // calls \p FinCB to insert the finalization code into the exiting control + // path. The irregular exit is then added as an irregular exit of the + // sourrounding loop that, opon its exit, can add its own finialization + // code and/or rejoin the control flow there. BasicBlock *FiniStartBB = FiniBB; - if (FiniCB ) { - Builder.SetInsertPoint(FiniBB, FiniBB->begin()); - FiniBB = splitBBWithSuffix(Builder, /*CreateBranch*/ true, ".finisplit"); - FiniCB(Builder.saveAndClearIP()); + if (FiniCB) { + Builder.SetInsertPoint(FiniBB, FiniBB->begin()); + FiniBB = splitBBWithSuffix(Builder, /*CreateBranch*/ true, ".finisplit"); + FiniCB(Builder.saveAndClearIP()); } // emit exit call and do any needed finalization. 
auto FinIP = InsertPointTy(FiniBB, FiniBB->getFirstInsertionPt()); assert(FiniBB->getTerminator()->getNumSuccessors() == 1 && - FiniBB->getTerminator()->getSuccessor(0) == ExitBB && + FiniBB->getTerminator()->getSuccessor(0) == ExitBB && "Unexpected control flow graph state!!"); emitCommonDirectiveExit(OMPD, FinIP, ExitCall); - - for (OMPRegionBreakInfo& Break : Region->Breaks) { - if (Break.Target == Region) { - Builder.SetInsertPoint(Break.BB); - Builder.CreateBr(FiniStartBB); - Break.BB = nullptr; - } else if (FiniCB) { - Builder.SetInsertPoint(Break.BB); - Break.BB = splitBBWithSuffix(Builder, /* CreateBranch */ true, ".finisplit"); - FiniCB(Builder.saveAndClearIP()); - } + for (OMPRegionBreakInfo &Break : Region->Breaks) { + if (Break.Target == Region) { + Builder.SetInsertPoint(Break.BB); + Builder.CreateBr(FiniStartBB); + Break.BB = nullptr; + } else if (FiniCB) { + Builder.SetInsertPoint(Break.BB); + Break.BB = + splitBBWithSuffix(Builder, /* CreateBranch */ true, ".finisplit"); + FiniCB(Builder.saveAndClearIP()); + } } - exitRegion(Region); - // FIXME: Only added to not break tests. 
if (FiniStartBB != FiniBB) - MergeBlockIntoPredecessor(FiniStartBB); + MergeBlockIntoPredecessor(FiniStartBB); assert(FiniBB->getUniquePredecessor()->getUniqueSuccessor() == FiniBB && "Unexpected Control Flow State!"); @@ -3516,8 +3505,6 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveExit( Builder.restoreIP(FinIP); - - if (!ExitCall) return Builder.saveIP(); diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index 1f168e1fb8c28..43d58b6d3ae53 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -3507,12 +3507,11 @@ TEST_F(OpenMPIRBuilderTest, CreateReductions) { return Builder.saveIP(); }; - - InsertPointTy AfterIP = - OMPBuilder.createParallel(Loc, OuterAllocaIP, BodyGenCB, PrivCB, /* FiniCB */ {}, - /* IfCondition */ nullptr, - /* NumThreads */ nullptr, OMP_PROC_BIND_default, - /* IsCancellable */ false); + InsertPointTy AfterIP = OMPBuilder.createParallel( + Loc, OuterAllocaIP, BodyGenCB, PrivCB, /* FiniCB */ {}, + /* IfCondition */ nullptr, + /* NumThreads */ nullptr, OMP_PROC_BIND_default, + /* IsCancellable */ false); Builder.restoreIP(AfterIP); OpenMPIRBuilder::ReductionInfo ReductionInfos[] = { From 02f03f2a245fde524fc5cdad251f580398d8457c Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 2 May 2022 22:25:28 -0500 Subject: [PATCH 40/50] cleanup --- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 7 +- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 184 +++--------------- 2 files changed, 35 insertions(+), 156 deletions(-) diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 51a74d59ed33c..74f007a306950 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -866,12 +866,14 @@ class OpenMPIRBuilder { /// Generate control flow and cleanup for cancellation. 
/// + /// \param Loc Source location used for debug info and ident_t. /// \param CancelFlag Flag indicating if the cancellation is performed. /// \param CancelledDirective The kind of directive that is cancled. + /// \param CancelReason Cause of the irregular exit. /// \param ExitCB Extra code to be generated in the exit block. void emitCancelationCheckImpl(LocationDescription Loc, Value *CancelFlag, omp::Directive CancelledDirective, - omp::Directive CancelledBy); + omp::Directive CancelReason); /// Generate a barrier runtime call. /// @@ -1306,9 +1308,6 @@ class OpenMPIRBuilder { /// \param Conditional indicate if the entry call result will be used /// to evaluate a conditional of whether a thread will execute /// body code or not. - /// \param HasFinalize indicate if the directive will require finalization - /// and has a finalization callback in the stack that - /// should be called. /// \param IsCancellable if HasFinalize is set to true, indicate if the /// the directive should be cancellable. /// \return The insertion point after the region diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 754b07b955925..687265f6ddb85 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -700,6 +700,8 @@ OpenMPIRBuilder::OMPRegionInfo::OMPRegionInfo(RegionKind Kind, void OpenMPIRBuilder::OMPRegionInfo::addBreak(BasicBlock *BB, omp::Directive Reason, OMPRegionInfo *Target) { + // TODO: Multiple breaks with same Target/Reasons can be combined. + assert(IsCancellable && "Only cancellable region may have irregular exits"); assert(!BB->getTerminator() && "Irregular exit must not rejoin the cfg"); Breaks.emplace_back(BB, Reason, Target); @@ -768,32 +770,6 @@ void OpenMPIRBuilder::exitRegion(OMPRegionInfo *R) { // Trickle down no yet handled breaks.
OMPRegionInfo *Innermost = RegionStack.back().get(); OMPRegionInfo *NewInnermost = RegionStack.rbegin()[1].get(); - -#if 0 - for (OMPRegionBreakInfo &Break : reverse(Innermost->Breaks)) { - assert(!Break.BB->getTerminator() && "Expect BB not yet connected back to the cfg"); - assert(Innermost->IsCancellable && - "surrounding region must be cancellable"); - Builder.SetInsertPoint(Break.BB); - - - - if (Break.Target == Innermost) { - // Join common finialization block - Builder.CreateBr(FinBB); - Break.BB = nullptr; - } else if (FinCB) { - // Emit dedicated fininalization since we cannot use use the one for the - // regular exit. - // TODO: Implement switch-on-source-bb-index scheme like Clang's - // EmitBranchThroughCleanup does. - Break.BB = splitBB(Builder, true, ".fini"); - FinCB(Builder.saveIP()); - Builder.SetInsertPoint(Break.BB); - } - } -#endif - for (OMPRegionBreakInfo &Break : Innermost->Breaks) { if (Break.Target == R) { assert(!Break.BB && @@ -802,7 +778,6 @@ void OpenMPIRBuilder::exitRegion(OMPRegionInfo *R) { NewInnermost->addBreak(Break.BB, Break.Reason, Break.Target); } } - Innermost->Breaks.clear(); RegionStack.pop_back(); NewInnermost->assertOK(); @@ -860,7 +835,7 @@ OpenMPIRBuilder::emitBarrierImpl(const LocationDescription &Loc, Directive Kind, Args); if (UseCancelBarrier && CheckCancelFlag) - emitCancelationCheckImpl(Loc, Result, OMPD_parallel, OMPD_barrier); + emitCancelationCheckImpl(Loc, Result, /* CancelledDirective */ OMPD_parallel, /* CancelReason */ OMPD_barrier); return Builder.saveIP(); } @@ -910,7 +885,7 @@ OpenMPIRBuilder::createCancel(const LocationDescription &Loc, getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args); // The actual cancel logic is shared with others, e.g., cancel_barriers. 
- emitCancelationCheckImpl(Loc, Result, CancelledDirective, OMPD_cancel); + emitCancelationCheckImpl(Loc, Result, CancelledDirective, /* CancelReason */ OMPD_cancel); if (ContBB) return {ContBB, ContBB->begin()}; @@ -957,7 +932,7 @@ void OpenMPIRBuilder::emitOffloadingEntry(Constant *Addr, StringRef Name, void OpenMPIRBuilder::emitCancelationCheckImpl( LocationDescription Loc, Value *CancelFlag, - omp::Directive CancelledDirective, omp::Directive CancelledBy) { + omp::Directive CancelledDirective, omp::Directive CancelReason) { assert(isLastFinalizationInfoCancellable(CancelledDirective) && "Unexpected cancellation!"); @@ -965,7 +940,9 @@ void OpenMPIRBuilder::emitCancelationCheckImpl( BasicBlock *BB = Builder.GetInsertBlock(); LLVMContext &Ctx = BB->getContext(); - // BB + // Building the following control flow: +// + // BB: // br i1 CancelFlag // | | // | PreCancellationBlock (".cncl.fini") @@ -975,10 +952,7 @@ void OpenMPIRBuilder::emitCancelationCheckImpl( // NonCancellationBlock (".cont") BasicBlock *NonCancellationBlock = - splitBBWithSuffix(Builder, /*CreateBranch*/ false, ".cont"); - - // BasicBlock *PreCancellationBlock = BasicBlock::Create(Ctx, BB->getName() + - // ".cncl.fini", BB->getParent(), NonCancellationBlock); + splitBBWithSuffix(Builder, /* CreateBranch */ false, ".cont"); BasicBlock *CancellationBlock = BasicBlock::Create( Ctx, BB->getName() + ".cncl", BB->getParent(), NonCancellationBlock); @@ -987,13 +961,8 @@ void OpenMPIRBuilder::emitCancelationCheckImpl( Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock, /* TODO weight */ nullptr, nullptr); - // TODO: Clang's codegen emits finalization code only once and inserts a - // switch to jump back to the target code path (CGF.EmitBranchThroughCleanup). - // Currently in the OpenMPIRBuilder, we emit the finialization multiple times - // for each path exiting the region (non-cancellation and each cancellation - // check).
- - RegionStack.back()->addBreak(CancellationBlock, CancelledBy, +// Register an irregular exit to be handled by the surrounding construct. + RegionStack.back()->addBreak(CancellationBlock, CancelReason, getInnermostRegion(CancelledDirective)); // The continuation block is where code generation continues. @@ -1079,9 +1048,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( BasicBlock *PRegExitBB = PRegPreFiniBB->splitBasicBlock(ThenTI, "omp.par.exit"); - // FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable}); - // OMPRegionInfo *ParallelRegion = enterRegion(OMPD_parallel, IsCancellable , - // FiniCBWrapper ); + OMPRegionInfo *ParallelRegion = enterRegion(OMPD_parallel, IsCancellable); // Generate the privatization allocas in the block that will become the entry @@ -1210,78 +1177,21 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( } for (Instruction *I : ToBeDeleted) - I->eraseFromParent(); // FIXME: Don't add temporary instructions!! + I->eraseFromParent(); }; -#if 0 - // Adjust the finalization stack, verify the adjustment, and call the - // finalize function a last time to finalize values between the pre-fini - // block and the exit block if we left the parallel "the normal way". - auto FiniInfo = FinalizationStack.pop_back_val(); - (void)FiniInfo; - assert(FiniInfo.DK == OMPD_parallel && - "Unexpected finalization stack state!"); -#endif - -#if 0 - auto FiniCBWrapper = [&](InsertPointTy IP, omp::Directive BreakReason, OMPRegionInfo *FinRegion) { - if (BreakReason != OMPD_barrier) { - emitBarrierImpl(Loc, BreakReason, /*ForceSimpleCall*/ false, /*CheckCancelFlag*/ false); - } - -#if 0 - // FIXME: This is broken - // 1. Should be done after the FiniCB - // 2. 
It may deadlock - if (LeaveReason != OMPD_unknown) { - createBarrier(LocationDescription(Builder.saveIP(), Loc.DL), - omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false, - /* CheckCancelFlag */ false); - } -#endif - -#if 0 - // Unless cancellation has been detected by a barrier itself, need to - // synchronize between threads (after finalization). - Builder.SetInsertPoint(CancellationBlock); - if (CancelledDirective == OMPD_parallel && CancelledBy != OMPD_barrier) - emitBarrierImpl(Loc, CancelledBy, /*ForceSimpleCall*/ false, /*CheckCancelFlag*/ false); -#endif - - if (FiniCB) - FiniCB(IP); - }; -#endif - Instruction *PRegPreFiniTI = PRegPreFiniBB->getTerminator(); +// Emit frontend finalizations (e.g. destructors) at the end of the regular exit. if (FiniCB) { InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator()); FiniCB(PreFiniIP); } -#if 0 - for (auto& B : reverse(ParallelRegion->Breaks)) { - Builder.SetInsertPoint(B.BB); - - if (FiniCB) { - B.BB = splitBB(Builder, true, ".fini"); - FiniCB(Builder.saveIP(), B.Reason, ParallelRegion); - Builder.SetInsertPoint( B.BB); - } - - if (B.Target == OMPD_parallel) { - Builder.CreateBr(PRegExitBB); - B.BB = nullptr; - } - } - ParallelRegion->Breaks.erase( llvm::remove_if(ParallelRegion->Breaks, [](const OMPRegionBreak& B) { - return !B.BB; - }), ParallelRegion->Breaks.end() ); - //emitRegionExit(PreFiniIP, ParallelRegion); -#endif +// Also emit finalizations to each irregular exit. +// Note that we cannot reuse the regular exit finalization code (like EmitOMPInlinedRegion and createSections) because some of the cancellation may need an additional barrier.
for (OMPRegionBreakInfo &Break : ParallelRegion->Breaks) { Builder.SetInsertPoint(Break.BB); @@ -1309,7 +1219,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( assert(!Break.BB || !Break.BB->getTerminator()); } - // exitRegion(ParallelRegion, PRegPreFiniBB, FiniCB); + exitRegion(ParallelRegion); OI.OuterAllocaBB = OuterAllocaBlock; @@ -1589,43 +1499,6 @@ OpenMPIRBuilder::createSection(const LocationDescription &Loc, if (!updateToLocation(Loc)) return Loc.IP; -#if 0 - auto &SectionsFini = FinalizationStack.back(); - assert(SectionsFini.DK == OMPD_sections); - - auto FiniCBWrapper = [&](InsertPointTy IP, omp::Directive CancelledDirective, - omp::Directive CancelledBy) { - assert(CancelledDirective == OMPD_sections); - -#if 0 - if (IP.getBlock()->end() != IP.getPoint()) - return FiniCB(IP,LeavingRegion, CancelledBy); - // This must be done otherwise any nested constructs using FinalizeOMPRegion - // will fail because that function requires the Finalization Basic Block to - // have a terminator, which is already removed by EmitOMPRegionBody. - // IP is currently at cancelation block. - // We need to backtrack to the condition block to fetch - // the exit block and create a branch from cancelation - // to exit block. 
- IRBuilder<>::InsertPointGuard IPG(Builder); - Builder.restoreIP(IP); - auto *CaseBB = Loc.IP.getBlock(); - auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor(); - auto *ExitBB = CondBB->getTerminator()->getSuccessor(1); - Instruction *I = Builder.CreateBr(ExitBB); - IP = InsertPointTy(I->getParent(), I->getIterator()); - -#endif - auto UserFini = splitBB(Builder, true, ".section_userfini"); - - if (FiniCB) - FiniCB(Builder.saveIP(), CancelledDirective, CancelledBy); - - if (SectionsFini.FiniCB) - SectionsFini.FiniCB({UserFini, UserFini->begin()}, CancelledDirective, - CancelledBy); - }; -#endif Directive OMPD = Directive::OMPD_section; // Since we are using Finalization Callback here, HasFinalize @@ -1953,7 +1826,6 @@ OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc, // avoid that the callback encounters degenerate BBs. BodyGenCB(CL->getBodyIP(), CL->getIndVar()); - // exitRegion(LoopRegion, nullptr, {}); exitRegion(LoopRegion); #ifndef NDEBUG @@ -3399,29 +3271,36 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion( // generate body BodyGenCB(/* AllocaIP */ InsertPointTy(), - /* CodeGenIP */ Builder.saveAndClearIP()); + /* CodeGenIP */ Builder.saveIP()); - // Exits are handled the following way: + // Exits are handled the following way: // // 1. For the regular region exit, \p FinCB is used by the caller to emit // finalization code somehwere on the control path exiting the region. // exitRegion itself does nothing. // // 2. For irregular region exits that rejoing with the control flow after - // this region, exitRegion emits a branch to \p FinBB containing the + // this region, exitRegion emits a branch to FiniBB containing the // finalization code. This is typically that same code as for case 1 // avoiding emitting the same finialization code multiple times. // // 3. 
For irregular region exits that rejoin a surrounding region, exitRegion - // calls \p FinCB to insert the finalization code into the exiting control + // calls FiniCB to insert the finalization code into the exiting control // path. The irregular exit is then added as an irregular exit of the - // sourrounding loop that, opon its exit, can add its own finialization + // sourrounding loop that, upon its exit, can add its own finialization // code and/or rejoin the control flow there. + // + // TODO: Clang's codegen emits finalization code only once and inserts a + // switch to jump back to the target code path (CGF.EmitBranchThroughCleanup). + // Currently in the OpenMPIRBuilder, we emit the finialization multiple times + // for each path exiting the region (non-cancellation and each cancellation + // check). + BasicBlock *FiniStartBB = FiniBB; if (FiniCB) { Builder.SetInsertPoint(FiniBB, FiniBB->begin()); - FiniBB = splitBBWithSuffix(Builder, /*CreateBranch*/ true, ".finisplit"); + FiniBB = splitBBWithSuffix(Builder, /* CreateBranch */ true, ".finisplit"); FiniCB(Builder.saveAndClearIP()); } @@ -3432,6 +3311,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion( "Unexpected control flow graph state!!"); emitCommonDirectiveExit(OMPD, FinIP, ExitCall); + for (OMPRegionBreakInfo &Break : Region->Breaks) { if (Break.Target == Region) { Builder.SetInsertPoint(Break.BB); @@ -3735,7 +3615,7 @@ OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD, // else // return; - auto *UI = Builder.CreateUnreachable(); // Don't do that + auto *UI = Builder.CreateUnreachable(); BasicBlock *CheckBB = UI->getParent(); BasicBlock *UserCodeEntryBB = CheckBB->splitBasicBlock(UI, "user_code.entry"); From b2405495c78c32ed8f82b440b73c681286c4ad49 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 2 May 2022 22:40:11 -0500 Subject: [PATCH 41/50] cleanup --- llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp | 5 ++--- 1 file changed, 2 
insertions(+), 3 deletions(-) diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index 43d58b6d3ae53..9bad3a244ab04 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -825,7 +825,6 @@ TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) { OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.initialize(); F->setName("func"); - BB->setName("entry"); IRBuilder<> Builder(BB); BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F); @@ -2138,7 +2137,7 @@ TEST_F(OpenMPIRBuilderTest, MasterDirective) { MasterEndCI = nullptr; } } - ASSERT_NE(MasterEndCI, nullptr); + EXPECT_NE(MasterEndCI, nullptr); EXPECT_EQ(MasterEndCI->arg_size(), 2U); EXPECT_TRUE(isa(MasterEndCI->getArgOperand(0))); EXPECT_EQ(MasterEndCI->getArgOperand(1), MasterEntryCI->getArgOperand(1)); @@ -3508,7 +3507,7 @@ TEST_F(OpenMPIRBuilderTest, CreateReductions) { }; InsertPointTy AfterIP = OMPBuilder.createParallel( - Loc, OuterAllocaIP, BodyGenCB, PrivCB, /* FiniCB */ {}, + Loc, OuterAllocaIP, BodyGenCB, PrivCB, {}, /* IfCondition */ nullptr, /* NumThreads */ nullptr, OMP_PROC_BIND_default, /* IsCancellable */ false); From bcc9d0ef156c71f2e4c701bbbc48d805c2321398 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 2 May 2022 22:51:47 -0500 Subject: [PATCH 42/50] cleanup --- clang/lib/CodeGen/CGStmtOpenMP.cpp | 32 ++------------------- clang/lib/CodeGen/CodeGenFunction.h | 15 ++++------ clang/test/OpenMP/critical_codegen_attr.cpp | 8 +++--- clang/test/OpenMP/masked_codegen.cpp | 4 +-- clang/test/OpenMP/master_codegen.cpp | 4 +-- 5 files changed, 15 insertions(+), 48 deletions(-) diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 6cde25bcae026..8796fa810bb50 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -1714,8 +1714,7 @@ void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody( void 
CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { if (CGM.getLangOpts().OpenMPIRBuilder && - !IsInsideNonOpenMPIRBuilderHandledRegion) { - llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); + !IsInsideNonOpenMPIRBuilderHandledRegion) { // Check if we have any if clause associated with the directive. llvm::Value *IfCond = nullptr; if (const auto *C = S.getSingleClause()) @@ -1739,29 +1738,6 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); }; - auto CancelCB = [this](InsertPointTy IP, - llvm::omp::Directive CanceledDirective, - llvm::omp::Directive CanceledBy) { - llvm_unreachable("TODO"); -#if 0 - assert(CanceledDirective == OMPD_parallel); - if (CanceledBy == OMPD_unknown) - return; - - auto &Stack = OMPCancelStack.Stack; - - Builder.restoreIP(IP); - auto CurBB = IP.getBlock(); - llvm::BasicBlock *ContBB = nullptr; - ContBB = splitBBWithSuffix(Builder, /*CreateBranch*/ false, ".cnclsplit"); - - - auto &ExitDest = Stack.back().ExitBlock; - auto Dest = JumpDest(ContBB, ExitDest.getScopeDepth(), NextCleanupDestIndex++); - - EmitBranchThroughCleanup(Dest); -#endif - }; // Privatization callback that performs appropriate action for // shared/private/firstprivate/lastprivate/copyin/... variables. 
// @@ -1788,17 +1764,12 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); - // Builder.restoreIP( OMPBuilder.createParallel(Builder, AllocaIP, - // BodyGenCB, PrivCB, FiniCB, IfCond, NumThreads, ProcBind, - // S.hasCancel())); CGM.getOpenMPRuntime().emitIRBuilderParallel(*this, CS, BodyGenCB, PrivCB, FiniCB, IfCond, NumThreads, ProcBind, S.hasCancel()); return; } - CGNonOpenMPIRBuilderRegion RegionScope(*this); - // Emit parallel region as a standalone region. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { Action.Enter(CGF); @@ -1820,6 +1791,7 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); }; { + CGNonOpenMPIRBuilderRegion RegionScope(*this); auto LPCRegion = CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen, diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 462e3b1382c51..db5f82a07b906 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -37,7 +37,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Analysis/CFGPrinter.h" #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Debug.h" @@ -1798,20 +1797,16 @@ class CodeGenFunction : public CodeGenTypeCache { /// \param CGF The Codegen function this belongs to /// \param IP Insertion point for generating the finalization code. static void FinalizeOMPRegion(CodeGenFunction &CGF, - InsertPointTy IP) { // TODO: move to .cpp file - CGBuilderTy::InsertPointGuard IPG(CGF.Builder); // MK: needed? 
+ InsertPointTy IP) { + CGBuilderTy::InsertPointGuard IPG(CGF.Builder); CGF.Builder.restoreIP(IP); llvm::BasicBlock *DestBB = - llvm::splitBB(CGF.Builder, false, ".ompfinalize"); + llvm::splitBB(CGF.Builder, /*CreateBranch*/false, ".ompfinalize"); - // llvm::BasicBlock *IPBB = IP.getBlock(); - // llvm::BasicBlock *DestBB = IPBB->getUniqueSuccessor(); - // assert(DestBB && "Finalization block should have one successor!"); + - // erase and replace with cleanup branch. - // IPBB->getTerminator()->eraseFromParent(); // Don't do this! - // CGF.Builder.SetInsertPoint(IPBB); + CodeGenFunction::JumpDest Dest = CGF.getJumpDestInCurrentScope(DestBB); CGF.EmitBranchThroughCleanup(Dest); } diff --git a/clang/test/OpenMP/critical_codegen_attr.cpp b/clang/test/OpenMP/critical_codegen_attr.cpp index 2b73a7a06f9b2..35f851c6a79c0 100644 --- a/clang/test/OpenMP/critical_codegen_attr.cpp +++ b/clang/test/OpenMP/critical_codegen_attr.cpp @@ -33,8 +33,8 @@ int main() { // ALL: [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:@.+]]) // ALL: call {{.*}}void @__kmpc_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[UNNAMED_LOCK]]) // ALL-NEXT: store i8 2, i8* [[A_ADDR]] -// IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] -// IRBUILDER: [[AFTER]] +// IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] +// IRBUILDER: [[AFTER]] // IRBUILDER-NEXT: br label %[[FINALIZE:[^ ,]+]] // IRBUILDER: [[FINALIZE]] // ALL-NEXT: call {{.*}}void @__kmpc_end_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[UNNAMED_LOCK]]) @@ -44,8 +44,8 @@ int main() { // ALL: call {{.*}}void @__kmpc_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK]]) // IRBUILDER-NEXT: call {{.*}}void [[FOO]]() // NORMAL-NEXT: invoke {{.*}}void [[FOO]]() -// IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] -// IRBUILDER: [[AFTER]] +// IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] +// IRBUILDER: [[AFTER]] // IRBUILDER-NEXT: br label %[[FINALIZE:[^ ,]+]] 
// IRBUILDER: [[FINALIZE]] // ALL: call {{.*}}void @__kmpc_end_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK]]) diff --git a/clang/test/OpenMP/masked_codegen.cpp b/clang/test/OpenMP/masked_codegen.cpp index 992d5658a5f3b..1921356e8bdb4 100644 --- a/clang/test/OpenMP/masked_codegen.cpp +++ b/clang/test/OpenMP/masked_codegen.cpp @@ -33,8 +33,8 @@ int main() { // ALL-NEXT: br i1 [[IS_MASKED]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]] // ALL: [[THEN]] // ALL-NEXT: store i8 2, i8* [[A_ADDR]] -// IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] -// IRBUILDER: [[AFTER]] +// IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] +// IRBUILDER: [[AFTER]] // IRBUILDER-NEXT: br label %[[FINALIZE:[^ ,]+]] // IRBUILDER: [[FINALIZE]] // ALL-NEXT: call {{.*}}void @__kmpc_end_masked([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) diff --git a/clang/test/OpenMP/master_codegen.cpp b/clang/test/OpenMP/master_codegen.cpp index cb202ffc0a175..af0dca8ea31b7 100644 --- a/clang/test/OpenMP/master_codegen.cpp +++ b/clang/test/OpenMP/master_codegen.cpp @@ -33,8 +33,8 @@ int main() { // ALL-NEXT: br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]] // ALL: [[THEN]] // ALL-NEXT: store i8 2, i8* [[A_ADDR]] -// IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] -// IRBUILDER: [[AFTER]] +// IRBUILDER-NEXT: br label %[[AFTER:[^ ,]+]] +// IRBUILDER: [[AFTER]] // IRBUILDER-NEXT: br label %[[FINALIZE:[^ ,]+]] // IRBUILDER: [[FINALIZE]] // ALL-NEXT: call {{.*}}void @__kmpc_end_master([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) From 317c40c8099099a6084582e17f34246821112f10 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 2 May 2022 23:00:22 -0500 Subject: [PATCH 43/50] clang cleanup --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 16 +--------------- clang/lib/CodeGen/CGStmtOpenMP.cpp | 12 ++++-------- 2 files changed, 5 insertions(+), 23 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 
0adc0ff2807f0..fb6ba3bdee18c 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1240,16 +1240,8 @@ struct PushAndPopStackRAII { CGF.EmitBranchThroughCleanup(Dest); }; - // llvm_unreachable("TODO: set UserManaged=true"); - // TODO: Remove this once we emit parallel regions through the - // OpenMPIRBuilder as it can do this setup internally. - // llvm::OpenMPIRBuilder::FinalizationInfo FI{{}, Kind, HasCancel, - // /*UserManaged*/ true}; OMPBuilder->pushFinalizationCB(std::move(FI)); - } - ~PushAndPopStackRAII() { - // if (OMPBuilder) - // OMPBuilder->popFinalizationCB(); } + ~PushAndPopStackRAII() { } llvm::OpenMPIRBuilder *OMPBuilder; }; } // namespace @@ -2150,13 +2142,8 @@ void CGOpenMPRuntime::emitIRBuilderParallel( using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; auto BodyGenCBWrapper = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { - // CGF.OMPCancelStack.enter(CGF, OMPD_parallel, /* HasCancel*/ true); - if (BodyGenCB) BodyGenCB(AllocaIP, CodeGenIP); - - // CGF.Builder.ClearInsertionPoint(); - int a = 0; }; OpenMPIRBuilderRegionInfo CGSI(*CS, OMPD_parallel); @@ -2166,7 +2153,6 @@ void CGOpenMPRuntime::emitIRBuilderParallel( AllocaInsertPt->getIterator()); Builder.restoreIP(OMPBuilder.createParallel( Builder, AllocaIP, BodyGenCBWrapper, PrivCB, FiniCB, - // CancelCB, IfCond, NumThreads, ProcBind, IsCancellable)); } diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 8796fa810bb50..1d8017640f1b7 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -4129,19 +4129,16 @@ void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt(); - auto FiniCB = [this](InsertPointTy ExitingIP) { - OMPBuilderCBHelpers::FinalizeOMPRegion(*this, ExitingIP); + auto FiniCB = [this](InsertPointTy IP) { + 
OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); }; auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { Builder.restoreIP(CodeGenIP); - auto FiniBB = splitBBWithSuffix(Builder, false, ".sectionfini"); + llvm::BasicBlock * FiniBB = splitBBWithSuffix(Builder, false, ".sectionfini"); OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, *FiniBB); - // OMPBuilderCBHelpers::EmitOMPRegionBody(*this, - // SectionRegionBodyStmt, CodeGenIP, FiniBB); - EmitStmt(SectionRegionBodyStmt); Builder.CreateBr(FiniBB); @@ -6934,9 +6931,8 @@ void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { llvm::Value *IfCondition = nullptr; if (IfCond) IfCondition = EvaluateExprAsBool(IfCond); - Builder.restoreIP( + return Builder.restoreIP( OMPBuilder.createCancel(Builder, IfCondition, S.getCancelRegion())); - return; } CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond, From 0fce69bc7af50964d023bf93491227a0359dd789 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 2 May 2022 23:26:16 -0500 Subject: [PATCH 44/50] remove PushAndPopStackRAII --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 45 +-------------------------- clang/lib/CodeGen/CGStmtOpenMP.cpp | 3 ++ 2 files changed, 4 insertions(+), 44 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index fb6ba3bdee18c..9039e0c63e65b 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1204,47 +1204,7 @@ CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { return UDRMap.lookup(D); } -namespace { -// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR -// Builder if one is present. 
-struct PushAndPopStackRAII { - CodeGenFunction::CGNonOpenMPIRBuilderRegion NonOMPBuilderScope; - PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF, - bool HasCancel, llvm::omp::Directive Kind) - : OMPBuilder(OMPBuilder), NonOMPBuilderScope(CGF) { - if (!OMPBuilder) - return; - // The following callback is the crucial part of clangs cleanup process. - // - // NOTE: - // Once the OpenMPIRBuilder is used to create parallel regions (and - // similar), the cancellation destination (Dest below) is determined via - // IP. That means if we have variables to finalize we split the block at IP, - // use the new block (=BB) as destination to build a JumpDest (via - // getJumpDestInCurrentScope(BB)) which then is fed to - // EmitBranchThroughCleanup. Furthermore, there will not be the need - // to push & pop an FinalizationInfo object. - // The FiniCB will still be needed but at the point where the - // OpenMPIRBuilder is asked to construct a parallel (or similar) construct. - auto CancelCB = [&CGF, Kind](llvm::OpenMPIRBuilder::InsertPointTy IP, - llvm::omp::Directive CanceledDirective, - llvm::omp::Directive CanceledBy) { - assert(CanceledDirective == Kind); - assert(IP.getBlock()->end() == IP.getPoint() && - "Clang CG should cause non-terminated block!"); - CGBuilderTy::InsertPointGuard IPG(CGF.Builder); - CGF.Builder.restoreIP(IP); - CodeGenFunction::JumpDest Dest = - CGF.getOMPCancelDestination(OMPD_parallel); - CGF.EmitBranchThroughCleanup(Dest); - }; - - } - ~PushAndPopStackRAII() { } - llvm::OpenMPIRBuilder *OMPBuilder; -}; -} // namespace static llvm::Function *emitParallelOrTeamsOutlinedFunction( CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, @@ -1273,10 +1233,7 @@ static llvm::Function *emitParallelOrTeamsOutlinedFunction( dyn_cast(&D)) HasCancel = OPFD->hasCancel(); - // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new - // parallel region to make cancellation barriers work properly. 
- llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); - PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind); + CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, HasCancel, OutlinedHelperName); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 1d8017640f1b7..90ebae8c01de3 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -3730,6 +3730,8 @@ static void emitScanBasedDirective( static bool emitWorksharingDirective(CodeGenFunction &CGF, const OMPLoopDirective &S, bool HasCancel) { + CodeGenFunction:: CGNonOpenMPIRBuilderRegion NonOmpBuilderScope(CGF); + bool HasLastprivates; if (llvm::any_of(S.getClausesOfKind(), [](const OMPReductionClause *C) { @@ -3908,6 +3910,7 @@ static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, } void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { + CGNonOpenMPIRBuilderRegion NonOmpBuilderScope(*this); const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt(); const auto *CS = dyn_cast(CapturedStmt); bool HasLastprivates = false; From 4096a6e1f3cc9dbd62bf9dd73228dba1d4b3b5e4 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 2 May 2022 23:35:14 -0500 Subject: [PATCH 45/50] remove createIRBuilderParallel --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 29 --------------------------- clang/lib/CodeGen/CGOpenMPRuntime.h | 9 --------- clang/lib/CodeGen/CGStmtOpenMP.cpp | 10 ++++++--- clang/lib/CodeGen/CodeGenFunction.h | 2 ++ 4 files changed, 9 insertions(+), 41 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 9039e0c63e65b..9ae76f96f2a76 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -2082,36 +2082,7 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation 
Loc, } } -void CGOpenMPRuntime::emitIRBuilderParallel( - CodeGenFunction &CGF, const CapturedStmt *CS, - llvm::OpenMPIRBuilder::BodyGenCallbackTy BodyGenCB, - llvm::OpenMPIRBuilder::PrivatizeCallbackTy PrivCB, - llvm::OpenMPIRBuilder::FinalizeCallbackTy FiniCB, - // llvm:: OpenMPIRBuilder:: CancellationCallbackTy CancelCB, - llvm::Value *IfCond, llvm::Value *NumThreads, - llvm::omp::ProcBindKind ProcBind, bool IsCancellable) { - auto &Builder = CGF.Builder; - auto AllocaInsertPt = CGF.AllocaInsertPt; - - // FIXME: CGCapturedStmtInfo is an abstract class, CGOpenMPOutlinedRegionInfo - // would be correct here. - // CodeGenFunction:: CGCapturedStmtInfo CGSI(*CS, CR_OpenMP); - - using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; - auto BodyGenCBWrapper = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { - if (BodyGenCB) - BodyGenCB(AllocaIP, CodeGenIP); - }; - - OpenMPIRBuilderRegionInfo CGSI(*CS, OMPD_parallel); - CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI); - llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(AllocaInsertPt->getParent(), - AllocaInsertPt->getIterator()); - Builder.restoreIP(OMPBuilder.createParallel( - Builder, AllocaIP, BodyGenCBWrapper, PrivCB, FiniCB, - IfCond, NumThreads, ProcBind, IsCancellable)); -} // If we're inside an (outlined) parallel region, use the region info's // thread-ID variable (it is passed in a first argument of the outlined function diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index 963f945cc893a..7fc6a7e278e51 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -1022,15 +1022,6 @@ class CGOpenMPRuntime { ArrayRef CapturedVars, const Expr *IfCond, llvm::Value *NumThreads); - void emitIRBuilderParallel( - CodeGenFunction &CGF, const CapturedStmt *CS, - llvm::OpenMPIRBuilder::BodyGenCallbackTy BodyGenCB, - llvm::OpenMPIRBuilder::PrivatizeCallbackTy PrivCB, - llvm::OpenMPIRBuilder::FinalizeCallbackTy FiniCB, - // llvm:: 
OpenMPIRBuilder:: CancellationCallbackTy CancelCB, - llvm::Value *IfCondition, llvm::Value *NumThreads, - llvm::omp::ProcBindKind ProcBind, bool IsCancellable); - /// Emits a critical region. /// \param CriticalName Name of the critical region. /// \param CriticalOpGen Generator for the statement associated with the given diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 90ebae8c01de3..9b1267234c206 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -1715,6 +1715,8 @@ void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody( void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { if (CGM.getLangOpts().OpenMPIRBuilder && !IsInsideNonOpenMPIRBuilderHandledRegion) { + llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); + // Check if we have any if clause associated with the directive. llvm::Value *IfCond = nullptr; if (const auto *C = S.getSingleClause()) @@ -1764,12 +1766,14 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); - CGM.getOpenMPRuntime().emitIRBuilderParallel(*this, CS, BodyGenCB, PrivCB, - FiniCB, IfCond, NumThreads, - ProcBind, S.hasCancel()); + Builder.restoreIP( + OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB, + IfCond, NumThreads, ProcBind, S.hasCancel())); return; } + + // Emit parallel region as a standalone region. 
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { Action.Enter(CGF); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index db5f82a07b906..b3a7d3609a8f4 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -489,6 +489,8 @@ class CodeGenFunction : public CodeGenTypeCache { ~CGCapturedStmtRAII() { CGF.CapturedStmtInfo = PrevCapturedStmtInfo; } }; + /// While in a region handled by Clang's CGOpenMPRuntime, do not use the OpenMPIRBuilder which requires all surrounding regions to be handled by OpenMPIRBuilder as well. + /// /// Required until everything can be handled by OpenMPIRBuilder. /// Isn't the ultimate solution to mixing OpenMPIRBuilder and /// non-OpenMPIRBuilder codegen either, but works with the current regression From aef32c536320a3c3759fb685a288ee901042ad6f Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 2 May 2022 23:49:50 -0500 Subject: [PATCH 46/50] unify CodeGenFunction:: NonOpenMPIRBuilderRegion NonOmpBuilderScope(CGF); --- clang/lib/CodeGen/CGStmtOpenMP.cpp | 20 ++++++++++++-------- clang/lib/CodeGen/CodeGenFunction.h | 6 +++--- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 9b1267234c206..a199ba67e0fbf 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -1540,6 +1540,8 @@ static void emitCommonOMPParallelDirective( CodeGenFunction &CGF, const OMPExecutableDirective &S, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, const CodeGenBoundParametersTy &CodeGenBoundParameters) { + CodeGenFunction:: NonOpenMPIRBuilderRegion NonBuilderScope(CGF); + const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); llvm::Value *NumThreads = nullptr; llvm::Function *OutlinedFn = @@ -1795,7 +1797,7 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { 
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); }; { - CGNonOpenMPIRBuilderRegion RegionScope(*this); + auto LPCRegion = CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen, @@ -3734,7 +3736,7 @@ static void emitScanBasedDirective( static bool emitWorksharingDirective(CodeGenFunction &CGF, const OMPLoopDirective &S, bool HasCancel) { - CodeGenFunction:: CGNonOpenMPIRBuilderRegion NonOmpBuilderScope(CGF); + CodeGenFunction:: NonOpenMPIRBuilderRegion NonOmpBuilderScope(CGF); bool HasLastprivates; if (llvm::any_of(S.getClausesOfKind(), @@ -3864,7 +3866,7 @@ void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { return; } - CGNonOpenMPIRBuilderRegion Scope(*this); + HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel()); }; { @@ -3914,7 +3916,9 @@ static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, } void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { - CGNonOpenMPIRBuilderRegion NonOmpBuilderScope(*this); + NonOpenMPIRBuilderRegion NonOmpBuilderScope(*this); + + const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt(); const auto *CS = dyn_cast(CapturedStmt); bool HasLastprivates = false; @@ -4158,7 +4162,7 @@ void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { return; } - CGNonOpenMPIRBuilderRegion NonOpenMPIRBuilderRegion(*this); + LexicalScope Scope(*this, S.getSourceRange()); EmitStopPoint(&S); EmitStmt(S.getAssociatedStmt()); @@ -4502,7 +4506,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion, const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen, OMPTaskDataTy &Data) { - CGNonOpenMPIRBuilderRegion NonIrBuilderScope(*this); + NonOpenMPIRBuilderRegion NonOmpBuilderScope(*this); // Emit outlined function for task construct. 
const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion); @@ -5046,7 +5050,7 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective( } void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { - CGNonOpenMPIRBuilderRegion Scope(*this); + NonOpenMPIRBuilderRegion NonOmpBuilderScope(*this); // Emit outlined function for task construct. const CapturedStmt *CS = S.getCapturedStmt(OMPD_task); @@ -5065,7 +5069,7 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { // Check if we should emit tied or untied task. Data.Tied = !S.getSingleClause(); auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { - CGNonOpenMPIRBuilderRegion Scope(CGF); + NonOpenMPIRBuilderRegion NonOmpBuilderScope(CGF); CGF.EmitStmt(CS->getCapturedStmt()); }; auto &&TaskGen = [&S, SharedsTy, CapturedStruct, diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index b3a7d3609a8f4..e84964327136b 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -496,18 +496,18 @@ class CodeGenFunction : public CodeGenTypeCache { /// non-OpenMPIRBuilder codegen either, but works with the current regression /// tests so far. 
bool IsInsideNonOpenMPIRBuilderHandledRegion = false; - class CGNonOpenMPIRBuilderRegion { + class NonOpenMPIRBuilderRegion { private: CodeGenFunction &CGF; bool PreviousIsInsideNonOpenMPIRBuilderHandledRegion; public: - CGNonOpenMPIRBuilderRegion(CodeGenFunction &CGF) + NonOpenMPIRBuilderRegion(CodeGenFunction &CGF) : CGF(CGF), PreviousIsInsideNonOpenMPIRBuilderHandledRegion( CGF.IsInsideNonOpenMPIRBuilderHandledRegion) { CGF.IsInsideNonOpenMPIRBuilderHandledRegion = true; } - ~CGNonOpenMPIRBuilderRegion() { + ~NonOpenMPIRBuilderRegion() { CGF.IsInsideNonOpenMPIRBuilderHandledRegion = PreviousIsInsideNonOpenMPIRBuilderHandledRegion; } From f72c8dc032fd6b8318208018ca31cf0087337422 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 2 May 2022 23:51:07 -0500 Subject: [PATCH 47/50] clang-format --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 5 --- clang/lib/CodeGen/CGStmtOpenMP.cpp | 23 ++++++------- clang/lib/CodeGen/CodeGenFunction.h | 16 ++++----- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 33 +++++++++---------- .../Frontend/OpenMPIRBuilderTest.cpp | 10 +++--- 5 files changed, 38 insertions(+), 49 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 9ae76f96f2a76..b75587f3aac74 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1204,8 +1204,6 @@ CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { return UDRMap.lookup(D); } - - static llvm::Function *emitParallelOrTeamsOutlinedFunction( CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, @@ -1233,7 +1231,6 @@ static llvm::Function *emitParallelOrTeamsOutlinedFunction( dyn_cast(&D)) HasCancel = OPFD->hasCancel(); - CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, HasCancel, OutlinedHelperName); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); @@ -2082,8 +2079,6 
@@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, } } - - // If we're inside an (outlined) parallel region, use the region info's // thread-ID variable (it is passed in a first argument of the outlined function // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index a199ba67e0fbf..e2fb1db034dbd 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -1540,7 +1540,7 @@ static void emitCommonOMPParallelDirective( CodeGenFunction &CGF, const OMPExecutableDirective &S, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, const CodeGenBoundParametersTy &CodeGenBoundParameters) { - CodeGenFunction:: NonOpenMPIRBuilderRegion NonBuilderScope(CGF); + CodeGenFunction::NonOpenMPIRBuilderRegion NonBuilderScope(CGF); const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); llvm::Value *NumThreads = nullptr; @@ -1716,8 +1716,8 @@ void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody( void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { if (CGM.getLangOpts().OpenMPIRBuilder && - !IsInsideNonOpenMPIRBuilderHandledRegion) { - llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); + !IsInsideNonOpenMPIRBuilderHandledRegion) { + llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); // Check if we have any if clause associated with the directive. 
llvm::Value *IfCond = nullptr; @@ -1769,13 +1769,11 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); Builder.restoreIP( - OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB, + OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB, IfCond, NumThreads, ProcBind, S.hasCancel())); return; } - - // Emit parallel region as a standalone region. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { Action.Enter(CGF); @@ -3736,7 +3734,7 @@ static void emitScanBasedDirective( static bool emitWorksharingDirective(CodeGenFunction &CGF, const OMPLoopDirective &S, bool HasCancel) { - CodeGenFunction:: NonOpenMPIRBuilderRegion NonOmpBuilderScope(CGF); + CodeGenFunction::NonOpenMPIRBuilderRegion NonOmpBuilderScope(CGF); bool HasLastprivates; if (llvm::any_of(S.getClausesOfKind(), @@ -3916,8 +3914,7 @@ static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, } void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { - NonOpenMPIRBuilderRegion NonOmpBuilderScope(*this); - + NonOpenMPIRBuilderRegion NonOmpBuilderScope(*this); const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt(); const auto *CS = dyn_cast(CapturedStmt); @@ -4147,7 +4144,8 @@ void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { Builder.restoreIP(CodeGenIP); - llvm::BasicBlock * FiniBB = splitBBWithSuffix(Builder, false, ".sectionfini"); + llvm::BasicBlock *FiniBB = + splitBBWithSuffix(Builder, false, ".sectionfini"); OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, *FiniBB); EmitStmt(SectionRegionBodyStmt); @@ -4162,7 +4160,6 @@ void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { return; } - LexicalScope Scope(*this, 
S.getSourceRange()); EmitStopPoint(&S); EmitStmt(S.getAssociatedStmt()); @@ -5050,7 +5047,7 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective( } void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { - NonOpenMPIRBuilderRegion NonOmpBuilderScope(*this); + NonOpenMPIRBuilderRegion NonOmpBuilderScope(*this); // Emit outlined function for task construct. const CapturedStmt *CS = S.getCapturedStmt(OMPD_task); @@ -5069,7 +5066,7 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { // Check if we should emit tied or untied task. Data.Tied = !S.getSingleClause(); auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { - NonOpenMPIRBuilderRegion NonOmpBuilderScope(CGF); + NonOpenMPIRBuilderRegion NonOmpBuilderScope(CGF); CGF.EmitStmt(CS->getCapturedStmt()); }; auto &&TaskGen = [&S, SharedsTy, CapturedStruct, diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index e84964327136b..02db151925a34 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -489,8 +489,10 @@ class CodeGenFunction : public CodeGenTypeCache { ~CGCapturedStmtRAII() { CGF.CapturedStmtInfo = PrevCapturedStmtInfo; } }; - /// While in a region handled by Clang's CGOpenMPRuntime, do not use the OpenMPIRBuilder which requires all surrounding regions to be handled by OpenMPIRBuilder as well. - /// + /// While in a region handled by Clang's CGOpenMPRuntime, do not use the + /// OpenMPIRBuilder which requires all surrounding regions to be handled by + /// OpenMPIRBuilder as well. + /// /// Required until everything can be handled by OpenMPIRBuilder. 
/// Isn't the ultimate solution to mixing OpenMPIRBuilder and /// non-OpenMPIRBuilder codegen either, but works with the current regression @@ -502,7 +504,7 @@ class CodeGenFunction : public CodeGenTypeCache { bool PreviousIsInsideNonOpenMPIRBuilderHandledRegion; public: - NonOpenMPIRBuilderRegion(CodeGenFunction &CGF) + NonOpenMPIRBuilderRegion(CodeGenFunction &CGF) : CGF(CGF), PreviousIsInsideNonOpenMPIRBuilderHandledRegion( CGF.IsInsideNonOpenMPIRBuilderHandledRegion) { CGF.IsInsideNonOpenMPIRBuilderHandledRegion = true; @@ -1798,17 +1800,13 @@ class CodeGenFunction : public CodeGenTypeCache { /// Emit the Finalization for an OMP region /// \param CGF The Codegen function this belongs to /// \param IP Insertion point for generating the finalization code. - static void FinalizeOMPRegion(CodeGenFunction &CGF, - InsertPointTy IP) { + static void FinalizeOMPRegion(CodeGenFunction &CGF, InsertPointTy IP) { CGBuilderTy::InsertPointGuard IPG(CGF.Builder); CGF.Builder.restoreIP(IP); llvm::BasicBlock *DestBB = - llvm::splitBB(CGF.Builder, /*CreateBranch*/false, ".ompfinalize"); - - + llvm::splitBB(CGF.Builder, /*CreateBranch*/ false, ".ompfinalize"); - CodeGenFunction::JumpDest Dest = CGF.getJumpDestInCurrentScope(DestBB); CGF.EmitBranchThroughCleanup(Dest); } diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 687265f6ddb85..2834b89d2c6cf 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -700,7 +700,7 @@ OpenMPIRBuilder::OMPRegionInfo::OMPRegionInfo(RegionKind Kind, void OpenMPIRBuilder::OMPRegionInfo::addBreak(BasicBlock *BB, omp::Directive Reason, OMPRegionInfo *Target) { - // TODO: Multiple breaks with same Target/Reasons can be combined. + // TODO: Multiple breaks with same Target/Reasons can be combined. 
assert(IsCancellable && "Only cancellable region may have irregular exits"); assert(!BB->getTerminator() && "Irregular exit must not rejoin the cfg"); @@ -835,7 +835,9 @@ OpenMPIRBuilder::emitBarrierImpl(const LocationDescription &Loc, Directive Kind, Args); if (UseCancelBarrier && CheckCancelFlag) - emitCancelationCheckImpl(Loc, Result, /* CancelledDirective */ OMPD_parallel, /* CancelReason */ OMPD_barrier); + emitCancelationCheckImpl(Loc, Result, + /* CancelledDirective */ OMPD_parallel, + /* CancelReason */ OMPD_barrier); return Builder.saveIP(); } @@ -885,7 +887,8 @@ OpenMPIRBuilder::createCancel(const LocationDescription &Loc, getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args); // The actual cancel logic is shared with others, e.g., cancel_barriers. - emitCancelationCheckImpl(Loc, Result, CancelledDirective, /* CancelReason */ OMPD_cancel); + emitCancelationCheckImpl(Loc, Result, CancelledDirective, + /* CancelReason */ OMPD_cancel); if (ContBB) return {ContBB, ContBB->begin()}; @@ -940,8 +943,8 @@ void OpenMPIRBuilder::emitCancelationCheckImpl( BasicBlock *BB = Builder.GetInsertBlock(); LLVMContext &Ctx = BB->getContext(); - // Building the folloing control flow: -// + // Building the folloing control flow: + // // BB: // br i1 CancelFlag // | | @@ -961,7 +964,7 @@ void OpenMPIRBuilder::emitCancelationCheckImpl( Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock, /* TODO weight */ nullptr, nullptr); -// Register an irregular exit to be handled by the surrounding construct. + // Register an irregular exit to be handled by the surrounding construct. 
RegionStack.back()->addBreak(CancellationBlock, CancelReason, getInnermostRegion(CancelledDirective)); @@ -1048,7 +1051,6 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( BasicBlock *PRegExitBB = PRegPreFiniBB->splitBasicBlock(ThenTI, "omp.par.exit"); - OMPRegionInfo *ParallelRegion = enterRegion(OMPD_parallel, IsCancellable); // Generate the privatization allocas in the block that will become the entry @@ -1180,18 +1182,19 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( I->eraseFromParent(); }; - - Instruction *PRegPreFiniTI = PRegPreFiniBB->getTerminator(); -// Emit frontend finializations (eg. destructors) at the end of the regular exit. + // Emit frontend finializations (eg. destructors) at the end of the regular + // exit. if (FiniCB) { InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator()); FiniCB(PreFiniIP); } -// Also emit finializations to each irregular exit. -// Note that we cannot reuse the regular exit finialization code (like EmitOMPInlinedRegion and createSections) because some of the cancellation may need an additional barrier. + // Also emit finializations to each irregular exit. + // Note that we cannot reuse the regular exit finialization code (like + // EmitOMPInlinedRegion and createSections) because some of the cancellation + // may need an additional barrier. 
for (OMPRegionBreakInfo &Break : ParallelRegion->Breaks) { Builder.SetInsertPoint(Break.BB); @@ -1219,7 +1222,6 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( assert(!Break.BB || !Break.BB->getTerminator()); } - exitRegion(ParallelRegion); OI.OuterAllocaBB = OuterAllocaBlock; @@ -1499,7 +1501,6 @@ OpenMPIRBuilder::createSection(const LocationDescription &Loc, if (!updateToLocation(Loc)) return Loc.IP; - Directive OMPD = Directive::OMPD_section; // Since we are using Finalization Callback here, HasFinalize // and IsCancellable have to be true @@ -3289,14 +3290,13 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion( // path. The irregular exit is then added as an irregular exit of the // sourrounding loop that, upon its exit, can add its own finialization // code and/or rejoin the control flow there. - // + // // TODO: Clang's codegen emits finalization code only once and inserts a // switch to jump back to the target code path (CGF.EmitBranchThroughCleanup). // Currently in the OpenMPIRBuilder, we emit the finialization multiple times // for each path exiting the region (non-cancellation and each cancellation // check). 
- BasicBlock *FiniStartBB = FiniBB; if (FiniCB) { Builder.SetInsertPoint(FiniBB, FiniBB->begin()); @@ -3311,7 +3311,6 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion( "Unexpected control flow graph state!!"); emitCommonDirectiveExit(OMPD, FinIP, ExitCall); - for (OMPRegionBreakInfo &Break : Region->Breaks) { if (Break.Target == Region) { Builder.SetInsertPoint(Break.BB); diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index 9bad3a244ab04..22344c8a9e2f2 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -3506,11 +3506,11 @@ TEST_F(OpenMPIRBuilderTest, CreateReductions) { return Builder.saveIP(); }; - InsertPointTy AfterIP = OMPBuilder.createParallel( - Loc, OuterAllocaIP, BodyGenCB, PrivCB, {}, - /* IfCondition */ nullptr, - /* NumThreads */ nullptr, OMP_PROC_BIND_default, - /* IsCancellable */ false); + InsertPointTy AfterIP = + OMPBuilder.createParallel(Loc, OuterAllocaIP, BodyGenCB, PrivCB, {}, + /* IfCondition */ nullptr, + /* NumThreads */ nullptr, OMP_PROC_BIND_default, + /* IsCancellable */ false); Builder.restoreIP(AfterIP); OpenMPIRBuilder::ReductionInfo ReductionInfos[] = { From f8198f70dcd6ae9461e94553790f08200c0a0455 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 2 May 2022 23:56:48 -0500 Subject: [PATCH 48/50] remote OpenMPIrBuilder captureStmt type --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index b75587f3aac74..044c9873ffcb2 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -63,8 +63,6 @@ class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { InlinedRegion, /// Region with outlined function for standalone 'target' directive. TargetRegion, - /// Handled by OpenMPIRBuilder. 
- OpenMPIRBuilderRegion, }; CGOpenMPRegionInfo(const CapturedStmt &CS, @@ -112,24 +110,6 @@ class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { bool HasCancel; }; -class OpenMPIRBuilderRegionInfo final : public CGOpenMPRegionInfo { -public: - OpenMPIRBuilderRegionInfo(const CapturedStmt &CS, OpenMPDirectiveKind Kind) - : CGOpenMPRegionInfo( - CS, OpenMPIRBuilderRegion, - [](CodeGenFunction &, PrePostActionTy &) { - llvm_unreachable("Should never be called"); - }, - Kind, /*HasCancel*/ true) {} - - static bool classof(const CGCapturedStmtInfo *Info) { - return CGOpenMPRegionInfo::classof(Info) && - cast(Info)->getRegionKind() == - OpenMPIRBuilderRegion; - } - const VarDecl *getThreadIDVariable() const override { return nullptr; } -}; - /// API for captured statement code generation in OpenMP constructs. class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { public: From 482256db8a920efbf8d082804b2027fdeb05dfab Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 2 May 2022 23:58:37 -0500 Subject: [PATCH 49/50] cleanup --- clang/lib/CodeGen/CGStmtOpenMP.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index e2fb1db034dbd..d520d254c9eb1 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -1718,7 +1718,6 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { if (CGM.getLangOpts().OpenMPIRBuilder && !IsInsideNonOpenMPIRBuilderHandledRegion) { llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); - // Check if we have any if clause associated with the directive. 
llvm::Value *IfCond = nullptr; if (const auto *C = S.getSingleClause()) @@ -1795,7 +1794,6 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); }; { - auto LPCRegion = CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen, @@ -3864,7 +3862,6 @@ void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { return; } - HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel()); }; { @@ -4159,7 +4156,6 @@ void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { return; } - LexicalScope Scope(*this, S.getSourceRange()); EmitStopPoint(&S); EmitStmt(S.getAssociatedStmt()); From fdb6ddcfeb62be7dbc502a4a4ed7c5be11c1c9b6 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Tue, 3 May 2022 00:01:24 -0500 Subject: [PATCH 50/50] remove viewCFG --- llvm/include/llvm/Analysis/CFGPrinter.h | 93 ++----- llvm/include/llvm/Analysis/RegionPrinter.h | 16 +- llvm/include/llvm/IR/IRBuilder.h | 9 +- llvm/lib/Analysis/CFGPrinter.cpp | 63 +---- llvm/lib/Analysis/RegionPrinter.cpp | 276 +-------------------- llvm/lib/Support/GraphWriter.cpp | 2 +- 6 files changed, 30 insertions(+), 429 deletions(-) diff --git a/llvm/include/llvm/Analysis/CFGPrinter.h b/llvm/include/llvm/Analysis/CFGPrinter.h index 0e2fa81b7bd8e..768cda59c57de 100644 --- a/llvm/include/llvm/Analysis/CFGPrinter.h +++ b/llvm/include/llvm/Analysis/CFGPrinter.h @@ -30,7 +30,6 @@ #include "llvm/Support/FormatVariadic.h" namespace llvm { - template struct GraphTraits; class CFGViewerPass : public PassInfoMixin { public: @@ -53,7 +52,7 @@ class CFGOnlyPrinterPass : public PassInfoMixin { }; class DOTFuncInfo { -public: +private: const Function *F; const BlockFrequencyInfo *BFI; const BranchProbabilityInfo *BPI; @@ -61,22 +60,13 @@ class DOTFuncInfo { bool ShowHeat; bool EdgeWeights; bool RawWeights; - const BasicBlock 
*HighlightBB; - const Instruction *HighlightInst; public: - // DOTFuncInfo(const Function *F) : DOTFuncInfo(F, nullptr, nullptr, 0) {} - DOTFuncInfo(const Function *F, const BasicBlock *HighlightBB = nullptr, - const Instruction *HighlightInst = nullptr) - : DOTFuncInfo(F, nullptr, nullptr, 0, HighlightBB, HighlightInst) {} + DOTFuncInfo(const Function *F) : DOTFuncInfo(F, nullptr, nullptr, 0) {} + DOTFuncInfo(const Function *F, const BlockFrequencyInfo *BFI, const BranchProbabilityInfo *BPI, uint64_t MaxFreq) - : DOTFuncInfo(F, BFI, BPI, 0, nullptr, nullptr) {} - DOTFuncInfo(const Function *F, const BlockFrequencyInfo *BFI, - const BranchProbabilityInfo *BPI, uint64_t MaxFreq, - const BasicBlock *HighlightBB, const Instruction *HighlightInst) - : F(F), BFI(BFI), BPI(BPI), MaxFreq(MaxFreq), HighlightBB(HighlightBB), - HighlightInst(HighlightInst) { + : F(F), BFI(BFI), BPI(BPI), MaxFreq(MaxFreq) { ShowHeat = false; EdgeWeights = !!BPI; // Print EdgeWeights when BPI is available. RawWeights = !!BFI; // Print RawWeights when BFI is available. 
@@ -158,44 +148,13 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { } static std::string getCompleteNodeLabel( - const BasicBlock *Node, DOTFuncInfo *CFGInfo, + const BasicBlock *Node, DOTFuncInfo *, llvm::function_ref - HandleBasicBlock = {}, + HandleBasicBlock = [](raw_string_ostream &OS, + const BasicBlock &Node) -> void { OS << Node; }, llvm::function_ref - HandleComment = eraseComment, - unsigned LongestCol = 0) { - - auto BasicBlockHandler = HandleBasicBlock; - if (!BasicBlockHandler) { - BasicBlockHandler = [CFGInfo, - LongestCol](raw_string_ostream &OS, - const BasicBlock &Node) -> void { - if (!CFGInfo || !CFGInfo->HighlightBB) { - OS << Node; - return; - } - - Node.printAsOperand(OS, false); - OS << ":\n"; - for (auto &&Inst : Node) { - if (&Inst == CFGInfo->HighlightInst) { - OS << '<'; - for (unsigned I = 2; I < LongestCol; ++I) - OS << '-'; - OS << ">\n"; - } - OS << Inst << "\n"; - } - if (CFGInfo->HighlightBB == &Node && !CFGInfo->HighlightInst) { - OS << '<'; - for (unsigned I = 2; I < LongestCol; ++I) - OS << '-'; - OS << '>'; - } - }; - } - - // enum { MaxColumns = 80 }; + HandleComment = eraseComment) { + enum { MaxColumns = 80 }; std::string Str; raw_string_ostream OS(Str); @@ -204,25 +163,23 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { OS << ":"; } - BasicBlockHandler(OS, *Node); - unsigned LongCol = 1; + HandleBasicBlock(OS, *Node); std::string OutStr = OS.str(); if (OutStr[0] == '\n') OutStr.erase(OutStr.begin()); // Process string output to make it nicer... unsigned ColNum = 0; - // unsigned LastSpace = 0; + unsigned LastSpace = 0; for (unsigned i = 0; i != OutStr.length(); ++i) { if (OutStr[i] == '\n') { // Left justify OutStr[i] = '\\'; OutStr.insert(OutStr.begin() + i + 1, 'l'); ColNum = 0; - // LastSpace = 0; + LastSpace = 0; } else if (OutStr[i] == ';') { // Delete comments! 
unsigned Idx = OutStr.find('\n', i + 1); // Find end of line HandleComment(OutStr, i, Idx); -#if 0 } else if (ColNum == MaxColumns) { // Wrap lines. // Wrap very long names even though we can't find a space. if (!LastSpace) @@ -231,19 +188,11 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { ColNum = i - LastSpace; LastSpace = 0; i += 3; // The loop will advance 'i' again. -#endif } else ++ColNum; - LongCol = std::max(LongCol, ColNum); - // if (OutStr[i] == ' ') - // LastSpace = i; - } - - if (!HandleBasicBlock && CFGInfo && CFGInfo->HighlightBB && !LongestCol) { - return getCompleteNodeLabel(Node, CFGInfo, HandleBasicBlock, - HandleComment, LongCol); + if (OutStr[i] == ' ') + LastSpace = i; } - return OutStr; } @@ -332,9 +281,6 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { } std::string getNodeAttributes(const BasicBlock *Node, DOTFuncInfo *CFGInfo) { - if (Node == CFGInfo->HighlightBB) { - return "style=filled,fillcolor=olivedrab1"; - } if (!CFGInfo->showHeatColors()) return ""; @@ -352,17 +298,6 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { bool isNodeHidden(const BasicBlock *Node, const DOTFuncInfo *CFGInfo); void computeDeoptOrUnreachablePaths(const Function *F); }; - -void viewCFG(const Function *F); -void viewCFG(const Function &F); -void viewCFG(const BasicBlock *BB); -void viewCFG(const BasicBlock &BB); -void viewCFG(const Instruction *I); -void viewCFG(const Instruction &I); - -// RegionPrinter.cpp -void viewRegion(const Function *F); -void viewRegion(const Function &F); } // End llvm namespace namespace llvm { diff --git a/llvm/include/llvm/Analysis/RegionPrinter.h b/llvm/include/llvm/Analysis/RegionPrinter.h index e2598e2390e47..154ac35c486ad 100644 --- a/llvm/include/llvm/Analysis/RegionPrinter.h +++ b/llvm/include/llvm/Analysis/RegionPrinter.h @@ -18,12 +18,8 @@ namespace llvm { class FunctionPass; class Function; class RegionInfo; - class BasicBlock; - class Instruction; FunctionPass *createRegionViewerPass(); - 
FunctionPass *createRegionViewerPass(const BasicBlock *BB, - const Instruction *Inst); FunctionPass *createRegionOnlyViewerPass(); FunctionPass *createRegionPrinterPass(); FunctionPass *createRegionOnlyPrinterPass(); @@ -36,8 +32,7 @@ namespace llvm { /// Includes the instructions in each BasicBlock. /// /// @param RI The analysis to display. - void viewRegion(RegionInfo *RI); - void viewRegion(RegionInfo &RI); + void viewRegion(llvm::RegionInfo *RI); /// Analyze the regions of a function and open its GraphViz /// visualization in a viewer. @@ -48,14 +43,7 @@ namespace llvm { /// manager currently holds. /// /// @param F Function to analyze. - void viewRegion(const Function *F); - void viewRegion(const Function &F); - - void viewRegion(const BasicBlock *I); - void viewRegion(const BasicBlock &I); - - void viewRegion(const Instruction *I); - void viewRegion(const Instruction &I); + void viewRegion(const llvm::Function *F); /// Open a viewer to display the GraphViz vizualization of the analysis /// result. diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index b7e24e943da2d..5ac7890905720 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -241,10 +241,7 @@ class IRBuilderBase { /// Creates a new insertion point at the given location. InsertPoint(BasicBlock *InsertBlock, BasicBlock::iterator InsertPoint) - : Block(InsertBlock), Point(InsertPoint) { - assert(!isSet() || InsertBlock->end() == InsertPoint || - InsertPoint->getParent() == InsertBlock); - } + : Block(InsertBlock), Point(InsertPoint) {} /// Returns true if this insert point is set. bool isSet() const { return (Block != nullptr); } @@ -2554,10 +2551,6 @@ class IRBuilder : public IRBuilderBase { // Create wrappers for C Binding types (see CBindingWrapping.h). 
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(IRBuilder<>, LLVMBuilderRef) -void viewCFG(const llvm::IRBuilderBase *Builder); -void viewCFG(const llvm::IRBuilderBase &Builder); -void viewCFG(const llvm::IRBuilderBase::InsertPoint *IP); -void viewCFG(const llvm::IRBuilderBase::InsertPoint &IP); } // end namespace llvm #endif // LLVM_IR_IRBUILDER_H diff --git a/llvm/lib/Analysis/CFGPrinter.cpp b/llvm/lib/Analysis/CFGPrinter.cpp index d9c82b12128d5..f8eba1a00f28d 100644 --- a/llvm/lib/Analysis/CFGPrinter.cpp +++ b/llvm/lib/Analysis/CFGPrinter.cpp @@ -19,7 +19,6 @@ #include "llvm/Analysis/CFGPrinter.h" #include "llvm/ADT/PostOrderIterator.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" @@ -129,7 +128,7 @@ PreservedAnalyses CFGViewerPass::run(Function &F, FunctionAnalysisManager &AM) { return PreservedAnalyses::all(); auto *BFI = &AM.getResult(F); auto *BPI = &AM.getResult(F); - ::viewCFG(F, BFI, BPI, getMaxFreq(F, BFI)); + viewCFG(F, BFI, BPI, getMaxFreq(F, BFI)); return PreservedAnalyses::all(); } @@ -145,7 +144,7 @@ struct CFGOnlyViewerLegacyPass : public FunctionPass { return false; auto *BPI = &getAnalysis().getBPI(); auto *BFI = &getAnalysis().getBFI(); - ::viewCFG(F, BFI, BPI, getMaxFreq(F, BFI), /*CFGOnly=*/true); + viewCFG(F, BFI, BPI, getMaxFreq(F, BFI), /*CFGOnly=*/true); return false; } @@ -170,7 +169,7 @@ PreservedAnalyses CFGOnlyViewerPass::run(Function &F, return PreservedAnalyses::all(); auto *BFI = &AM.getResult(F); auto *BPI = &AM.getResult(F); - ::viewCFG(F, BFI, BPI, getMaxFreq(F, BFI), /*CFGOnly=*/true); + viewCFG(F, BFI, BPI, getMaxFreq(F, BFI), /*CFGOnly=*/true); return PreservedAnalyses::all(); } @@ -333,59 +332,3 @@ bool DOTGraphTraits::isNodeHidden(const BasicBlock *Node, } return false; } - -void llvm::viewCFG(const Function *F) { - if (!F) - return; - F->viewCFG(); -} -void llvm::viewCFG(const Function &F) { return viewCFG(&F); } - -void llvm::viewCFG(const BasicBlock 
*BB) { - if (!BB) - return; - auto *F = BB->getParent(); - DOTFuncInfo CFGInfo(F, BB, nullptr); - ViewGraph(&CFGInfo, "cfg" + F->getName(), false); -} -void llvm::viewCFG(const BasicBlock &BB) { return viewCFG(&BB); } - -void llvm::viewCFG(const Instruction *I) { - if (!I) - return; - auto *BB = I->getParent(); - auto *F = BB->getParent(); - DOTFuncInfo CFGInfo(F, BB, I); - ViewGraph(&CFGInfo, "cfg" + F->getName(), false); -} -void llvm::viewCFG(const Instruction &I) { return viewCFG(&I); } - -void llvm::viewCFG(const llvm::IRBuilderBase *Builder) { - if (!Builder) - return; - return viewCFG(Builder->saveIP()); -} -void llvm::viewCFG(const llvm::IRBuilderBase &Builder) { - return viewCFG(&Builder); -} - -void llvm::viewCFG(const llvm::IRBuilderBase::InsertPoint *IP) { - if (!IP) - return; - if (!IP->isSet()) - return; - - assert(IP->isSet()); - BasicBlock *Block = IP->getBlock(); - BasicBlock::iterator Point = IP->getPoint(); - Function *F = Block->getParent(); - - // if (!CFGFuncName.empty() && !getName().contains(CFGFuncName)) - // return; - Instruction *Inst = (Point == Block->end()) ? 
nullptr : &*Point; - DOTFuncInfo CFGInfo(F, Block, Inst); - ViewGraph(&CFGInfo, "cfg" + F->getName(), false); -} -void llvm::viewCFG(const llvm::IRBuilderBase ::InsertPoint &IP) { - return viewCFG(&IP); -} diff --git a/llvm/lib/Analysis/RegionPrinter.cpp b/llvm/lib/Analysis/RegionPrinter.cpp index dd68c5c300088..bb1676c7a9030 100644 --- a/llvm/lib/Analysis/RegionPrinter.cpp +++ b/llvm/lib/Analysis/RegionPrinter.cpp @@ -30,56 +30,7 @@ onlySimpleRegions("only-simple-regions", cl::Hidden, cl::init(false)); -namespace { -struct HighlightingRegionInfo { - RegionInfo *RI; - const Function *F; - const BasicBlock *HighlightBB; - const Instruction *HighlightInst; - - HighlightingRegionInfo() = delete; - - HighlightingRegionInfo(RegionInfo *RI, const Function *F, - const BasicBlock *HighlightBB = nullptr, - const Instruction *HighlightInst = nullptr) - : RI(RI), F(F), HighlightBB(HighlightBB), HighlightInst(HighlightInst) {} - -public: - RegionInfo *getRegionInfo() const { return RI; } - const Function *getFunction() const { return F; } -}; - -#if 0 - struct HighlightingRegionInfoPassGraphTraits { - static HighlightingRegionInfo* getGraph(RegionInfoPass* RIP) { - // ... 
- return nullptr; - } - }; -#endif -} // namespace - namespace llvm { -template <> -struct GraphTraits - : public GraphTraits> { - using nodes_iterator = df_iterator, - false, GraphTraits>>; - - static NodeRef getEntryNode(HighlightingRegionInfo *G) { - return GraphTraits>::getEntryNode( - G->RI->getTopLevelRegion()); - } - - static nodes_iterator nodes_begin(HighlightingRegionInfo *G) { - return nodes_iterator::begin(getEntryNode(G)); - } - - static nodes_iterator nodes_end(HighlightingRegionInfo *G) { - return nodes_iterator::end(getEntryNode(G)); - } -}; - template<> struct DOTGraphTraits : public DefaultDOTGraphTraits { @@ -183,159 +134,16 @@ struct DOTGraphTraits : public DOTGraphTraits { printRegionCluster(*G->getTopLevelRegion(), GW, 4); } }; - } //end namespace llvm namespace { + struct RegionInfoPassGraphTraits { static RegionInfo *getGraph(RegionInfoPass *RIP) { return &RIP->getRegionInfo(); } }; -} // namespace - -namespace llvm { - -#if 0 - template <> - struct GraphTraits : public GraphTraits { - using Base = GraphTraits; - // using Base::NodeRef; - }; -#endif - -template <> -struct DOTGraphTraits - : public DOTGraphTraits { - using Base = DOTGraphTraits; - using Traits = GraphTraits; - - DOTGraphTraits(bool IsSimple = false) : Base(IsSimple) {} - -#if 0 - static std::string getGraphName(const HighlightingRegionInfo *G) { - return Base::getGraphName(G->RI); - } - - std::string getNodeLabel(RegionNode *Node, HighlightingRegionInfo *G) { - return Base::getNodeLabel(Node, G->RI); - } - - std::string getEdgeAttributes( - RegionNode *SrcNode, - Traits::ChildIteratorType CI, - HighlightingRegionInfo *G) { - return Base::getEdgeAttributes(SrcNode, CI, G->RI); - } -#endif - - static std::string getGraphName(const HighlightingRegionInfo *) { - return "Region Graph"; - } - - std::string getNodeLabel(RegionNode *Node, HighlightingRegionInfo *G) { - // return Base::getNodeLabel(Node, reinterpret_cast(G->RI->getTopLevelRegion())); - - if (!Node->isSubRegion()) { - 
BasicBlock *BB = Node->getNodeAs(); - - DOTFuncInfo CFGInfo(G->F, G->HighlightBB, G->HighlightInst); - if (isSimple()) - return DOTGraphTraits::getSimpleNodeLabel(BB, &CFGInfo); - else - return DOTGraphTraits::getCompleteNodeLabel(BB, - &CFGInfo); - } - - return "Not implemented"; - } - - static std::string getNodeAttributes(RegionNode *R, - HighlightingRegionInfo *G) { - auto HighlightBB = G->HighlightBB; - if (!R->isSubRegion() && R->getNodeAs() == HighlightBB) { - return "penwidth=5.0,style=filled"; - } - - return ""; - } - - std::string getEdgeAttributes(RegionNode *srcNode, - Traits::ChildIteratorType CI, - HighlightingRegionInfo *G) { - RegionNode *destNode = *CI; - - if (srcNode->isSubRegion() || destNode->isSubRegion()) - return ""; - - // In case of a backedge, do not use it to define the layout of the nodes. - BasicBlock *srcBB = srcNode->getNodeAs(); - BasicBlock *destBB = destNode->getNodeAs(); - - Region *R = G->RI->getRegionFor(destBB); - - while (R && R->getParent()) - if (R->getParent()->getEntry() == destBB) - R = R->getParent(); - else - break; - - if (R && R->getEntry() == destBB && R->contains(srcBB)) - return "constraint=false"; - - return ""; - } - - static void printRegionCluster(const Region &R, - GraphWriter &GW, - unsigned depth = 0, - const BasicBlock *HighlightBB = nullptr, - const Instruction *HighlightInst = nullptr) { - raw_ostream &O = GW.getOStream(); - O.indent(2 * depth) << "subgraph cluster_" << static_cast(&R) - << " {\n"; - O.indent(2 * (depth + 1)) << "label = \"\";\n"; - - if (!onlySimpleRegions || R.isSimple()) { - O.indent(2 * (depth + 1)) << "style = filled;\n"; - O.indent(2 * (depth + 1)) - << "color = " << ((R.getDepth() * 2 % 12) + 1) << "\n"; - - } else { - O.indent(2 * (depth + 1)) << "style = solid;\n"; - O.indent(2 * (depth + 1)) - << "color = " << ((R.getDepth() * 2 % 12) + 2) << "\n"; - } - - for (const auto &RI : R) - printRegionCluster(*RI, GW, depth + 1, HighlightBB, HighlightInst); - - const RegionInfo &RI = 
*static_cast(R.getRegionInfo()); - - for (auto *BB : R.blocks()) - if (RI.getRegionFor(BB) == &R) - O.indent(2 * (depth + 1)) - << "Node" - << static_cast(RI.getTopLevelRegion()->getBBNode(BB)) - << ";\n"; - - O.indent(2 * depth) << "}\n"; - } - - static void - addCustomGraphFeatures(const HighlightingRegionInfo *G, - GraphWriter &GW) { - raw_ostream &O = GW.getOStream(); - O << "\tcolorscheme = \"paired12\"\n"; - printRegionCluster(*G->RI->getTopLevelRegion(), GW, 4, G->HighlightBB, - G->HighlightInst); - } -}; -} // namespace llvm - -namespace { struct RegionPrinter : public DOTGraphTraitsPrinter { @@ -360,43 +168,15 @@ struct RegionOnlyPrinter }; char RegionOnlyPrinter::ID = 0; -struct RegionViewer : public FunctionPass { - using Base = FunctionPass; - +struct RegionViewer + : public DOTGraphTraitsViewer { static char ID; - RegionViewer() : RegionViewer(nullptr, nullptr) {} - - RegionViewer(const BasicBlock *HighlightBB, const Instruction *HighlightInst) - : FunctionPass(ID), HighlightBB(HighlightBB), - HighlightInst(HighlightInst) { + RegionViewer() + : DOTGraphTraitsViewer("reg", ID) { initializeRegionViewerPass(*PassRegistry::getPassRegistry()); } - - virtual bool processFunction(Function &F, RegionInfo &Analysis) { - return true; - } - - bool runOnFunction(Function &F) override { - auto &Analysis = getAnalysis().getRegionInfo(); - - if (!processFunction(F, Analysis)) - return false; - - HighlightingRegionInfo Graph(&Analysis, &F, HighlightBB, HighlightInst); - ViewGraph(&Graph, "reg", false, - Twine("Region Graph for '") + F.getName().str() + "' function"); - - return false; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesAll(); - AU.addRequired(); - } - -private: - const BasicBlock *HighlightBB; - const Instruction *HighlightInst; }; char RegionViewer::ID = 0; @@ -412,7 +192,7 @@ struct RegionOnlyViewer }; char RegionOnlyViewer::ID = 0; -} // end anonymous namespace +} //end anonymous namespace 
INITIALIZE_PASS(RegionPrinter, "dot-regions", "Print regions of function to 'dot' file", true, true) @@ -439,11 +219,6 @@ FunctionPass* llvm::createRegionViewerPass() { return new RegionViewer(); } -FunctionPass *llvm::createRegionViewerPass(const BasicBlock *BB, - const Instruction *Inst) { - return new RegionViewer(BB, Inst); -} - FunctionPass* llvm::createRegionOnlyViewerPass() { return new RegionOnlyViewer(); } @@ -474,48 +249,15 @@ static void invokeFunctionPass(const Function *F, FunctionPass *ViewerPass) { FPM.doFinalization(); } -void llvm::viewRegion(RegionInfo *RI) { - if (!RI) - return; - viewRegionInfo(RI, false); -} -void llvm::viewRegion(RegionInfo &RI) { return viewRegion(&RI); } +void llvm::viewRegion(RegionInfo *RI) { viewRegionInfo(RI, false); } void llvm::viewRegion(const Function *F) { - if (!F) - return; invokeFunctionPass(F, createRegionViewerPass()); } -void llvm::viewRegion(const Function &F) { viewRegion(&F); } void llvm::viewRegionOnly(RegionInfo *RI) { viewRegionInfo(RI, true); } void llvm::viewRegionOnly(const Function *F) { - if (!F) - return; invokeFunctionPass(F, createRegionOnlyViewerPass()); } - -void llvm::viewRegion(const llvm::BasicBlock *BB) { - if (!BB) - return; - - auto F = BB->getParent(); - - invokeFunctionPass(F, createRegionViewerPass(BB, nullptr)); -} -void llvm::viewRegion(const llvm::BasicBlock &BB) { return viewRegion(&BB); } - -void llvm::viewRegion(const llvm::Instruction *Inst) { - if (!Inst) - return; - - auto Block = Inst->getParent(); - auto F = Inst->getFunction(); - - invokeFunctionPass(F, createRegionViewerPass(Block, Inst)); -} - -void llvm::viewRegion(const llvm::Instruction &I) { return viewRegion(&I); } - #endif diff --git a/llvm/lib/Support/GraphWriter.cpp b/llvm/lib/Support/GraphWriter.cpp index 838fc40610b2f..e875e18a7e92e 100644 --- a/llvm/lib/Support/GraphWriter.cpp +++ b/llvm/lib/Support/GraphWriter.cpp @@ -256,7 +256,7 @@ bool llvm::DisplayGraph(StringRef FilenameRef, bool wait, if (!Viewer && 
S.TryFindProgram("xdg-open", ViewerPath)) Viewer = VK_XDGOpen; #ifdef _WIN32 - if (!Viewer && S.TryFindProgram("cmd.exe", ViewerPath)) { + if (!Viewer && S.TryFindProgram("cmd", ViewerPath)) { Viewer = VK_CmdStart; } #endif