diff --git a/llvm/lib/SYCLNativeCPUUtils/compiler_passes/vecz/source/transform/control_flow_conversion_pass.cpp b/llvm/lib/SYCLNativeCPUUtils/compiler_passes/vecz/source/transform/control_flow_conversion_pass.cpp index bee78619ff6e..664b6bc7302a 100644 --- a/llvm/lib/SYCLNativeCPUUtils/compiler_passes/vecz/source/transform/control_flow_conversion_pass.cpp +++ b/llvm/lib/SYCLNativeCPUUtils/compiler_passes/vecz/source/transform/control_flow_conversion_pass.cpp @@ -115,6 +115,15 @@ class ControlFlowConversionState::Impl : public ControlFlowConversionState { Instruction *persistedCombinedDivergentExitMask = nullptr; }; + struct LoopMasksInfoMap : DenseMap { + using DenseMap::DenseMap; + + using DenseMap::at; + LoopMasksInfo &at(Loop *val) { + return const_cast(DenseMap::at(val)); + } + }; + /// @brief Create loop masks for the specified loop and its subloops. /// @param[in] L Loop which should have its LoopMasksInfo created. void createLoopMasks(Loop *L); @@ -360,7 +369,7 @@ class ControlFlowConversionState::Impl : public ControlFlowConversionState { BasicBlock *functionExitBlock = nullptr; DenseSet blends; - DenseMap LoopMasks; + LoopMasksInfoMap LoopMasks; }; STATISTIC(VeczCFGFail, @@ -555,11 +564,6 @@ bool ControlFlowConversionState::Impl::convertToDataFlow() { LI = &AM.getResult(F); UVR = &AM.getResult(F); - // Make sure every loop has an entry in the masks table before we start. - for (auto *L : *LI) { - createLoopMasks(L); - } - if (!VU.choices().linearizeBOSCC()) { ROSCCGadget ROSCC(*this); ROSCC.run(F); @@ -579,6 +583,13 @@ bool ControlFlowConversionState::Impl::convertToDataFlow() { } } + // Make sure every loop has an entry in the masks table. + // Do this after duplicateUniformRegions() since it may create additional + // loops. + for (auto *L : *LI) { + createLoopMasks(L); + } + // Reserve space for the masks table and default-construct all entries, to // avoid re-hashing/element relocation on access. MaskInfos.reserve(F.size()); @@ -960,7 +971,7 @@ bool ControlFlowConversionState::Impl::createExitMasks(BasicBlock &BB, } bool ControlFlowConversionState::Impl::createLoopExitMasks(LoopTag <ag) { - auto &LMask = LoopMasks[LTag.loop]; + auto &LMask = LoopMasks.at(LTag.loop); // If the Loop already has a CombinedExitMasks we have already processed it. if (LMask.combinedDivergentExitMask) { return true; @@ -1038,7 +1049,7 @@ bool ControlFlowConversionState::Impl::createLoopExitMasks(LoopTag <ag) { if (exitingLTag->loop != LTag.loop) { if (Loop *nestedLoop = nextInnerLoopLeft(exitingBlock, exitBlock)) { maskUpdateOperand = - LoopMasks[nestedLoop] + LoopMasks.at(nestedLoop) .updatedPersistedDivergentExitMasks[exitingBlock]; } } @@ -1095,7 +1106,7 @@ bool ControlFlowConversionState::Impl::createCombinedLoopExitMask( SmallVector exitEdges; auto *const Loop = LTag.loop; Loop->getExitEdges(exitEdges); - auto &LMask = LoopMasks[Loop]; + auto &LMask = LoopMasks.at(Loop); for (const Loop::Edge &EE : exitEdges) { BasicBlock *exitingBlock = const_cast(EE.first); BasicBlock *exitBlock = const_cast(EE.second); @@ -1836,7 +1847,7 @@ bool ControlFlowConversionState::Impl::computeDivergentLoopPureExit( BasicBlockTag &pureExitTag = DR->getOrCreateTag(pureExit); // Set the tags. - auto &LMask = LoopMasks[LTag.loop]; + auto &LMask = LoopMasks.at(LTag.loop); MaskInfos[pureExit].entryMask = LMask.persistedCombinedDivergentExitMask; pureExitTag.outermostExitedLoop = <ag; @@ -2174,7 +2185,7 @@ bool ControlFlowConversionState::Impl::generateDivergentLoopResults( bool ControlFlowConversionState::Impl::generateDivergentLoopResultUpdates( Value *LLV, LoopTag <ag) { - auto &LMask = LoopMasks[LTag.loop]; + auto &LMask = LoopMasks.at(LTag.loop); Value *mask = LMask.combinedDivergentExitMask; VECZ_ERROR_IF(!mask, "Divergent loop does not have an exit mask"); PHINode *PHI = LTag.loopResultPrevs[LLV]; @@ -2285,7 +2296,7 @@ bool ControlFlowConversionState::Impl::blendDivergentLoopExitMasks( } } - auto &LMask = LoopMasks[LTag.loop]; + auto &LMask = LoopMasks.at(LTag.loop); for (const Loop::Edge &EE : exitEdges) { BasicBlock *exitingBlock = const_cast(EE.first); BasicBlock *exitBlock = const_cast(EE.second); @@ -3055,7 +3066,7 @@ bool ControlFlowConversionState::Impl::blendInstructions() { } } - auto &LMask = LoopMasks[LTag->loop]; + auto &LMask = LoopMasks.at(LTag->loop); for (auto &UPREM : LMask.updatedPersistedDivergentExitMasks) { if (UPREM.first != header) { blendMap[UPREM.second][header] = @@ -3122,7 +3133,7 @@ bool ControlFlowConversionState::Impl::blendInstructions() { auto *const srcLoop = srcTag.loop; if (srcLoop && srcLoop->isLoopDivergent()) { if (dst != srcLoop->header) { - auto &srcMasks = LoopMasks[srcLoop->loop]; + auto &srcMasks = LoopMasks.at(srcLoop->loop); const auto &headerTag = DR->getTag(srcLoop->header); // If 'opDef' is an update loop exit mask, set an entry point in