@@ -283,7 +283,7 @@ class SimplifyCFGOpt {
283283 bool tryToSimplifyUncondBranchWithICmpInIt (ICmpInst *ICI,
284284 IRBuilder<> &Builder);
285285
286- bool hoistCommonCodeFromSuccessors (BasicBlock *BB , bool EqTermsOnly);
286+ bool hoistCommonCodeFromSuccessors (Instruction *TI , bool EqTermsOnly);
287287 bool hoistSuccIdenticalTerminatorToSwitchOrIf (
288288 Instruction *TI, Instruction *I1,
289289 SmallVectorImpl<Instruction *> &OtherSuccTIs);
@@ -1615,19 +1615,39 @@ static bool areIdenticalUpToCommutativity(const Instruction *I1,
16151615 return false ;
16161616}
16171617
1618+ static bool isSafeCheapLoadStore (const Instruction *I,
1619+ const TargetTransformInfo &TTI) {
1620+ // Not handle volatile or atomic.
1621+ if (auto *L = dyn_cast<LoadInst>(I)) {
1622+ if (!L->isSimple ())
1623+ return false ;
1624+ } else if (auto *S = dyn_cast<StoreInst>(I)) {
1625+ if (!S->isSimple ())
1626+ return false ;
1627+ } else
1628+ return false ;
1629+
1630+ // llvm.masked.load/store use i32 for alignment while load/store use i64.
1631+ // That's why we have the alignment limitation.
1632+ // FIXME: Update the prototype of the intrinsics?
1633+ return TTI.hasConditionalLoadStoreForType (getLoadStoreType (I)) &&
1634+ getLoadStoreAlignment (I) < Value::MaximumAlignment;
1635+ }
1636+
16181637// / Hoist any common code in the successor blocks up into the block. This
16191638// / function guarantees that BB dominates all successors. If EqTermsOnly is
16201639// / given, only perform hoisting in case both blocks only contain a terminator.
16211640// / In that case, only the original BI will be replaced and selects for PHIs are
16221641// / added.
1623- bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors (BasicBlock *BB ,
1642+ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors (Instruction *TI ,
16241643 bool EqTermsOnly) {
16251644 // This does very trivial matching, with limited scanning, to find identical
16261645 // instructions in the two blocks. In particular, we don't want to get into
16271646 // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
16281647 // such, we currently just scan for obviously identical instructions in an
16291648 // identical order, possibly separated by the same number of non-identical
16301649 // instructions.
1650+ BasicBlock *BB = TI->getParent ();
16311651 unsigned int SuccSize = succ_size (BB);
16321652 if (SuccSize < 2 )
16331653 return false ;
@@ -1639,7 +1659,63 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
16391659 if (Succ->hasAddressTaken () || !Succ->getSinglePredecessor ())
16401660 return false ;
16411661
1642- auto *TI = BB->getTerminator ();
1662+ auto *BI = dyn_cast<BranchInst>(TI);
1663+ if (BI && HoistLoadsStoresWithCondFaulting &&
1664+ Options.HoistLoadsStoresWithCondFaulting ) {
1665+ SmallVector<Instruction *, 2 > SpeculatedConditionalLoadsStores;
1666+ for (auto *Succ : successors (BB)) {
1667+ for (Instruction &I : drop_end (*Succ)) {
1668+ if (!isSafeCheapLoadStore (&I, TTI) ||
1669+ SpeculatedConditionalLoadsStores.size () ==
1670+ HoistLoadsStoresWithCondFaultingThreshold)
1671+ return false ;
1672+ SpeculatedConditionalLoadsStores.push_back (&I);
1673+ }
1674+ }
1675+
1676+ // TODO: Move below code to a function to share with #96878.
1677+ if (SpeculatedConditionalLoadsStores.empty ())
1678+ return false ;
1679+
1680+ auto &Context = BI->getParent ()->getContext ();
1681+ auto *VCondTy = FixedVectorType::get (Type::getInt1Ty (Context), 1 );
1682+ auto *Cond = BI->getOperand (0 );
1683+ IRBuilder<> Builder (BI);
1684+ Value *Mask1 = Builder.CreateBitCast (Cond, VCondTy);
1685+ Value *Mask0 = Builder.CreateBitCast (
1686+ Builder.CreateXor (Cond, ConstantInt::getTrue (Context)), VCondTy);
1687+ for (auto *I : SpeculatedConditionalLoadsStores) {
1688+ Value *Mask = I->getParent () == BI->getSuccessor (0 ) ? Mask1 : Mask0;
1689+ assert (!getLoadStoreType (I)->isVectorTy () && " not implemented" );
1690+ auto *Op0 = I->getOperand (0 );
1691+ Instruction *MaskedLoadStore = nullptr ;
1692+ if (auto *LI = dyn_cast<LoadInst>(I)) {
1693+ // Handle Load.
1694+ auto *Ty = I->getType ();
1695+ MaskedLoadStore = Builder.CreateMaskedLoad (FixedVectorType::get (Ty, 1 ),
1696+ Op0, LI->getAlign (), Mask);
1697+ I->replaceAllUsesWith (Builder.CreateBitCast (MaskedLoadStore, Ty));
1698+ } else {
1699+ // Handle Store.
1700+ auto *StoredVal =
1701+ Builder.CreateBitCast (Op0, FixedVectorType::get (Op0->getType (), 1 ));
1702+ MaskedLoadStore = Builder.CreateMaskedStore (
1703+ StoredVal, I->getOperand (1 ), cast<StoreInst>(I)->getAlign (), Mask);
1704+ }
1705+ I->dropUBImplyingAttrsAndUnknownMetadata (
1706+ {LLVMContext::MD_range, LLVMContext::MD_annotation});
1707+ // FIXME: DIAssignID is not supported for masked store yet.
1708+ // (Verifier::visitDIAssignIDMetadata)
1709+ at::deleteAssignmentMarkers (I);
1710+ I->eraseMetadataIf ([](unsigned MDKind, MDNode *Node) {
1711+ return Node->getMetadataID () == Metadata::DIAssignIDKind;
1712+ });
1713+ MaskedLoadStore->copyMetadata (*I);
1714+ I->eraseFromParent ();
1715+ }
1716+
1717+ return true ;
1718+ }
16431719
16441720 // The second of pair is a SkipFlags bitmask.
16451721 using SuccIterPair = std::pair<BasicBlock::iterator, unsigned >;
@@ -2998,25 +3074,6 @@ static bool isProfitableToSpeculate(const BranchInst *BI, bool Invert,
29983074 return BIEndProb < Likely;
29993075}
30003076
3001- static bool isSafeCheapLoadStore (const Instruction *I,
3002- const TargetTransformInfo &TTI) {
3003- // Not handle volatile or atomic.
3004- if (auto *L = dyn_cast<LoadInst>(I)) {
3005- if (!L->isSimple ())
3006- return false ;
3007- } else if (auto *S = dyn_cast<StoreInst>(I)) {
3008- if (!S->isSimple ())
3009- return false ;
3010- } else
3011- return false ;
3012-
3013- // llvm.masked.load/store use i32 for alignment while load/store use i64.
3014- // That's why we have the alignment limitation.
3015- // FIXME: Update the prototype of the intrinsics?
3016- return TTI.hasConditionalLoadStoreForType (getLoadStoreType (I)) &&
3017- getLoadStoreAlignment (I) < Value::MaximumAlignment;
3018- }
3019-
30203077// / Speculate a conditional basic block flattening the CFG.
30213078// /
30223079// / Note that this is a very risky transform currently. Speculating
@@ -7436,7 +7493,7 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
74367493 return requestResimplify ();
74377494
74387495 if (HoistCommon &&
7439- hoistCommonCodeFromSuccessors (SI-> getParent () , !Options.HoistCommonInsts ))
7496+ hoistCommonCodeFromSuccessors (SI, !Options.HoistCommonInsts ))
74407497 return requestResimplify ();
74417498
74427499 return false ;
@@ -7794,8 +7851,8 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
77947851 // can hoist it up to the branching block.
77957852 if (BI->getSuccessor (0 )->getSinglePredecessor ()) {
77967853 if (BI->getSuccessor (1 )->getSinglePredecessor ()) {
7797- if (HoistCommon && hoistCommonCodeFromSuccessors (
7798- BI-> getParent () , !Options.HoistCommonInsts ))
7854+ if (HoistCommon &&
7855+ hoistCommonCodeFromSuccessors (BI , !Options.HoistCommonInsts ))
77997856 return requestResimplify ();
78007857 } else {
78017858 // If Successor #1 has multiple preds, we may be able to conditionally
0 commit comments