@@ -1662,21 +1662,43 @@ static bool areIdenticalUpToCommutativity(const Instruction *I1,
16621662// / \endcode
16631663// /
16641664// / So we need to turn hoisted load/store into cload/cstore.
1665+ // /
1666+ // / \param BI The branch instruction.
1667+ // / \param SpeculatedConditionalLoadsStores The load/store instructions that
1668+ // / will be speculated.
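1669+ // / \param Invert If it has a value, indicates whether FalseBB is the block being speculated (triangle CFG only). If std::nullopt, the loads/stores of both successors of \p BI are hoisted, each masked by its own side of the condition.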
16651670static void hoistConditionalLoadsStores (
16661671 BranchInst *BI,
16671672 SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
1668- bool Invert) {
1673+ std::optional< bool > Invert) {
16691674 auto &Context = BI->getParent ()->getContext ();
16701675 auto *VCondTy = FixedVectorType::get (Type::getInt1Ty (Context), 1 );
16711676 auto *Cond = BI->getOperand (0 );
16721677 // Construct the condition if needed.
16731678 BasicBlock *BB = BI->getParent ();
1674- IRBuilder<> Builder (SpeculatedConditionalLoadsStores.back ());
1675- Value *Mask = Builder.CreateBitCast (
1676- Invert ? Builder.CreateXor (Cond, ConstantInt::getTrue (Context)) : Cond,
1677- VCondTy);
1679+ IRBuilder<> Builder (
1680+ Invert.has_value () ? SpeculatedConditionalLoadsStores.back () : BI);
1681+ Value *Mask = nullptr ;
1682+ Value *MaskFalse = nullptr ;
1683+ Value *MaskTrue = nullptr ;
1684+ if (Invert.has_value ()) {
1685+ Mask = Builder.CreateBitCast (
1686+ *Invert ? Builder.CreateXor (Cond, ConstantInt::getTrue (Context)) : Cond,
1687+ VCondTy);
1688+ } else {
1689+ MaskFalse = Builder.CreateBitCast (
1690+ Builder.CreateXor (Cond, ConstantInt::getTrue (Context)), VCondTy);
1691+ MaskTrue = Builder.CreateBitCast (Cond, VCondTy);
1692+ }
1693+ auto PeekThroughBitcasts = [](Value *V) {
1694+ while (auto *BitCast = dyn_cast<BitCastInst>(V))
1695+ V = BitCast->getOperand (0 );
1696+ return V;
1697+ };
16781698 for (auto *I : SpeculatedConditionalLoadsStores) {
1679- IRBuilder<> Builder (I);
1699+ IRBuilder<> Builder (Invert.has_value () ? I : BI);
1700+ if (!Invert.has_value ())
1701+ Mask = I->getParent () == BI->getSuccessor (0 ) ? MaskTrue : MaskFalse;
16801702 // We currently assume conditional faulting load/store is supported for
16811703 // scalar types only when creating new instructions. This can be easily
16821704 // extended for vector types in the future.
@@ -1688,12 +1710,14 @@ static void hoistConditionalLoadsStores(
16881710 auto *Ty = I->getType ();
16891711 PHINode *PN = nullptr ;
16901712 Value *PassThru = nullptr ;
1691- for (User *U : I->users ())
1692- if ((PN = dyn_cast<PHINode>(U))) {
1693- PassThru = Builder.CreateBitCast (PN->getIncomingValueForBlock (BB),
1694- FixedVectorType::get (Ty, 1 ));
1695- break ;
1696- }
1713+ if (Invert.has_value ())
1714+ for (User *U : I->users ())
1715+ if ((PN = dyn_cast<PHINode>(U))) {
1716+ PassThru = Builder.CreateBitCast (
1717+ PeekThroughBitcasts (PN->getIncomingValueForBlock (BB)),
1718+ FixedVectorType::get (Ty, 1 ));
1719+ break ;
1720+ }
16971721 MaskedLoadStore = Builder.CreateMaskedLoad (
16981722 FixedVectorType::get (Ty, 1 ), Op0, LI->getAlign (), Mask, PassThru);
16991723 Value *NewLoadStore = Builder.CreateBitCast (MaskedLoadStore, Ty);
@@ -1702,8 +1726,8 @@ static void hoistConditionalLoadsStores(
17021726 I->replaceAllUsesWith (NewLoadStore);
17031727 } else {
17041728 // Handle Store.
1705- auto *StoredVal =
1706- Builder. CreateBitCast (Op0, FixedVectorType::get (Op0->getType (), 1 ));
1729+ auto *StoredVal = Builder. CreateBitCast (
1730+ PeekThroughBitcasts (Op0) , FixedVectorType::get (Op0->getType (), 1 ));
17071731 MaskedLoadStore = Builder.CreateMaskedStore (
17081732 StoredVal, I->getOperand (1 ), cast<StoreInst>(I)->getAlign (), Mask);
17091733 }
@@ -3155,7 +3179,8 @@ static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB,
31553179 return HaveRewritablePHIs;
31563180}
31573181
3158- static bool isProfitableToSpeculate (const BranchInst *BI, bool Invert,
3182+ static bool isProfitableToSpeculate (const BranchInst *BI,
3183+ std::optional<bool > Invert,
31593184 const TargetTransformInfo &TTI) {
31603185 // If the branch is non-unpredictable, and is predicted to *not* branch to
31613186 // the `then` block, then avoid speculating it.
@@ -3166,7 +3191,10 @@ static bool isProfitableToSpeculate(const BranchInst *BI, bool Invert,
31663191 if (!extractBranchWeights (*BI, TWeight, FWeight) || (TWeight + FWeight) == 0 )
31673192 return true ;
31683193
3169- uint64_t EndWeight = Invert ? TWeight : FWeight;
3194+ if (!Invert.has_value ())
3195+ return false ;
3196+
3197+ uint64_t EndWeight = *Invert ? TWeight : FWeight;
31703198 BranchProbability BIEndProb =
31713199 BranchProbability::getBranchProbability (EndWeight, TWeight + FWeight);
31723200 BranchProbability Likely = TTI.getPredictableBranchThreshold ();
@@ -8034,6 +8062,35 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
80348062 if (HoistCommon &&
80358063 hoistCommonCodeFromSuccessors (BI, !Options.HoistCommonInsts ))
80368064 return requestResimplify ();
8065+
8066+ if (BI && HoistLoadsStoresWithCondFaulting &&
8067+ Options.HoistLoadsStoresWithCondFaulting &&
8068+ isProfitableToSpeculate (BI, std::nullopt , TTI)) {
8069+ SmallVector<Instruction *, 2 > SpeculatedConditionalLoadsStores;
8070+ auto CanSpeculateConditionalLoadsStores = [&]() {
8071+ for (auto *Succ : successors (BB)) {
8072+ for (Instruction &I : *Succ) {
8073+ if (I.isTerminator ()) {
8074+ if (I.getNumSuccessors () > 1 )
8075+ return false ;
8076+ continue ;
8077+ } else if (!isSafeCheapLoadStore (&I, TTI) ||
8078+ SpeculatedConditionalLoadsStores.size () ==
8079+ HoistLoadsStoresWithCondFaultingThreshold) {
8080+ return false ;
8081+ }
8082+ SpeculatedConditionalLoadsStores.push_back (&I);
8083+ }
8084+ }
8085+ return !SpeculatedConditionalLoadsStores.empty ();
8086+ };
8087+
8088+ if (CanSpeculateConditionalLoadsStores ()) {
8089+ hoistConditionalLoadsStores (BI, SpeculatedConditionalLoadsStores,
8090+ std::nullopt );
8091+ return requestResimplify ();
8092+ }
8093+ }
80378094 } else {
80388095 // If Successor #1 has multiple preds, we may be able to conditionally
80398096 // execute Successor #0 if it branches to Successor #1.
0 commit comments