@@ -283,7 +283,7 @@ class SimplifyCFGOpt {
   bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
                                              IRBuilder<> &Builder);
 
-  bool hoistCommonCodeFromSuccessors(BasicBlock *BB, bool EqTermsOnly);
+  bool hoistCommonCodeFromSuccessors(Instruction *TI, bool EqTermsOnly);
   bool hoistSuccIdenticalTerminatorToSwitchOrIf(
       Instruction *TI, Instruction *I1,
       SmallVectorImpl<Instruction *> &OtherSuccTIs);
@@ -1611,19 +1611,155 @@ static bool areIdenticalUpToCommutativity(const Instruction *I1,
   return false;
 }
 
+/// If the target supports conditional faulting,
+/// we look for the following pattern:
+/// \code
+///   BB:
+///     ...
+///     %cond = icmp ult %x, %y
+///     br i1 %cond, label %TrueBB, label %FalseBB
+///   FalseBB:
+///     store i32 1, ptr %q, align 4
+///     ...
+///   TrueBB:
+///     %maskedloadstore = load i32, ptr %b, align 4
+///     store i32 %maskedloadstore, ptr %p, align 4
+///     ...
+/// \endcode
+///
+/// and transform it into:
+///
+/// \code
+///   BB:
+///     ...
+///     %cond = icmp ult %x, %y
+///     %maskedloadstore = cload i32, ptr %b, %cond
+///     cstore i32 %maskedloadstore, ptr %p, %cond
+///     cstore i32 1, ptr %q, ~%cond
+///     br i1 %cond, label %TrueBB, label %FalseBB
+///   FalseBB:
+///     ...
+///   TrueBB:
+///     ...
+/// \endcode
+///
+/// where cload/cstore are represented by llvm.masked.load/store intrinsics,
+/// e.g.
+///
+/// \code
+///   %vcond = bitcast i1 %cond to <1 x i1>
+///   %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
+///                         (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
+///   %maskedloadstore = bitcast <1 x i32> %v0 to i32
+///   call void @llvm.masked.store.v1i32.p0
+///                         (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
+///   %cond.not = xor i1 %cond, true
+///   %vcond.not = bitcast i1 %cond.not to <1 x i1>
+///   call void @llvm.masked.store.v1i32.p0
+///                         (<1 x i32> <i32 1>, ptr %q, i32 4, <1 x i1> %vcond.not)
+/// \endcode
+///
+/// So we need to turn the hoisted load/store into cload/cstore.
+static void hoistConditionalLoadsStores(
+    BranchInst *BI,
+    SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
+    bool Invert) {
+  auto &Context = BI->getParent()->getContext();
+  auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
+  auto *Cond = BI->getOperand(0);
+  // Construct the condition if needed.
+  BasicBlock *BB = BI->getParent();
+  IRBuilder<> Builder(SpeculatedConditionalLoadsStores.back());
+  Value *Mask = Builder.CreateBitCast(
+      Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
+      VCondTy);
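+  // Rewrite each hoisted access below as a single-lane llvm.masked.load/store
+  // guarded by this mask.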
+  for (auto *I : SpeculatedConditionalLoadsStores) {
+    IRBuilder<> Builder(I);
+    // We currently assume conditional faulting load/store is supported for
+    // scalar types only when creating new instructions. This can be easily
+    // extended for vector types in the future.
+    assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
+    auto *Op0 = I->getOperand(0);
+    CallInst *MaskedLoadStore = nullptr;
+    if (auto *LI = dyn_cast<LoadInst>(I)) {
+      // Handle Load.
+      auto *Ty = I->getType();
+      PHINode *PN = nullptr;
+      Value *PassThru = nullptr;
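+      // If the load feeds a PHI in the merge block, use the PHI's incoming
+      // value from BB as the passthru, so the masked load already produces
+      // the merged value when the condition is false.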
+      for (User *U : I->users())
+        if ((PN = dyn_cast<PHINode>(U))) {
+          PassThru = Builder.CreateBitCast(PN->getIncomingValueForBlock(BB),
+                                           FixedVectorType::get(Ty, 1));
+          break;
+        }
+      MaskedLoadStore = Builder.CreateMaskedLoad(
+          FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru);
+      Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty);
+      if (PN)
+        PN->setIncomingValue(PN->getBasicBlockIndex(BB), NewLoadStore);
+      I->replaceAllUsesWith(NewLoadStore);
+    } else {
+      // Handle Store.
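+      // Stores need no passthru: lanes with a false mask bit are simply not
+      // written.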
+      auto *StoredVal =
+          Builder.CreateBitCast(Op0, FixedVectorType::get(Op0->getType(), 1));
+      MaskedLoadStore = Builder.CreateMaskedStore(
+          StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
+    }
+    // For non-debug metadata, only !annotation, !range, !nonnull and !align are
+    // kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
+    //
+    // !nonnull, !align: Pointer types are not supported, no need to keep.
+    // !range: Load type is changed from scalar to vector, but the metadata on
+    //         vector specifies a per-element range, so the semantics stay the
+    //         same. Keep it.
+    // !annotation: Does not impact semantics. Keep it.
+    if (const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range))
+      MaskedLoadStore->addRangeRetAttr(getConstantRangeFromMetadata(*Ranges));
+    I->dropUBImplyingAttrsAndUnknownMetadata({LLVMContext::MD_annotation});
+    // FIXME: DIAssignID is not supported for masked store yet.
+    // (Verifier::visitDIAssignIDMetadata)
+    at::deleteAssignmentMarkers(I);
+    I->eraseMetadataIf([](unsigned MDKind, MDNode *Node) {
+      return Node->getMetadataID() == Metadata::DIAssignIDKind;
+    });
+    MaskedLoadStore->copyMetadata(*I);
+    I->eraseFromParent();
+  }
+}
+
+static bool isSafeCheapLoadStore(const Instruction *I,
+                                 const TargetTransformInfo &TTI) {
+  // Don't handle volatile or atomic accesses.
+  if (auto *L = dyn_cast<LoadInst>(I)) {
+    if (!L->isSimple())
+      return false;
+  } else if (auto *S = dyn_cast<StoreInst>(I)) {
+    if (!S->isSimple())
+      return false;
+  } else
+    return false;
+
+  // llvm.masked.load/store use i32 for alignment while load/store use i64.
+  // That's why we have the alignment limitation.
+  // FIXME: Update the prototype of the intrinsics?
+  return TTI.hasConditionalLoadStoreForType(getLoadStoreType(I)) &&
+         getLoadStoreAlignment(I) < Value::MaximumAlignment;
+}
+
 /// Hoist any common code in the successor blocks up into the block. This
 /// function guarantees that BB dominates all successors. If EqTermsOnly is
 /// given, only perform hoisting in case both blocks only contain a terminator.
 /// In that case, only the original BI will be replaced and selects for PHIs are
 /// added.
-bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
+bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
                                                    bool EqTermsOnly) {
   // This does very trivial matching, with limited scanning, to find identical
   // instructions in the two blocks. In particular, we don't want to get into
   // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
   // such, we currently just scan for obviously identical instructions in an
   // identical order, possibly separated by the same number of non-identical
   // instructions.
+  BasicBlock *BB = TI->getParent();
   unsigned int SuccSize = succ_size(BB);
   if (SuccSize < 2)
     return false;
@@ -1635,8 +1771,6 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
     if (Succ->hasAddressTaken() || !Succ->getSinglePredecessor())
       return false;
 
-  auto *TI = BB->getTerminator();
-
   // The second of pair is a SkipFlags bitmask.
   using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
   SmallVector<SuccIterPair, 8> SuccIterPairs;
@@ -2997,25 +3131,6 @@ static bool isProfitableToSpeculate(const BranchInst *BI, bool Invert,
   return BIEndProb < Likely;
 }
 
-static bool isSafeCheapLoadStore(const Instruction *I,
-                                 const TargetTransformInfo &TTI) {
-  // Not handle volatile or atomic.
-  if (auto *L = dyn_cast<LoadInst>(I)) {
-    if (!L->isSimple())
-      return false;
-  } else if (auto *S = dyn_cast<StoreInst>(I)) {
-    if (!S->isSimple())
-      return false;
-  } else
-    return false;
-
-  // llvm.masked.load/store use i32 for alignment while load/store use i64.
-  // That's why we have the alignment limitation.
-  // FIXME: Update the prototype of the intrinsics?
-  return TTI.hasConditionalLoadStoreForType(getLoadStoreType(I)) &&
-         getLoadStoreAlignment(I) < Value::MaximumAlignment;
-}
-
 /// Speculate a conditional basic block flattening the CFG.
 ///
 /// Note that this is a very risky transform currently. Speculating
@@ -3267,118 +3382,8 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
   BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
              std::prev(ThenBB->end()));
 
-  // If the target supports conditional faulting,
-  // we look for the following pattern:
-  // \code
-  //   BB:
-  //     ...
-  //     %cond = icmp ult %x, %y
-  //     br i1 %cond, label %TrueBB, label %FalseBB
-  //   FalseBB:
-  //     store i32 1, ptr %q, align 4
-  //     ...
-  //   TrueBB:
-  //     %maskedloadstore = load i32, ptr %b, align 4
-  //     store i32 %maskedloadstore, ptr %p, align 4
-  //     ...
-  // \endcode
-  //
-  // and transform it into:
-  //
-  // \code
-  //   BB:
-  //     ...
-  //     %cond = icmp ult %x, %y
-  //     %maskedloadstore = cload i32, ptr %b, %cond
-  //     cstore i32 %maskedloadstore, ptr %p, %cond
-  //     cstore i32 1, ptr %q, ~%cond
-  //     br i1 %cond, label %TrueBB, label %FalseBB
-  //   FalseBB:
-  //     ...
-  //   TrueBB:
-  //     ...
-  // \endcode
-  //
-  // where cload/cstore are represented by llvm.masked.load/store intrinsics,
-  // e.g.
-  //
-  // \code
-  //   %vcond = bitcast i1 %cond to <1 x i1>
-  //   %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
-  //                         (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
-  //   %maskedloadstore = bitcast <1 x i32> %v0 to i32
-  //   call void @llvm.masked.store.v1i32.p0
-  //                         (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
-  //   %cond.not = xor i1 %cond, true
-  //   %vcond.not = bitcast i1 %cond.not to <1 x i1>
-  //   call void @llvm.masked.store.v1i32.p0
-  //                         (<1 x i32> <i32 1>, ptr %q, i32 4, <1 x i1> %vcond.not)
-  // \endcode
-  //
-  // So we need to turn hoisted load/store into cload/cstore.
-  auto &Context = BI->getParent()->getContext();
-  auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
-  auto *Cond = BI->getOperand(0);
-  Value *Mask = nullptr;
-  // Construct the condition if needed.
-  if (!SpeculatedConditionalLoadsStores.empty()) {
-    IRBuilder<> Builder(SpeculatedConditionalLoadsStores.back());
-    Mask = Builder.CreateBitCast(
-        Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
-        VCondTy);
-  }
-  for (auto *I : SpeculatedConditionalLoadsStores) {
-    IRBuilder<> Builder(I);
-    // We currently assume conditional faulting load/store is supported for
-    // scalar types only when creating new instructions. This can be easily
-    // extended for vector types in the future.
-    assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
-    auto *Op0 = I->getOperand(0);
-    CallInst *MaskedLoadStore = nullptr;
-    if (auto *LI = dyn_cast<LoadInst>(I)) {
-      // Handle Load.
-      auto *Ty = I->getType();
-      PHINode *PN = nullptr;
-      Value *PassThru = nullptr;
-      for (User *U : I->users())
-        if ((PN = dyn_cast<PHINode>(U))) {
-          PassThru = Builder.CreateBitCast(PN->getIncomingValueForBlock(BB),
-                                           FixedVectorType::get(Ty, 1));
-          break;
-        }
-      MaskedLoadStore = Builder.CreateMaskedLoad(
-          FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru);
-      Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty);
-      if (PN)
-        PN->setIncomingValue(PN->getBasicBlockIndex(BB), NewLoadStore);
-      I->replaceAllUsesWith(NewLoadStore);
-    } else {
-      // Handle Store.
-      auto *StoredVal =
-          Builder.CreateBitCast(Op0, FixedVectorType::get(Op0->getType(), 1));
-      MaskedLoadStore = Builder.CreateMaskedStore(
-          StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
-    }
-    // For non-debug metadata, only !annotation, !range, !nonnull and !align are
-    // kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
-    //
-    // !nonnull, !align : Not support pointer type, no need to keep.
-    // !range: Load type is changed from scalar to vector, but the metadata on
-    //         vector specifies a per-element range, so the semantics stay the
-    //         same. Keep it.
-    // !annotation: Not impact semantics. Keep it.
-    if (const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range))
-      MaskedLoadStore->addRangeRetAttr(getConstantRangeFromMetadata(*Ranges));
-    I->dropUBImplyingAttrsAndUnknownMetadata({LLVMContext::MD_annotation});
-    // FIXME: DIAssignID is not supported for masked store yet.
-    // (Verifier::visitDIAssignIDMetadata)
-    at::deleteAssignmentMarkers(I);
-    I->eraseMetadataIf([](unsigned MDKind, MDNode *Node) {
-      return Node->getMetadataID() == Metadata::DIAssignIDKind;
-    });
-    MaskedLoadStore->copyMetadata(*I);
-    I->eraseFromParent();
-  }
+  if (!SpeculatedConditionalLoadsStores.empty())
+    hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, Invert);
 
   // Insert selects and rewrite the PHI operands.
   IRBuilder<NoFolder> Builder(BI);
@@ -7449,7 +7454,7 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
     return requestResimplify();
 
   if (HoistCommon &&
-      hoistCommonCodeFromSuccessors(SI->getParent(), !Options.HoistCommonInsts))
+      hoistCommonCodeFromSuccessors(SI, !Options.HoistCommonInsts))
     return requestResimplify();
 
   return false;
@@ -7807,8 +7812,8 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
   // can hoist it up to the branching block.
   if (BI->getSuccessor(0)->getSinglePredecessor()) {
     if (BI->getSuccessor(1)->getSinglePredecessor()) {
-      if (HoistCommon && hoistCommonCodeFromSuccessors(
-                             BI->getParent(), !Options.HoistCommonInsts))
+      if (HoistCommon &&
+          hoistCommonCodeFromSuccessors(BI, !Options.HoistCommonInsts))
        return requestResimplify();
    } else {
      // If Successor #1 has multiple preds, we may be able to conditionally
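
For orientation, the branch shape targeted by hoistConditionalLoadsStores
corresponds to source code like the sketch below. This is a minimal
illustration mirroring the IR pattern in the new doc comment, not part of the
commit: the names are invented, and the transform only fires when
isSafeCheapLoadStore and TTI.hasConditionalLoadStoreForType accept the
accesses.

    // Both arms reduce to llvm.masked.load/store guarded by %cond and ~%cond.
    void f(unsigned x, unsigned y, int *p, int *q, int *b) {
      if (x < y)   // %cond = icmp ult %x, %y
        *p = *b;   // becomes a masked load of %b and a masked store to %p
      else
        *q = 1;    // becomes a masked store to %q under the inverted mask
    }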