-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[X86,SimplifyCFG] Support hoisting load/store with conditional faulting (Part II) #108812
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 11 commits
1077be1
9d73fdd
15e25fb
40e2130
d6d50c4
de28ed9
8ec9409
fc7df9a
dfe6cc6
1ad8714
5a5de39
b1bda56
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1664,18 +1664,35 @@ static bool areIdenticalUpToCommutativity(const Instruction *I1, | |
| static void hoistConditionalLoadsStores( | ||
| BranchInst *BI, | ||
| SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores, | ||
| bool Invert) { | ||
| std::optional<bool> Invert) { | ||
| auto &Context = BI->getParent()->getContext(); | ||
| auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1); | ||
| auto *Cond = BI->getOperand(0); | ||
| // Construct the condition if needed. | ||
| BasicBlock *BB = BI->getParent(); | ||
| IRBuilder<> Builder(SpeculatedConditionalLoadsStores.back()); | ||
| Value *Mask = Builder.CreateBitCast( | ||
| Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond, | ||
| VCondTy); | ||
| IRBuilder<> Builder( | ||
| Invert.has_value() ? SpeculatedConditionalLoadsStores.back() : BI); | ||
| Value *Mask = nullptr; | ||
| Value *MaskFalse = nullptr; | ||
| Value *MaskTrue = nullptr; | ||
| if (Invert.has_value()) { | ||
| Mask = Builder.CreateBitCast( | ||
| *Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond, | ||
| VCondTy); | ||
| } else { | ||
| MaskFalse = Builder.CreateBitCast( | ||
| Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy); | ||
| MaskTrue = Builder.CreateBitCast(Cond, VCondTy); | ||
| } | ||
| auto PeekThroughBitcasts = [](Value *V) { | ||
| while (auto *BitCast = dyn_cast<BitCastInst>(V)) | ||
| V = BitCast->getOperand(0); | ||
| return V; | ||
| }; | ||
| for (auto *I : SpeculatedConditionalLoadsStores) { | ||
| IRBuilder<> Builder(I); | ||
| IRBuilder<> Builder(Invert.has_value() ? I : BI); | ||
| if (!Invert.has_value()) | ||
| Mask = I->getParent() == BI->getSuccessor(0) ? MaskTrue : MaskFalse; | ||
| // We currently assume conditional faulting load/store is supported for | ||
| // scalar types only when creating new instructions. This can be easily | ||
| // extended for vector types in the future. | ||
|
|
@@ -1687,12 +1704,14 @@ static void hoistConditionalLoadsStores( | |
| auto *Ty = I->getType(); | ||
| PHINode *PN = nullptr; | ||
| Value *PassThru = nullptr; | ||
| for (User *U : I->users()) | ||
| if ((PN = dyn_cast<PHINode>(U))) { | ||
| PassThru = Builder.CreateBitCast(PN->getIncomingValueForBlock(BB), | ||
| FixedVectorType::get(Ty, 1)); | ||
| break; | ||
| } | ||
| if (Invert.has_value()) | ||
| for (User *U : I->users()) | ||
| if ((PN = dyn_cast<PHINode>(U))) { | ||
| PassThru = Builder.CreateBitCast( | ||
| PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)), | ||
| FixedVectorType::get(Ty, 1)); | ||
| break; | ||
| } | ||
| MaskedLoadStore = Builder.CreateMaskedLoad( | ||
| FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru); | ||
| Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty); | ||
|
|
@@ -1701,8 +1720,8 @@ static void hoistConditionalLoadsStores( | |
| I->replaceAllUsesWith(NewLoadStore); | ||
| } else { | ||
| // Handle Store. | ||
| auto *StoredVal = | ||
| Builder.CreateBitCast(Op0, FixedVectorType::get(Op0->getType(), 1)); | ||
| auto *StoredVal = Builder.CreateBitCast( | ||
| PeekThroughBitcasts(Op0), FixedVectorType::get(Op0->getType(), 1)); | ||
| MaskedLoadStore = Builder.CreateMaskedStore( | ||
| StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask); | ||
| } | ||
|
|
@@ -3151,7 +3170,8 @@ static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, | |
| return HaveRewritablePHIs; | ||
| } | ||
|
|
||
| static bool isProfitableToSpeculate(const BranchInst *BI, bool Invert, | ||
| static bool isProfitableToSpeculate(const BranchInst *BI, | ||
| std::optional<bool> Invert, | ||
| const TargetTransformInfo &TTI) { | ||
| // If the branch is non-unpredictable, and is predicted to *not* branch to | ||
| // the `then` block, then avoid speculating it. | ||
|
|
@@ -3162,7 +3182,10 @@ static bool isProfitableToSpeculate(const BranchInst *BI, bool Invert, | |
| if (!extractBranchWeights(*BI, TWeight, FWeight) || (TWeight + FWeight) == 0) | ||
| return true; | ||
|
|
||
| uint64_t EndWeight = Invert ? TWeight : FWeight; | ||
| if (!Invert.has_value()) | ||
| return false; | ||
|
|
||
| uint64_t EndWeight = *Invert ? TWeight : FWeight; | ||
| BranchProbability BIEndProb = | ||
| BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight); | ||
| BranchProbability Likely = TTI.getPredictableBranchThreshold(); | ||
|
|
@@ -7854,6 +7877,35 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { | |
| if (HoistCommon && | ||
| hoistCommonCodeFromSuccessors(BI, !Options.HoistCommonInsts)) | ||
| return requestResimplify(); | ||
|
|
||
| if (BI && HoistLoadsStoresWithCondFaulting && | ||
| Options.HoistLoadsStoresWithCondFaulting && | ||
| isProfitableToSpeculate(BI, std::nullopt, TTI)) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, from your code, it seems the hoist can happen only when TWeight = FWeight = 0?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, we need a meaningful ratio here, but we haven't enabled PGO. So let's leave it until we do PGO tuning. |
||
| SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores; | ||
| auto CanSpeculateConditionalLoadsStores = [&]() { | ||
| for (auto *Succ : successors(BB)) { | ||
| for (Instruction &I : *Succ) { | ||
| if (I.isTerminator()) { | ||
| if (I.getNumSuccessors() > 1) | ||
| return false; | ||
|
Comment on lines
+7894
to
+7895
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add test case for this?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
| continue; | ||
| } else if (!isSafeCheapLoadStore(&I, TTI) || | ||
| SpeculatedConditionalLoadsStores.size() == | ||
| HoistLoadsStoresWithCondFaultingThreshold) { | ||
|
Comment on lines
+7898
to
+7899
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we should consider branch probability for this, e.g.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good idea, done! |
||
| return false; | ||
| } | ||
| SpeculatedConditionalLoadsStores.push_back(&I); | ||
| } | ||
| } | ||
| return !SpeculatedConditionalLoadsStores.empty(); | ||
| }; | ||
|
|
||
| if (CanSpeculateConditionalLoadsStores()) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It seems the lambda is used once, maybe looks better?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The advantage of using a lambda is that we can break out of the inner loop directly with a return. We would have to use goto or more flags if we changed to non-lambda code. |
||
| hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, | ||
| std::nullopt); | ||
| return requestResimplify(); | ||
| } | ||
| } | ||
| } else { | ||
| // If Successor #1 has multiple preds, we may be able to conditionally | ||
| // execute Successor #0 if it branches to Successor #1. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -276,34 +276,32 @@ if.false: ; preds = %if.true, %entry | |
| } | ||
|
|
||
| ;; Both of successor 0 and successor 1 have a single predecessor. | ||
| ;; TODO: Support transform for this case. | ||
| define void @single_predecessor(ptr %p, ptr %q, i32 %a) { | ||
| define i32 @single_predecessor(ptr %p, ptr %q, i32 %a) { | ||
| ; CHECK-LABEL: @single_predecessor( | ||
| ; CHECK-NEXT: entry: | ||
| ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0 | ||
| ; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] | ||
| ; CHECK: common.ret: | ||
| ; CHECK-NEXT: ret void | ||
| ; CHECK: if.end: | ||
| ; CHECK-NEXT: store i32 1, ptr [[Q:%.*]], align 4 | ||
| ; CHECK-NEXT: br label [[COMMON_RET:%.*]] | ||
| ; CHECK: if.then: | ||
| ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Q]], align 4 | ||
| ; CHECK-NEXT: store i32 [[TMP0]], ptr [[P:%.*]], align 4 | ||
| ; CHECK-NEXT: br label [[COMMON_RET]] | ||
| ; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[TOBOOL]], true | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1> | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i1 [[TOBOOL]] to <1 x i1> | ||
| ; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 1), ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP2]]) | ||
| ; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q]], i32 4, <1 x i1> [[TMP1]], <1 x i32> poison) | ||
| ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32 | ||
| ; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP3]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP1]]) | ||
| ; CHECK-NEXT: [[DOT:%.*]] = select i1 [[TOBOOL]], i32 2, i32 3 | ||
| ; CHECK-NEXT: ret i32 [[DOT]] | ||
| ; | ||
| entry: | ||
| %tobool = icmp ne i32 %a, 0 | ||
| br i1 %tobool, label %if.end, label %if.then | ||
|
|
||
| if.end: | ||
| store i32 1, ptr %q | ||
| ret void | ||
| ret i32 2 | ||
|
|
||
| if.then: | ||
| %0 = load i32, ptr %q | ||
| store i32 %0, ptr %p | ||
| ret void | ||
| ret i32 3 | ||
| } | ||
|
|
||
| ;; Hoist 6 stores. | ||
|
|
@@ -759,6 +757,43 @@ if.true: | |
| ret i32 %res | ||
| } | ||
|
|
||
| ;; The load in entry.if is expected to stay a plain, unhoisted load (see the CHECK lines): entry.if ends in a switch, i.e. a terminator with more than one successor. | |
| define i32 @multi_successors(i1 %c1, i32 %c2, ptr %p) { | |
|
||
| ; CHECK-LABEL: @multi_successors( | |
| ; CHECK-NEXT: entry: | |
| ; CHECK-NEXT: br i1 [[C1:%.*]], label [[ENTRY_IF:%.*]], label [[COMMON_RET:%.*]] | |
| ; CHECK: entry.if: | |
| ; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[P:%.*]], align 4 | |
| ; CHECK-NEXT: switch i32 [[C2:%.*]], label [[COMMON_RET]] [ | |
| ; CHECK-NEXT: i32 0, label [[SW_BB:%.*]] | |
| ; CHECK-NEXT: i32 1, label [[SW_BB]] | |
| ; CHECK-NEXT: ] | |
| ; CHECK: common.ret: | |
| ; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VAL]], [[ENTRY_IF]] ], [ 0, [[SW_BB]] ] | |
| ; CHECK-NEXT: ret i32 [[COMMON_RET_OP]] | |
| ; CHECK: sw.bb: | |
| ; CHECK-NEXT: br label [[COMMON_RET]] | |
| ; | |
| entry: | |
| br i1 %c1, label %entry.if, label %entry.else | |
|
|
||
| entry.if: ; preds = %entry | |
| %val = load i32, ptr %p, align 4 | |
| switch i32 %c2, label %return [ | |
| i32 0, label %sw.bb | |
| i32 1, label %sw.bb | |
| ] | |
|
|
||
| entry.else: ; preds = %entry | |
| ret i32 0 | |
|
|
||
| sw.bb: ; preds = %entry.if, %entry.if | |
| br label %return | |
|
|
||
| return: ; preds = %sw.bb, %entry.if | |
| %ret = phi i32 [ %val, %entry.if ], [ 0, %sw.bb ] | |
| ret i32 %ret | |
| } | |
|
|
||
| declare i32 @read_memory_only() readonly nounwind willreturn speculatable | ||
|
|
||
| !llvm.dbg.cu = !{!0} | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add a comment like
?
It's a little hard to know when it's nullopt w/o searching for the caller.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.