Merged
77 changes: 62 additions & 15 deletions llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1660,21 +1660,38 @@ static bool areIdenticalUpToCommutativity(const Instruction *I1,
/// \endcode
///
/// So we need to turn hoisted load/store into cload/cstore.
static void hoistConditionalLoadsStores(
static bool hoistConditionalLoadsStores(
BranchInst *BI,
SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
bool Invert) {
std::optional<bool> Invert) {
Contributor:
Add a comment like "\param Invert  ..."? It's a little hard to know when it's nullopt w/o searching for the caller.

Contributor Author:
Done.
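A possible shape for that parameter comment, as a rough sketch (the wording here is hypothetical, not necessarily what landed in the patch):

/// \param Invert  If std::nullopt, loads/stores from both successors of
///                \p BI are hoisted, and the mask for each instruction is
///                chosen from whichever successor it came from. Otherwise
///                only one successor is speculated, and *Invert says whether
///                the branch condition must be negated to form the mask.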

auto &Context = BI->getParent()->getContext();
auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
auto *Cond = BI->getOperand(0);
// Construct the condition if needed.
BasicBlock *BB = BI->getParent();
IRBuilder<> Builder(SpeculatedConditionalLoadsStores.back());
Value *Mask = Builder.CreateBitCast(
Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
VCondTy);
IRBuilder<> Builder(
Invert.has_value() ? SpeculatedConditionalLoadsStores.back() : BI);
Value *Mask = nullptr;
Value *MaskFalse = nullptr;
Value *MaskTrue = nullptr;
if (Invert.has_value()) {
Mask = Builder.CreateBitCast(
*Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
VCondTy);
} else {
MaskFalse = Builder.CreateBitCast(
Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy);
MaskTrue = Builder.CreateBitCast(Cond, VCondTy);
}
auto PeekThroughBitcasts = [](Value *V) {
while (auto *BitCast = dyn_cast<BitCastInst>(V))
V = BitCast->getOperand(0);
return V;
};
for (auto *I : SpeculatedConditionalLoadsStores) {
IRBuilder<> Builder(I);
IRBuilder<> Builder(Invert.has_value() ? I : BI);
if (!Invert.has_value())
Mask = I->getParent() == BI->getSuccessor(0) ? MaskTrue : MaskFalse;
// We currently assume conditional faulting load/store is supported for
// scalar types only when creating new instructions. This can be easily
// extended for vector types in the future.
@@ -1686,12 +1703,14 @@ static void hoistConditionalLoadsStores(
auto *Ty = I->getType();
PHINode *PN = nullptr;
Value *PassThru = nullptr;
for (User *U : I->users())
if ((PN = dyn_cast<PHINode>(U))) {
PassThru = Builder.CreateBitCast(PN->getIncomingValueForBlock(BB),
FixedVectorType::get(Ty, 1));
break;
}
if (Invert.has_value())
for (User *U : I->users())
if ((PN = dyn_cast<PHINode>(U))) {
PassThru = Builder.CreateBitCast(
PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)),
FixedVectorType::get(Ty, 1));
break;
}
MaskedLoadStore = Builder.CreateMaskedLoad(
FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru);
Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty);
@@ -1700,8 +1719,8 @@ static void hoistConditionalLoadsStores(
I->replaceAllUsesWith(NewLoadStore);
} else {
// Handle Store.
auto *StoredVal =
Builder.CreateBitCast(Op0, FixedVectorType::get(Op0->getType(), 1));
auto *StoredVal = Builder.CreateBitCast(
PeekThroughBitcasts(Op0), FixedVectorType::get(Op0->getType(), 1));
MaskedLoadStore = Builder.CreateMaskedStore(
StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
}
@@ -1725,6 +1744,7 @@ static void hoistConditionalLoadsStores(
MaskedLoadStore->copyMetadata(*I);
I->eraseFromParent();
}
return true;
Contributor:
I think this function always returns true; should we change it to void?

Contributor Author:
Done.
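Assuming the suggestion was adopted as-is, the declaration would presumably end up along these lines (a sketch, not the verified final signature):

static void hoistConditionalLoadsStores(
    BranchInst *BI,
    SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
    std::optional<bool> Invert);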

}

static bool isSafeCheapLoadStore(const Instruction *I,
@@ -7815,6 +7835,33 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (HoistCommon &&
hoistCommonCodeFromSuccessors(BI, !Options.HoistCommonInsts))
return requestResimplify();

if (BI && HoistLoadsStoresWithCondFaulting &&
Options.HoistLoadsStoresWithCondFaulting) {
SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
auto CanSpeculateConditionalLoadsStores = [&]() {
for (auto *Succ : successors(BB)) {
for (Instruction &I : *Succ) {
if (I.isTerminator()) {
if (I.getNumSuccessors() > 1)
return false;
Comment on lines +7894 to +7895
Contributor:
Add test case for this?

Contributor Author:
Done.

continue;
} else if (!isSafeCheapLoadStore(&I, TTI) ||
SpeculatedConditionalLoadsStores.size() ==
HoistLoadsStoresWithCondFaultingThreshold) {
Comment on lines +7898 to +7899
Contributor:
I think we should consider branch probability for this, e.g. isProfitableToSpeculate. If A has two successors B and C, it's not profitable to execute more instructions to eliminate the branch if the branch is well-predicted and the load/store comes from the unlikely successor.

Contributor Author:
Good idea, done!
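A minimal sketch of the kind of profitability gate the reviewer describes, assuming the extractBranchWeights helper from llvm/IR/ProfDataUtils.h; the function name and the ~20% cutoff below are made up for illustration, and the landed patch may instead hook into the existing isProfitableToSpeculate check the reviewer mentions:

#include "llvm/IR/Instructions.h"
#include "llvm/IR/ProfDataUtils.h"
using namespace llvm;

// Return true if the successor we would speculate is taken often enough
// that replacing the branch with masked loads/stores looks worthwhile.
static bool worthSpeculatingSuccessor(const BranchInst *BI,
                                      bool SpeculatingTrueSucc) {
  uint64_t TrueWeight = 0, FalseWeight = 0;
  // Without profile metadata there is nothing to compare against; allow it.
  if (!extractBranchWeights(*BI, TrueWeight, FalseWeight))
    return true;
  uint64_t Total = TrueWeight + FalseWeight;
  if (Total == 0)
    return true;
  uint64_t SuccWeight = SpeculatingTrueSucc ? TrueWeight : FalseWeight;
  // Hypothetical rule: only speculate if the successor runs at least ~20%
  // of the time; a well-predicted, rarely taken branch is cheaper than
  // unconditionally executing extra masked loads/stores.
  return SuccWeight * 5 >= Total;
}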

return false;
}
SpeculatedConditionalLoadsStores.push_back(&I);
}
}
return !SpeculatedConditionalLoadsStores.empty();
};

if (CanSpeculateConditionalLoadsStores() &&
hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
std::nullopt))
return requestResimplify();
}
} else {
// If Successor #1 has multiple preds, we may be able to conditionally
// execute Successor #0 if it branches to Successor #1.
26 changes: 12 additions & 14 deletions llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
@@ -276,34 +276,32 @@ if.false: ; preds = %if.true, %entry
}

;; Both of successor 0 and successor 1 have a single predecessor.
;; TODO: Support transform for this case.
define void @single_predecessor(ptr %p, ptr %q, i32 %a) {
define i32 @single_predecessor(ptr %p, ptr %q, i32 %a) {
; CHECK-LABEL: @single_predecessor(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
; CHECK: common.ret:
; CHECK-NEXT: ret void
; CHECK: if.end:
; CHECK-NEXT: store i32 1, ptr [[Q:%.*]], align 4
; CHECK-NEXT: br label [[COMMON_RET:%.*]]
; CHECK: if.then:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Q]], align 4
; CHECK-NEXT: store i32 [[TMP0]], ptr [[P:%.*]], align 4
; CHECK-NEXT: br label [[COMMON_RET]]
; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[TOBOOL]], true
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i1 [[TOBOOL]] to <1 x i1>
; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> <i32 1>, ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP2]])
; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q]], i32 4, <1 x i1> [[TMP1]], <1 x i32> poison)
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP3]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP1]])
; CHECK-NEXT: [[DOT:%.*]] = select i1 [[TOBOOL]], i32 2, i32 3
; CHECK-NEXT: ret i32 [[DOT]]
;
entry:
%tobool = icmp ne i32 %a, 0
br i1 %tobool, label %if.end, label %if.then

if.end:
store i32 1, ptr %q
ret void
ret i32 2

if.then:
%0 = load i32, ptr %q
store i32 %0, ptr %p
ret void
ret i32 3
}

;; Hoist 6 stores.