Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 95 additions & 5 deletions llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1621,11 +1621,101 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
// Note that even though we've just canonicalized this PHI, due to the
// worklist visitation order, there are no guarantess that *every* PHI
// has been canonicalized, so we can't just compare operands ranges.
if (!PN.isIdenticalToWhenDefined(&IdenticalPN))
continue;
// Just use that PHI instead then.
++NumPHICSEs;
return replaceInstUsesWith(PN, &IdenticalPN);
if (PN.isIdenticalToWhenDefined(&IdenticalPN)) {
// Just use that PHI instead then.
++NumPHICSEs;
return replaceInstUsesWith(PN, &IdenticalPN);
}

// Look for the following pattern and do PHI CSE to clean up the
// redundant %phi. Here %phi, %1 and %phi.next perform the same
// functionality as %identicalPhi and hence %phi can be eliminated.
//
// BB1:
// %identicalPhi = phi [ X, %BB0 ], [ %identicalPhi.next, %BB1 ]
// %phi = phi [ X, %BB0 ], [ %phi.next, %BB1 ]
// ...
// %identicalPhi.next = select %cmp, %val, %identicalPhi
// (or select %cmp, %identicalPhi, %val)
// %1 = select %cmp2, %identicalPhi, %phi
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please also add a commuted test with %1 = select %cmp2, %phi, %identicalPhi?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Twe more test cases added that reflect the commuted test, i.e., select_with_identical_phi_3() and select_with_identical_phi_4().

// %phi.next = select %cmp, %val, %1
// (or select %cmp, %1, %val)
//
// Prove that %phi and %identicalPhi are the same by induction:
//
// Base case: Both %phi and %identicalPhi are equal on entry to the loop.
// Inductive case:
// Suppose %phi and %identicalPhi are equal at iteration i.
// We look at their values at iteration i+1 which are %phi.next and
// %identicalPhi.next. They would have become different only when %cmp is
// false and the corresponding values %1 and %identicalPhi differ
// (similar reason for the other "or" case in the bracket).
//
// The only condition when %1 and %identicalPh could differ is when %cmp2
// is false and %1 is %phi, which contradicts our inductive hypothesis
// that %phi and %identicalPhi are equal. Thus %phi and %identicalPhi are
// always equal at iteration i+1.

if (PN.getNumIncomingValues() == 2) {
unsigned DiffVals = 0;
BasicBlock *DiffValBB = nullptr;
// Check that only the backedge incoming value is different.
for (unsigned i = 0; i < 2; i++) {
BasicBlock *PredBB = PN.getIncomingBlock(i);
if (PN.getIncomingValueForBlock(PredBB) !=
IdenticalPN.getIncomingValueForBlock(PredBB)) {
DiffVals++;
DiffValBB = PredBB;
}
}

if (DiffVals != 1)
continue;
// Now check that the backedge incoming values are two select
// instructions that are in the same BB, and have the same condition.
// Either their true values are the same, or their false values are
// the same.
auto *Val = PN.getIncomingValueForBlock(DiffValBB);
auto *IdenticalVal = IdenticalPN.getIncomingValueForBlock(DiffValBB);
if (!isa<SelectInst>(Val) || !isa<SelectInst>(IdenticalVal))
continue;

auto *SI = cast<SelectInst>(Val);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use dyn_cast instead of isa + cast.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, I've used dyn_cast now.

auto *IdenticalSI = cast<SelectInst>(IdenticalVal);
if (SI->getCondition() != IdenticalSI->getCondition() ||
(SI->getTrueValue() != IdenticalSI->getTrueValue() &&
SI->getFalseValue() != IdenticalSI->getFalseValue()))
continue;
Value *SIOtherVal = nullptr;
Value *IdenticalSIOtherVal = nullptr;
if (SI->getTrueValue() == IdenticalSI->getTrueValue()) {
SIOtherVal = SI->getFalseValue();
IdenticalSIOtherVal = IdenticalSI->getFalseValue();
} else {
SIOtherVal = SI->getTrueValue();
IdenticalSIOtherVal = IdenticalSI->getTrueValue();
}

// Now check that the other values in select, i.e., %1 and %identicalPhi,
// are essentially the same value within the same BB.
auto SameSelAndPhi = [&](SelectInst *SI, PHINode *IdenticalPN,
PHINode *PN) {
if (SI->getTrueValue() == IdenticalPN) {
return SI->getFalseValue() == PN;
}
return false;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (SI->getTrueValue() == IdenticalPN) {
return SI->getFalseValue() == PN;
}
return false;
return SI->getTrueValue() == IdenticalPN && SI->getFalseValue() == PN;

We don't need a lambda function.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed the lambda function.

};
if (!isa<SelectInst>(SIOtherVal) || !isa<PHINode>(IdenticalSIOtherVal))
continue;
if (cast<PHINode>(IdenticalSIOtherVal) != &IdenticalPN)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (cast<PHINode>(IdenticalSIOtherVal) != &IdenticalPN)
if (IdenticalSIOtherVal != &IdenticalPN)

Cast doesn't change the pointer.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed accordingly.

continue;
auto *SIOtherValAsSel = cast<SelectInst>(SIOtherVal);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use dyn_cast instead of isa + cast.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, I've used dyn_cast now.

if (!SameSelAndPhi(SIOtherValAsSel, &IdenticalPN, &PN))
continue;

++NumPHICSEs;
return replaceInstUsesWith(PN, &IdenticalPN);
}
}

// If this is an integer PHI and we know that it has an illegal type, see if
Expand Down
121 changes: 121 additions & 0 deletions llvm/test/Transforms/InstCombine/enhanced-phi-cse.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -S -passes=instcombine | FileCheck %s
@A = extern_weak global float, align 4

; %phi.to.remove acts the same as %v1, and can be eliminated with PHI CSE.
define void @enhanced_phi_cse(ptr %m, ptr %n, i32 %count) {
; CHECK-LABEL: @enhanced_phi_cse(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[V0:%.*]] = phi float [ 0x4415AF1D80000000, [[ENTRY:%.*]] ], [ [[V0_1:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[Q:%.*]] = phi ptr [ [[M:%.*]], [[ENTRY]] ], [ [[Q_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[C:%.*]] = phi ptr [ [[N:%.*]], [[ENTRY]] ], [ [[C_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[Q_LOAD:%.*]] = load float, ptr [[Q]], align 4
; CHECK-NEXT: [[C_LOAD:%.*]] = load float, ptr [[C]], align 4
; CHECK-NEXT: [[SUB:%.*]] = fsub float [[Q_LOAD]], [[C_LOAD]]
; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[SUB]], [[V0]]
; CHECK-NEXT: [[V0_1]] = select i1 [[CMP1]], float [[SUB]], float [[V0]]
; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[SUB]], [[V1]]
; CHECK-NEXT: [[V1_1]] = select i1 [[CMP2]], float [[SUB]], float [[V1]]
; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1
; CHECK-NEXT: [[Q_NEXT]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 4
; CHECK-NEXT: [[C_NEXT]] = getelementptr inbounds nuw i8, ptr [[C]], i64 4
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], [[COUNT:%.*]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]]
; CHECK: exit:
; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4
; CHECK-NEXT: ret void
;
entry:
br label %for.body

for.body: ; preds = %entry, %for.body
%v0 = phi float [ 0x4415AF1D80000000, %entry ], [ %v0.1, %for.body ]
%v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ]
%phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ]
%i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ]
%q = phi ptr [ %m, %entry ], [ %q.next, %for.body ]
%c = phi ptr [ %n, %entry ], [ %c.next, %for.body ]
%q.load = load float, ptr %q
%c.load = load float, ptr %c
%sub = fsub float %q.load, %c.load
%cmp1 = fcmp olt float %sub, %v0
%v0.1 = select i1 %cmp1, float %sub, float %v0
%same.as.v1 = select i1 %cmp1, float %v1, float %phi.to.remove
%cmp2 = fcmp ogt float %sub, %same.as.v1
%v1.1 = select i1 %cmp2, float %sub, float %v1
%phi.to.remove.next = select i1 %cmp2, float %sub, float %same.as.v1
%inc.i = add nuw nsw i32 %i, 1
%q.next = getelementptr inbounds i8, ptr %q, i64 4
%c.next = getelementptr inbounds i8, ptr %c, i64 4
%exitcond = icmp eq i32 %inc.i, %count
br i1 %exitcond, label %exit, label %for.body

exit:
%vl.1.lcssa = phi float [ %v1.1, %for.body ]
store float %vl.1.lcssa, ptr @A
ret void
}

; %phi.to.remove acts the same as %v1, and can be eliminated with PHI CSE.
; The difference from enhanced_phi_cse() is that the true and false values in
; %phi.to.remove.next and %v1.1 are swapped.
define void @enhanced_phi_cse_2(ptr %m, ptr %n, i32 %count) {
; CHECK-LABEL: @enhanced_phi_cse_2(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[V0:%.*]] = phi float [ 0x4415AF1D80000000, [[ENTRY:%.*]] ], [ [[V0_1:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[Q:%.*]] = phi ptr [ [[M:%.*]], [[ENTRY]] ], [ [[Q_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[C:%.*]] = phi ptr [ [[N:%.*]], [[ENTRY]] ], [ [[C_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[Q_LOAD:%.*]] = load float, ptr [[Q]], align 4
; CHECK-NEXT: [[C_LOAD:%.*]] = load float, ptr [[C]], align 4
; CHECK-NEXT: [[SUB:%.*]] = fsub float [[Q_LOAD]], [[C_LOAD]]
; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[SUB]], [[V0]]
; CHECK-NEXT: [[V0_1]] = select i1 [[CMP1]], float [[SUB]], float [[V0]]
; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[SUB]], [[V1]]
; CHECK-NEXT: [[V1_1]] = select i1 [[CMP2]], float [[V1]], float [[SUB]]
; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1
; CHECK-NEXT: [[Q_NEXT]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 4
; CHECK-NEXT: [[C_NEXT]] = getelementptr inbounds nuw i8, ptr [[C]], i64 4
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], [[COUNT:%.*]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]]
; CHECK: exit:
; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4
; CHECK-NEXT: ret void
;
entry:
br label %for.body

for.body: ; preds = %entry, %for.body
%v0 = phi float [ 0x4415AF1D80000000, %entry ], [ %v0.1, %for.body ]
%v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ]
%phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ]
%i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ]
%q = phi ptr [ %m, %entry ], [ %q.next, %for.body ]
%c = phi ptr [ %n, %entry ], [ %c.next, %for.body ]
%q.load = load float, ptr %q
%c.load = load float, ptr %c
%sub = fsub float %q.load, %c.load
%cmp1 = fcmp olt float %sub, %v0
%v0.1 = select i1 %cmp1, float %sub, float %v0
%same.as.v1 = select i1 %cmp1, float %v1, float %phi.to.remove
%cmp2 = fcmp ogt float %sub, %same.as.v1
%v1.1 = select i1 %cmp2, float %v1, float %sub
%phi.to.remove.next = select i1 %cmp2, float %same.as.v1, float %sub
%inc.i = add nuw nsw i32 %i, 1
%q.next = getelementptr inbounds i8, ptr %q, i64 4
%c.next = getelementptr inbounds i8, ptr %c, i64 4
%exitcond = icmp eq i32 %inc.i, %count
br i1 %exitcond, label %exit, label %for.body

exit:
%vl.1.lcssa = phi float [ %v1.1, %for.body ]
store float %vl.1.lcssa, ptr @A
ret void
}