diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index e08ef60dbede3..c4252e4aea764 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -4866,6 +4866,89 @@ static Value *simplifySelectWithFCmp(Value *Cond, Value *T, Value *F, return nullptr; } +/// Look for the following pattern and simplify %to_fold to %identicalPhi. +/// Here %phi, %to_fold and %phi.next perform the same functionality as +/// %identicalPhi and hence the select instruction %to_fold can be folded +/// into %identicalPhi. +/// +/// BB1: +/// %identicalPhi = phi [ X, %BB0 ], [ %identicalPhi.next, %BB1 ] +/// %phi = phi [ X, %BB0 ], [ %phi.next, %BB1 ] +/// ... +/// %identicalPhi.next = select %cmp, %val, %identicalPhi +/// (or select %cmp, %identicalPhi, %val) +/// %to_fold = select %cmp2, %identicalPhi, %phi +/// %phi.next = select %cmp, %val, %to_fold +/// (or select %cmp, %to_fold, %val) +/// +/// Prove that %phi and %identicalPhi are the same by induction: +/// +/// Base case: Both %phi and %identicalPhi are equal on entry to the loop. +/// Inductive case: +/// Suppose %phi and %identicalPhi are equal at iteration i. +/// We look at their values at iteration i+1 which are %phi.next and +/// %identicalPhi.next. They would have become different only when %cmp is +/// false and the corresponding values %to_fold and %identicalPhi differ +/// (similar reason for the other "or" case in the bracket). +/// +/// The only condition when %to_fold and %identicalPhi could differ is when +/// %cmp2 is false and %to_fold is %phi, which contradicts our inductive +/// hypothesis that %phi and %identicalPhi are equal. Thus %phi and +/// %identicalPhi are always equal at iteration i+1. 
+bool isSimplifierIdenticalPHI(PHINode &PN, PHINode &IdenticalPN) { + if (PN.getParent() != IdenticalPN.getParent()) + return false; + if (PN.getNumIncomingValues() != 2) + return false; + + // Check that only the backedge incoming value is different. + unsigned DiffVals = 0; + BasicBlock *DiffValBB = nullptr; + for (unsigned i = 0; i < 2; i++) { + BasicBlock *PredBB = PN.getIncomingBlock(i); + if (PN.getIncomingValueForBlock(PredBB) != + IdenticalPN.getIncomingValueForBlock(PredBB)) { + DiffVals++; + DiffValBB = PredBB; + } + } + if (DiffVals != 1) + return false; + // Now check that the backedge incoming values are two select + // instructions with the same condition. Either their true + // values are the same, or their false values are the same. + auto *SI = dyn_cast(PN.getIncomingValueForBlock(DiffValBB)); + auto *IdenticalSI = + dyn_cast(IdenticalPN.getIncomingValueForBlock(DiffValBB)); + if (!SI || !IdenticalSI) + return false; + if (SI->getCondition() != IdenticalSI->getCondition()) + return false; + + SelectInst *SIOtherVal = nullptr; + Value *IdenticalSIOtherVal = nullptr; + if (SI->getTrueValue() == IdenticalSI->getTrueValue()) { + SIOtherVal = dyn_cast(SI->getFalseValue()); + IdenticalSIOtherVal = IdenticalSI->getFalseValue(); + } else if (SI->getFalseValue() == IdenticalSI->getFalseValue()) { + SIOtherVal = dyn_cast(SI->getTrueValue()); + IdenticalSIOtherVal = IdenticalSI->getTrueValue(); + } else { + return false; + } + + // Now check that the other values in select, i.e., %to_fold and + // %identicalPhi, are essentially the same value. + if (!SIOtherVal || IdenticalSIOtherVal != &IdenticalPN) + return false; + if (!(SIOtherVal->getTrueValue() == &IdenticalPN && + SIOtherVal->getFalseValue() == &PN) && + !(SIOtherVal->getTrueValue() == &PN && + SIOtherVal->getFalseValue() == &IdenticalPN)) + return false; + return true; +} + /// Given operands for a SelectInst, see if we can fold the result. /// If not, this returns null. 
static Value *simplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, @@ -5041,7 +5124,14 @@ static Value *simplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, std::optional Imp = isImpliedByDomCondition(Cond, Q.CxtI, Q.DL); if (Imp) return *Imp ? TrueVal : FalseVal; - + // Look for same PHIs in the true and false values. + if (auto *TruePHI = dyn_cast(TrueVal)) + if (auto *FalsePHI = dyn_cast(FalseVal)) { + if (isSimplifierIdenticalPHI(*TruePHI, *FalsePHI)) + return FalseVal; + if (isSimplifierIdenticalPHI(*FalsePHI, *TruePHI)) + return TrueVal; + } return nullptr; } diff --git a/llvm/test/Transforms/InstCombine/select_with_identical_phi.ll b/llvm/test/Transforms/InstCombine/select_with_identical_phi.ll new file mode 100644 index 0000000000000..7816781250799 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/select_with_identical_phi.ll @@ -0,0 +1,243 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -S -passes=instcombine | FileCheck %s +@A = extern_weak global float, align 4 + +; %same.as.v1 is a select with two phis %v1 and %phi.to.remove as the true +; and false values, while %v1 and %phi.to.remove are actually the same. +; Fold the selection instruction %same.as.v1 to %v1. 
+define void @select_with_identical_phi(ptr %m, ptr %n, i32 %count) { +; CHECK-LABEL: @select_with_identical_phi( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[V0:%.*]] = phi float [ 0x4415AF1D80000000, [[ENTRY:%.*]] ], [ [[V0_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[Q:%.*]] = phi ptr [ [[M:%.*]], [[ENTRY]] ], [ [[Q_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[C:%.*]] = phi ptr [ [[N:%.*]], [[ENTRY]] ], [ [[C_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[Q_LOAD:%.*]] = load float, ptr [[Q]], align 4 +; CHECK-NEXT: [[C_LOAD:%.*]] = load float, ptr [[C]], align 4 +; CHECK-NEXT: [[SUB:%.*]] = fsub float [[Q_LOAD]], [[C_LOAD]] +; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[SUB]], [[V0]] +; CHECK-NEXT: [[V0_1]] = select i1 [[CMP1]], float [[SUB]], float [[V0]] +; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[SUB]], [[V1]] +; CHECK-NEXT: [[V1_1]] = select i1 [[CMP2]], float [[SUB]], float [[V1]] +; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: [[Q_NEXT]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 4 +; CHECK-NEXT: [[C_NEXT]] = getelementptr inbounds nuw i8, ptr [[C]], i64 4 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], [[COUNT:%.*]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]] +; CHECK: exit: +; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4 +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %v0 = phi float [ 0x4415AF1D80000000, %entry ], [ %v0.1, %for.body ] + %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ] + %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ] + %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ] + %q = phi ptr [ %m, %entry ], [ %q.next, %for.body ] 
+ %c = phi ptr [ %n, %entry ], [ %c.next, %for.body ] + %q.load = load float, ptr %q + %c.load = load float, ptr %c + %sub = fsub float %q.load, %c.load + %cmp1 = fcmp olt float %sub, %v0 + %v0.1 = select i1 %cmp1, float %sub, float %v0 + %same.as.v1 = select i1 %cmp1, float %v1, float %phi.to.remove + %cmp2 = fcmp ogt float %sub, %same.as.v1 + %v1.1 = select i1 %cmp2, float %sub, float %v1 + %phi.to.remove.next = select i1 %cmp2, float %sub, float %same.as.v1 + %inc.i = add nuw nsw i32 %i, 1 + %q.next = getelementptr inbounds i8, ptr %q, i64 4 + %c.next = getelementptr inbounds i8, ptr %c, i64 4 + %exitcond = icmp eq i32 %inc.i, %count + br i1 %exitcond, label %exit, label %for.body + +exit: + %vl.1.lcssa = phi float [ %v1.1, %for.body ] + store float %vl.1.lcssa, ptr @A + ret void +} + +; The difference from select_with_identical_phi() is that the true and false values in +; %phi.to.remove.next and %v1.1 are swapped. +; Check that %same.as.v1 can be folded. +define void @select_with_identical_phi_2(ptr %m, ptr %n, i32 %count) { +; CHECK-LABEL: @select_with_identical_phi_2( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[V0:%.*]] = phi float [ 0x4415AF1D80000000, [[ENTRY:%.*]] ], [ [[V0_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[Q:%.*]] = phi ptr [ [[M:%.*]], [[ENTRY]] ], [ [[Q_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[C:%.*]] = phi ptr [ [[N:%.*]], [[ENTRY]] ], [ [[C_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[Q_LOAD:%.*]] = load float, ptr [[Q]], align 4 +; CHECK-NEXT: [[C_LOAD:%.*]] = load float, ptr [[C]], align 4 +; CHECK-NEXT: [[SUB:%.*]] = fsub float [[Q_LOAD]], [[C_LOAD]] +; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[SUB]], [[V0]] +; CHECK-NEXT: [[V0_1]] = select i1 [[CMP1]], float [[SUB]], float [[V0]] +; CHECK-NEXT: 
[[CMP2:%.*]] = fcmp ogt float [[SUB]], [[V1]] +; CHECK-NEXT: [[V1_1]] = select i1 [[CMP2]], float [[V1]], float [[SUB]] +; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: [[Q_NEXT]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 4 +; CHECK-NEXT: [[C_NEXT]] = getelementptr inbounds nuw i8, ptr [[C]], i64 4 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], [[COUNT:%.*]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]] +; CHECK: exit: +; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4 +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %v0 = phi float [ 0x4415AF1D80000000, %entry ], [ %v0.1, %for.body ] + %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ] + %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ] + %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ] + %q = phi ptr [ %m, %entry ], [ %q.next, %for.body ] + %c = phi ptr [ %n, %entry ], [ %c.next, %for.body ] + %q.load = load float, ptr %q + %c.load = load float, ptr %c + %sub = fsub float %q.load, %c.load + %cmp1 = fcmp olt float %sub, %v0 + %v0.1 = select i1 %cmp1, float %sub, float %v0 + %same.as.v1 = select i1 %cmp1, float %v1, float %phi.to.remove + %cmp2 = fcmp ogt float %sub, %same.as.v1 + %v1.1 = select i1 %cmp2, float %v1, float %sub + %phi.to.remove.next = select i1 %cmp2, float %same.as.v1, float %sub + %inc.i = add nuw nsw i32 %i, 1 + %q.next = getelementptr inbounds i8, ptr %q, i64 4 + %c.next = getelementptr inbounds i8, ptr %c, i64 4 + %exitcond = icmp eq i32 %inc.i, %count + br i1 %exitcond, label %exit, label %for.body + +exit: + %vl.1.lcssa = phi float [ %v1.1, %for.body ] + store float %vl.1.lcssa, ptr @A + ret void +} + +; The difference from select_with_identical_phi() is that the true and false values in +; same.as.v1 are swapped. +; Check that %same.as.v1 can be folded. 
+define void @select_with_identical_phi_3(ptr %m, ptr %n, i32 %count) { +; CHECK-LABEL: @select_with_identical_phi_3( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[V0:%.*]] = phi float [ 0x4415AF1D80000000, [[ENTRY:%.*]] ], [ [[V0_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[Q:%.*]] = phi ptr [ [[M:%.*]], [[ENTRY]] ], [ [[Q_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[C:%.*]] = phi ptr [ [[N:%.*]], [[ENTRY]] ], [ [[C_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[Q_LOAD:%.*]] = load float, ptr [[Q]], align 4 +; CHECK-NEXT: [[C_LOAD:%.*]] = load float, ptr [[C]], align 4 +; CHECK-NEXT: [[SUB:%.*]] = fsub float [[Q_LOAD]], [[C_LOAD]] +; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[SUB]], [[V0]] +; CHECK-NEXT: [[V0_1]] = select i1 [[CMP1]], float [[SUB]], float [[V0]] +; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[SUB]], [[V1]] +; CHECK-NEXT: [[V1_1]] = select i1 [[CMP2]], float [[SUB]], float [[V1]] +; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: [[Q_NEXT]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 4 +; CHECK-NEXT: [[C_NEXT]] = getelementptr inbounds nuw i8, ptr [[C]], i64 4 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], [[COUNT:%.*]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]] +; CHECK: exit: +; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4 +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %v0 = phi float [ 0x4415AF1D80000000, %entry ], [ %v0.1, %for.body ] + %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ] + %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ] + %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ] + %q = phi ptr [ %m, %entry ], [ %q.next, 
%for.body ] + %c = phi ptr [ %n, %entry ], [ %c.next, %for.body ] + %q.load = load float, ptr %q + %c.load = load float, ptr %c + %sub = fsub float %q.load, %c.load + %cmp1 = fcmp olt float %sub, %v0 + %v0.1 = select i1 %cmp1, float %sub, float %v0 + %same.as.v1 = select i1 %cmp1, float %phi.to.remove, float %v1 + %cmp2 = fcmp ogt float %sub, %same.as.v1 + %v1.1 = select i1 %cmp2, float %sub, float %v1 + %phi.to.remove.next = select i1 %cmp2, float %sub, float %same.as.v1 + %inc.i = add nuw nsw i32 %i, 1 + %q.next = getelementptr inbounds i8, ptr %q, i64 4 + %c.next = getelementptr inbounds i8, ptr %c, i64 4 + %exitcond = icmp eq i32 %inc.i, %count + br i1 %exitcond, label %exit, label %for.body + +exit: + %vl.1.lcssa = phi float [ %v1.1, %for.body ] + store float %vl.1.lcssa, ptr @A + ret void +} + +; The difference from select_with_identical_phi() is that the true and false values in +; %same.as.v1, %phi.to.remove.next and %v1.1 are swapped. +; Check that %same.as.v1 can be folded. +define void @select_with_identical_phi_4(ptr %m, ptr %n, i32 %count) { +; CHECK-LABEL: @select_with_identical_phi_4( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[V0:%.*]] = phi float [ 0x4415AF1D80000000, [[ENTRY:%.*]] ], [ [[V0_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[Q:%.*]] = phi ptr [ [[M:%.*]], [[ENTRY]] ], [ [[Q_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[C:%.*]] = phi ptr [ [[N:%.*]], [[ENTRY]] ], [ [[C_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[Q_LOAD:%.*]] = load float, ptr [[Q]], align 4 +; CHECK-NEXT: [[C_LOAD:%.*]] = load float, ptr [[C]], align 4 +; CHECK-NEXT: [[SUB:%.*]] = fsub float [[Q_LOAD]], [[C_LOAD]] +; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[SUB]], [[V0]] +; CHECK-NEXT: [[V0_1]] = select i1 [[CMP1]], float [[SUB]], 
float [[V0]] +; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[SUB]], [[V1]] +; CHECK-NEXT: [[V1_1]] = select i1 [[CMP2]], float [[V1]], float [[SUB]] +; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: [[Q_NEXT]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 4 +; CHECK-NEXT: [[C_NEXT]] = getelementptr inbounds nuw i8, ptr [[C]], i64 4 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], [[COUNT:%.*]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]] +; CHECK: exit: +; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4 +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %v0 = phi float [ 0x4415AF1D80000000, %entry ], [ %v0.1, %for.body ] + %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ] + %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ] + %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ] + %q = phi ptr [ %m, %entry ], [ %q.next, %for.body ] + %c = phi ptr [ %n, %entry ], [ %c.next, %for.body ] + %q.load = load float, ptr %q + %c.load = load float, ptr %c + %sub = fsub float %q.load, %c.load + %cmp1 = fcmp olt float %sub, %v0 + %v0.1 = select i1 %cmp1, float %sub, float %v0 + %same.as.v1 = select i1 %cmp1, float %phi.to.remove, float %v1 + %cmp2 = fcmp ogt float %sub, %same.as.v1 + %v1.1 = select i1 %cmp2, float %v1, float %sub + %phi.to.remove.next = select i1 %cmp2, float %same.as.v1, float %sub + %inc.i = add nuw nsw i32 %i, 1 + %q.next = getelementptr inbounds i8, ptr %q, i64 4 + %c.next = getelementptr inbounds i8, ptr %c, i64 4 + %exitcond = icmp eq i32 %inc.i, %count + br i1 %exitcond, label %exit, label %for.body + +exit: + %vl.1.lcssa = phi float [ %v1.1, %for.body ] + store float %vl.1.lcssa, ptr @A + ret void +}