diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 27b7ec4629a26..ef110771c39a3 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -421,11 +421,11 @@ static InstructionCost computeSpeculationCost(const User *I,
 /// After this function returns, Cost is increased by the cost of
 /// V plus its non-dominating operands. If that cost is greater than
 /// Budget, false is returned and Cost is undefined.
-static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt,
-                                SmallPtrSetImpl<Instruction *> &AggressiveInsts,
-                                InstructionCost &Cost, InstructionCost Budget,
-                                const TargetTransformInfo &TTI,
-                                AssumptionCache *AC, unsigned Depth = 0) {
+static bool dominatesMergePoint(
+    Value *V, BasicBlock *BB, Instruction *InsertPt,
+    SmallPtrSetImpl<Instruction *> &AggressiveInsts, InstructionCost &Cost,
+    InstructionCost Budget, const TargetTransformInfo &TTI, AssumptionCache *AC,
+    SmallPtrSetImpl<Instruction *> &ZeroCostInstructions, unsigned Depth = 0) {
   // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
   // so limit the recursion depth.
   // TODO: While this recursion limit does prevent pathological behavior, it
@@ -463,7 +463,17 @@ static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt,
   if (!isSafeToSpeculativelyExecute(I, InsertPt, AC))
     return false;
 
-  Cost += computeSpeculationCost(I, TTI);
+  // An overflow arithmetic instruction plus an extractvalue is usually
+  // generated when a division is replaced, but the zero check on the divisor
+  // may still be kept in the code. In that case it is worth hoisting these
+  // two instructions out of the basic block, so treat this pattern as a
+  // single cheap instruction here.
+  WithOverflowInst *OverflowInst;
+  if (match(I, m_ExtractValue<1>(m_OneUse(m_WithOverflowInst(OverflowInst))))) {
+    ZeroCostInstructions.insert(OverflowInst);
+    Cost += 1;
+  } else if (!ZeroCostInstructions.contains(I))
+    Cost += computeSpeculationCost(I, TTI);
 
   // Allow exactly one instruction to be speculated regardless of its cost
   // (as long as it is safe to do so).
@@ -480,7 +490,7 @@ static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt,
   // not take us over the cost threshold.
   for (Use &Op : I->operands())
     if (!dominatesMergePoint(Op, BB, InsertPt, AggressiveInsts, Cost, Budget,
-                             TTI, AC, Depth + 1))
+                             TTI, AC, ZeroCostInstructions, Depth + 1))
       return false;
   // Okay, it's safe to do this! Remember this instruction.
   AggressiveInsts.insert(I);
@@ -3796,6 +3806,7 @@ static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
   // instructions. While we are at it, keep track of the instructions
   // that need to be moved to the dominating block.
   SmallPtrSet<Instruction *, 4> AggressiveInsts;
+  SmallPtrSet<Instruction *, 4> ZeroCostInstructions;
   InstructionCost Cost = 0;
   InstructionCost Budget =
       TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
@@ -3813,9 +3824,11 @@ static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
     }
 
     if (!dominatesMergePoint(PN->getIncomingValue(0), BB, DomBI,
-                             AggressiveInsts, Cost, Budget, TTI, AC) ||
+                             AggressiveInsts, Cost, Budget, TTI, AC,
+                             ZeroCostInstructions) ||
         !dominatesMergePoint(PN->getIncomingValue(1), BB, DomBI,
-                             AggressiveInsts, Cost, Budget, TTI, AC))
+                             AggressiveInsts, Cost, Budget, TTI, AC,
+                             ZeroCostInstructions))
       return Changed;
   }
 
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/unsigned-multiply-overflow-check.ll b/llvm/test/Transforms/PhaseOrdering/X86/unsigned-multiply-overflow-check.ll
index 7bcb6ce17df0e..9858591dfc700 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/unsigned-multiply-overflow-check.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/unsigned-multiply-overflow-check.ll
@@ -45,26 +45,17 @@ define i1 @will_not_overflow(i64 %arg, i64 %arg1) {
 ; INSTCOMBINESIMPLIFYCFGONLY-LABEL: @will_not_overflow(
 ; INSTCOMBINESIMPLIFYCFGONLY-NEXT:  bb:
 ; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
-; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
-; INSTCOMBINESIMPLIFYCFGONLY:       bb2:
 ; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
 ; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
-; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    br label [[BB5]]
-; INSTCOMBINESIMPLIFYCFGONLY:       bb5:
-; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[T6:%.*]] = phi i1 [ false, [[BB:%.*]] ], [ [[MUL_OV]], [[BB2]] ]
+; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[T6:%.*]] = select i1 [[T0]], i1 false, i1 [[MUL_OV]]
 ; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    ret i1 [[T6]]
 ;
 ; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-LABEL: @will_not_overflow(
 ; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:  bb:
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE:       bb2:
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[ARG1:%.*]] = freeze i64 [[ARG2:%.*]]
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG:%.*]], i64 [[ARG1]])
 ; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    br label [[BB5]]
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE:       bb5:
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[T6:%.*]] = phi i1 [ false, [[BB:%.*]] ], [ [[MUL_OV]], [[BB2]] ]
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    ret i1 [[T6]]
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    ret i1 [[MUL_OV]]
 ;
 bb:
   %t0 = icmp eq i64 %arg, 0
@@ -112,28 +103,19 @@ define i1 @will_overflow(i64 %arg, i64 %arg1) {
 ; INSTCOMBINESIMPLIFYCFGONLY-LABEL: @will_overflow(
 ; INSTCOMBINESIMPLIFYCFGONLY-NEXT:  bb:
 ; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
-; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
-; INSTCOMBINESIMPLIFYCFGONLY:       bb2:
 ; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
 ; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
 ; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[PHI_BO:%.*]] = xor i1 [[MUL_OV]], true
-; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    br label [[BB5]]
-; INSTCOMBINESIMPLIFYCFGONLY:       bb5:
-; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[T6:%.*]] = phi i1 [ true, [[BB:%.*]] ], [ [[PHI_BO]], [[BB2]] ]
+; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[T6:%.*]] = select i1 [[T0]], i1 true, i1 [[PHI_BO]]
 ; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    ret i1 [[T6]]
 ;
 ; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-LABEL: @will_overflow(
 ; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:  bb:
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE:       bb2:
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[ARG1:%.*]] = freeze i64 [[ARG2:%.*]]
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG:%.*]], i64 [[ARG1]])
 ; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
 ; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[PHI_BO:%.*]] = xor i1 [[MUL_OV]], true
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    br label [[BB5]]
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE:       bb5:
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[T6:%.*]] = phi i1 [ true, [[BB:%.*]] ], [ [[PHI_BO]], [[BB2]] ]
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    ret i1 [[T6]]
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    ret i1 [[PHI_BO]]
 ;
 bb:
   %t0 = icmp eq i64 %arg, 0
diff --git a/llvm/test/Transforms/SimplifyCFG/RISCV/umul-extract-pattern.ll b/llvm/test/Transforms/SimplifyCFG/RISCV/umul-extract-pattern.ll
new file mode 100644
index 0000000000000..0d431e2293b69
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/RISCV/umul-extract-pattern.ll
@@ -0,0 +1,129 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S < %s | FileCheck %s
+target triple = "riscv64-unknown-unknown-elf"
+
+define i16 @basicScenario(i64 %x, i64 %y) {
+; CHECK-LABEL: @basicScenario(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i64 [[Y:%.*]], 0
+; CHECK-NEXT:    [[MUL:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[Y]], i64 [[X:%.*]])
+; CHECK-NEXT:    [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
+; CHECK-NEXT:    [[TMP0:%.*]] = select i1 [[CMP_NOT]], i1 false, i1 [[MUL_OV]]
+; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[TMP0]] to i16
+; CHECK-NEXT:    ret i16 [[CONV]]
+;
+entry:
+  %cmp.not = icmp eq i64 %y, 0
+  br i1 %cmp.not, label %land.end, label %land.rhs
+
+land.rhs:                                         ; preds = %entry
+  %mul = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %y, i64 %x)
+  %mul.ov = extractvalue { i64, i1 } %mul, 1
+  br label %land.end
+
+land.end:                                         ; preds = %land.rhs, %entry
+  %result = phi i1 [ false, %entry ], [ %mul.ov, %land.rhs ]
+  %conv = zext i1 %result to i16
+  ret i16 %conv
+}
+
+define i16 @samePatternTwice(i64 %x, i64 %y) {
+; CHECK-LABEL: @samePatternTwice(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i64 [[Y:%.*]], 0
+; CHECK-NEXT:    [[MUL:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[Y]], i64 [[X:%.*]])
+; CHECK-NEXT:    [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
+; CHECK-NEXT:    [[MUL2:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[Y]], i64 [[X]])
+; CHECK-NEXT:    [[MUL_OV2:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
+; CHECK-NEXT:    [[TMP0:%.*]] = select i1 [[CMP_NOT]], i1 false, i1 [[MUL_OV]]
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[CMP_NOT]], i1 false, i1 [[MUL_OV2]]
+; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[TMP0]] to i16
+; CHECK-NEXT:    [[CONV2:%.*]] = zext i1 [[TMP1]] to i16
+; CHECK-NEXT:    [[TORET:%.*]] = add nsw i16 [[CONV]], [[CONV2]]
+; CHECK-NEXT:    ret i16 [[TORET]]
+;
+entry:
+  %cmp.not = icmp eq i64 %y, 0
+  br i1 %cmp.not, label %land.end, label %land.rhs
+
+land.rhs:                                         ; preds = %entry
+  %mul = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %y, i64 %x)
+  %mul.ov = extractvalue { i64, i1 } %mul, 1
+  %mul2 = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %y, i64 %x)
+  %mul.ov2 = extractvalue { i64, i1 } %mul2, 1
+  br label %land.end
+
+land.end:                                         ; preds = %land.rhs, %entry
+  %result1 = phi i1 [ false, %entry ], [ %mul.ov, %land.rhs ]
+  %result2 = phi i1 [ false, %entry ], [ %mul.ov2, %land.rhs ]
+  %conv1 = zext i1 %result1 to i16
+  %conv2 = zext i1 %result2 to i16
+  %toRet = add nsw i16 %conv1, %conv2
+  ret i16 %toRet
+}
+
+define i16 @stillHoistNotTooExpensive(i64 %x, i64 %y) {
+; CHECK-LABEL: @stillHoistNotTooExpensive(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i64 [[Y:%.*]], 0
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i64 [[Y]], [[X:%.*]]
+; CHECK-NEXT:    [[MUL:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ADD]], i64 [[X]])
+; CHECK-NEXT:    [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
+; CHECK-NEXT:    [[TMP0:%.*]] = select i1 [[CMP_NOT]], i1 false, i1 [[MUL_OV]]
+; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[TMP0]] to i16
+; CHECK-NEXT:    ret i16 [[CONV]]
+;
+entry:
+  %cmp.not = icmp eq i64 %y, 0
+  br i1 %cmp.not, label %land.end, label %land.rhs
+
+land.rhs:                                         ; preds = %entry
+  %add = add nsw i64 %y, %x
+  %mul = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %add, i64 %x)
+  %mul.ov = extractvalue { i64, i1 } %mul, 1
+  br label %land.end
+
+land.end:                                         ; preds = %land.rhs, %entry
+  %result = phi i1 [ false, %entry ], [ %mul.ov, %land.rhs ]
+  %conv = zext i1 %result to i16
+  ret i16 %conv
+}
+
+define i16 @noHoistTooExpensive(i64 %x, i64 %y) {
+; CHECK-LABEL: @noHoistTooExpensive(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i64 [[Y:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[LAND_END:%.*]], label [[LAND_RHS:%.*]]
+; CHECK:       land.rhs:
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i64 [[Y]], [[X:%.*]]
+; CHECK-NEXT:    [[ADD2:%.*]] = add nsw i64 [[Y]], [[ADD]]
+; CHECK-NEXT:    [[ADD3:%.*]] = add nsw i64 [[ADD]], [[ADD2]]
+; CHECK-NEXT:    [[ADD4:%.*]] = add nsw i64 [[ADD2]], [[ADD3]]
+; CHECK-NEXT:    [[ADD5:%.*]] = add nsw i64 [[ADD3]], [[ADD4]]
+; CHECK-NEXT:    [[MUL:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ADD5]], i64 [[X]])
+; CHECK-NEXT:    [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
+; CHECK-NEXT:    br label [[LAND_END]]
+; CHECK:       land.end:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[MUL_OV]], [[LAND_RHS]] ]
+; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[TMP0]] to i16
+; CHECK-NEXT:    ret i16 [[CONV]]
+;
+entry:
+  %cmp.not = icmp eq i64 %y, 0
+  br i1 %cmp.not, label %land.end, label %land.rhs
+
+land.rhs:                                         ; preds = %entry
+  %add = add nsw i64 %y, %x
+  %add2 = add nsw i64 %y, %add
+  %add3 = add nsw i64 %add, %add2
+  %add4 = add nsw i64 %add2, %add3
+  %add5 = add nsw i64 %add3, %add4
+  %mul = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %add5, i64 %x)
+  %mul.ov = extractvalue { i64, i1 } %mul, 1
+  br label %land.end
+
+land.end:                                         ; preds = %land.rhs, %entry
+  %result = phi i1 [ false, %entry ], [ %mul.ov, %land.rhs ]
+  %conv = zext i1 %result to i16
+  ret i16 %conv
+}