Skip to content

Commit e977b28

Browse files
[InstCombine] Match intrinsic recurrences when known to be hoisted
For value-accumulating recurrences of the kind:
```
%umax.acc = phi i8 [ %umax, %backedge ], [ %a, %entry ]
%umax = call i8 @llvm.umax.i8(i8 %umax.acc, i8 %b)
```
the binary intrinsic may be simplified into a single call on the init value and the other operand, provided the latter is loop-invariant:
```
%umax = call i8 @llvm.umax.i8(i8 %a, i8 %b)
```
Proofs: https://alive2.llvm.org/ce/z/ea2cVC. Fixes: #145875.
1 parent 5f86456 commit e977b28

File tree

2 files changed

+51
-5
lines changed

2 files changed

+51
-5
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1532,6 +1532,51 @@ static Instruction *foldBitOrderCrossLogicOp(Value *V,
15321532
return nullptr;
15331533
}
15341534

1535+
/// Helper to match idempotent binary intrinsics, namely, intrinsics where
1536+
/// `f(f(x, y), y) == f(x, y)` holds.
1537+
static bool isIdempotentBinaryIntrinsic(Intrinsic::ID IID) {
1538+
switch (IID) {
1539+
case Intrinsic::smax:
1540+
case Intrinsic::smin:
1541+
case Intrinsic::umax:
1542+
case Intrinsic::umin:
1543+
case Intrinsic::maximum:
1544+
case Intrinsic::minimum:
1545+
case Intrinsic::maximumnum:
1546+
case Intrinsic::minimumnum:
1547+
case Intrinsic::maxnum:
1548+
case Intrinsic::minnum:
1549+
return true;
1550+
default:
1551+
return false;
1552+
}
1553+
}
1554+
1555+
/// Attempt to simplify value-accumulating recurrences of kind:
1556+
/// %umax.acc = phi i8 [ %umax, %backedge ], [ %a, %entry ]
1557+
/// %umax = call i8 @llvm.umax.i8(i8 %umax.acc, i8 %b)
1558+
/// And let the idempotent binary intrinsic be hoisted, when the operands are
1559+
/// known to be loop-invariant.
1560+
static Value *foldIdempotentBinaryIntrinsicRecurrence(InstCombinerImpl &IC,
1561+
IntrinsicInst *II) {
1562+
PHINode *PN;
1563+
Value *Init, *OtherOp;
1564+
1565+
// A binary intrinsic recurrence with loop-invariant operands is equivalent to
1566+
// `call @llvm.binary.intrinsic(Init, OtherOp)`.
1567+
auto IID = II->getIntrinsicID();
1568+
if (!isIdempotentBinaryIntrinsic(IID) ||
1569+
!matchSimpleBinaryIntrinsicRecurrence(II, PN, Init, OtherOp) ||
1570+
!IC.getDominatorTree().dominates(OtherOp, PN))
1571+
return nullptr;
1572+
1573+
auto *InvariantBinaryInst =
1574+
IC.Builder.CreateBinaryIntrinsic(IID, Init, OtherOp);
1575+
if (isa<FPMathOperator>(InvariantBinaryInst))
1576+
cast<Instruction>(InvariantBinaryInst)->copyFastMathFlags(II);
1577+
return InvariantBinaryInst;
1578+
}
1579+
15351580
static Value *simplifyReductionOperand(Value *Arg, bool CanReorderLanes) {
15361581
if (!CanReorderLanes)
15371582
return nullptr;
@@ -3912,6 +3957,9 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
39123957
if (Value *Reverse = foldReversedIntrinsicOperands(II))
39133958
return replaceInstUsesWith(*II, Reverse);
39143959

3960+
if (Value *Res = foldIdempotentBinaryIntrinsicRecurrence(*this, II))
3961+
return replaceInstUsesWith(*II, Res);
3962+
39153963
// Some intrinsics (like experimental_gc_statepoint) can be used in invoke
39163964
// context, so it is handled in visitCallBase and we should trigger it.
39173965
return visitCallBase(*II);

llvm/test/Transforms/InstCombine/recurrence-binary-intrinsic.ll

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -236,12 +236,11 @@ define float @simple_recurrence_intrinsic_maximumnum(i32 %n, float %a, float %b)
236236
; CHECK-NEXT: br label %[[LOOP:.*]]
237237
; CHECK: [[LOOP]]:
238238
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
239-
; CHECK-NEXT: [[FMAX_ACC:%.*]] = phi float [ [[FMAX:%.*]], %[[LOOP]] ], [ [[A]], %[[ENTRY]] ]
240-
; CHECK-NEXT: [[FMAX]] = call nnan float @llvm.maximumnum.f32(float [[FMAX_ACC]], float [[B]])
241239
; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 1
242240
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IV_NEXT]], [[N]]
243241
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
244242
; CHECK: [[EXIT]]:
243+
; CHECK-NEXT: [[FMAX:%.*]] = call nnan float @llvm.maximumnum.f32(float [[A]], float [[B]])
245244
; CHECK-NEXT: ret float [[FMAX]]
246245
;
247246
entry:
@@ -265,12 +264,11 @@ define float @simple_recurrence_intrinsic_minimumnum(i32 %n, float %a, float %b)
265264
; CHECK-NEXT: br label %[[LOOP:.*]]
266265
; CHECK: [[LOOP]]:
267266
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
268-
; CHECK-NEXT: [[FMIN_ACC:%.*]] = phi float [ [[FMIN:%.*]], %[[LOOP]] ], [ [[A]], %[[ENTRY]] ]
269-
; CHECK-NEXT: [[FMIN]] = call nnan float @llvm.minimumnum.f32(float [[FMIN_ACC]], float [[B]])
270267
; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 1
271268
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IV_NEXT]], [[N]]
272269
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
273270
; CHECK: [[EXIT]]:
271+
; CHECK-NEXT: [[FMIN:%.*]] = call nnan float @llvm.minimumnum.f32(float [[A]], float [[B]])
274272
; CHECK-NEXT: ret float [[FMIN]]
275273
;
276274
entry:
@@ -296,7 +294,7 @@ define i8 @simple_recurrence_intrinsic_multiuse_phi(i8 %n, i8 %a, i8 %b) {
296294
; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
297295
; CHECK-NEXT: [[UMAX_ACC:%.*]] = phi i8 [ [[UMAX:%.*]], %[[LOOP]] ], [ [[A]], %[[ENTRY]] ]
298296
; CHECK-NEXT: call void @use(i8 [[UMAX_ACC]])
299-
; CHECK-NEXT: [[UMAX]] = call i8 @llvm.umax.i8(i8 [[UMAX_ACC]], i8 [[B]])
297+
; CHECK-NEXT: [[UMAX]] = call i8 @llvm.umax.i8(i8 [[A]], i8 [[B]])
300298
; CHECK-NEXT: [[IV_NEXT]] = add nuw i8 [[IV]], 1
301299
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[N]]
302300
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]

0 commit comments

Comments
 (0)