Commit 268a6f1
[InstCombine] Implement vp.reverse elimination through binop/unop
This simply copies the structure of the vector.reverse patterns from just above and reimplements them for the vp.reverse intrinsics when the masks and EVLs exactly match. It's unfortunate that we have three different ways to represent a reverse (shuffle, vector.reverse, and vp.reverse), but I don't see an obvious way to remove any of them because the semantics are slightly different. This significantly improves vectorization of the TSVC_2 s112 and s1112 loops when using EVL tail folding.
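For illustration (distilled from the updated tests below): with a common mask %m and EVL %evl, the sequence

  %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %a, <vscale x 4 x i1> %m, i32 %evl)
  %b.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl)
  %add = add nsw <vscale x 4 x i32> %a.rev, %b.rev
  %add.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %add, <vscale x 4 x i1> %m, i32 %evl)

now folds to a single unreversed add:

  %add = add nsw <vscale x 4 x i32> %a, %b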
1 parent 891f6ae commit 268a6f1

2 files changed (+46, -22 lines)

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Lines changed: 36 additions & 0 deletions
@@ -3582,6 +3582,42 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     }
     break;
   }
+  case Intrinsic::experimental_vp_reverse: {
+    Value *BO0, *BO1, *X, *Y;
+    Value *Vec = II->getArgOperand(0);
+    Value *Mask = II->getArgOperand(1);
+    Value *EVL = II->getArgOperand(2);
+    auto m_VPReverse = [&](Value *&Vec) {
+      return m_Intrinsic<Intrinsic::experimental_vp_reverse>(
+          m_Value(Vec), m_Specific(Mask), m_Specific(EVL));
+    };
+    if (match(Vec, m_OneUse(m_BinOp(m_Value(BO0), m_Value(BO1))))) {
+      auto *OldBinOp = cast<BinaryOperator>(Vec);
+      if (match(BO0, m_VPReverse(X))) {
+        // rev(binop rev(X), rev(Y)) --> binop X, Y
+        if (match(BO1, m_VPReverse(Y)))
+          return replaceInstUsesWith(CI, BinaryOperator::CreateWithCopiedFlags(
+                                             OldBinOp->getOpcode(), X, Y,
+                                             OldBinOp, OldBinOp->getName(),
+                                             II->getIterator()));
+        // rev(binop rev(X), BO1Splat) --> binop X, BO1Splat
+        if (isSplatValue(BO1))
+          return replaceInstUsesWith(CI, BinaryOperator::CreateWithCopiedFlags(
+                                             OldBinOp->getOpcode(), X, BO1,
+                                             OldBinOp, OldBinOp->getName(),
+                                             II->getIterator()));
+      }
+    }
+    // rev(unop rev(X)) --> unop X
+    if (match(Vec, m_OneUse(m_UnOp(m_VPReverse(X))))) {
+      auto *OldUnOp = cast<UnaryOperator>(Vec);
+      auto *NewUnOp = UnaryOperator::CreateWithCopiedFlags(
+          OldUnOp->getOpcode(), X, OldUnOp, OldUnOp->getName(),
+          II->getIterator());
+      return replaceInstUsesWith(CI, NewUnOp);
+    }
+    break;
+  }
   case Intrinsic::vector_reduce_or:
   case Intrinsic::vector_reduce_and: {
     // Canonicalize logical or/and reductions:
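One detail worth noting: the m_VPReverse helper only matches an inner vp.reverse whose mask and EVL are m_Specific matches of the outer call's Mask and EVL, so the fold fires only when every reverse in the pattern agrees, per the commit message. A hypothetical non-matching variant (the same shape the existing binop_reverse_elim_diffevl test covers) would be left untouched:

  ; Not folded (sketch): the outer reverse uses %evl2, not %evl.
  %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %a, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  %b.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %b, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  %add = add nsw <vscale x 4 x i32> %a.rev, %b.rev
  %add.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %add, <vscale x 4 x i1> splat (i1 true), i32 %evl2)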

llvm/test/Transforms/InstCombine/vp-reverse.ll

Lines changed: 10 additions & 22 deletions
@@ -3,11 +3,8 @@
 
 define <vscale x 4 x i32> @binop_reverse_elim(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 %evl) {
 ; CHECK-LABEL: @binop_reverse_elim(
-; CHECK-NEXT: [[A:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[A1:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL:%.*]])
-; CHECK-NEXT: [[B:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[B1:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL]])
-; CHECK-NEXT: [[ADD1:%.*]] = add nsw <vscale x 4 x i32> [[A]], [[B]]
-; CHECK-NEXT: [[ADD_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[ADD1]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD_REV]]
+; CHECK-NEXT: [[ADD1:%.*]] = add nsw <vscale x 4 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD1]]
 ;
 %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %a, <vscale x 4 x i1> splat (i1 true), i32 %evl)
 %b.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %b, <vscale x 4 x i1> splat (i1 true), i32 %evl)
@@ -18,11 +15,8 @@ define <vscale x 4 x i32> @binop_reverse_elim(<vscale x 4
 
 define <vscale x 4 x i32> @binop_reverse_elim2(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl) {
 ; CHECK-LABEL: @binop_reverse_elim2(
-; CHECK-NEXT: [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i1> [[M:%.*]], i32 [[EVL:%.*]])
-; CHECK-NEXT: [[B_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[B:%.*]], <vscale x 4 x i1> [[M]], i32 [[EVL]])
-; CHECK-NEXT: [[ADD:%.*]] = add nsw <vscale x 4 x i32> [[A_REV]], [[B_REV]]
-; CHECK-NEXT: [[ADD_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[ADD]], <vscale x 4 x i1> [[M]], i32 [[EVL]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD_REV]]
+; CHECK-NEXT: [[ADD:%.*]] = add nsw <vscale x 4 x i32> [[A_REV:%.*]], [[B_REV:%.*]]
+; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD]]
 ;
 %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %a, <vscale x 4 x i1> %m, i32 %evl)
 %b.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl)
@@ -63,10 +57,8 @@ define <vscale x 4 x i32> @binop_reverse_elim_diffevl(<vscale x 4 x i32> %a, <vs
 
 define <vscale x 4 x i32> @binop_reverse_splat_elim(<vscale x 4 x i32> %a, i32 %evl) {
 ; CHECK-LABEL: @binop_reverse_splat_elim(
-; CHECK-NEXT: [[A:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[A1:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL:%.*]])
-; CHECK-NEXT: [[ADD1:%.*]] = add nsw <vscale x 4 x i32> [[A]], splat (i32 22)
-; CHECK-NEXT: [[ADD_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[ADD1]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD_REV]]
+; CHECK-NEXT: [[ADD1:%.*]] = add nsw <vscale x 4 x i32> [[A:%.*]], splat (i32 22)
+; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD1]]
 ;
 %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %a, <vscale x 4 x i1> splat (i1 true), i32 %evl)
 %add = add nsw <vscale x 4 x i32> %a.rev, splat (i32 22)
@@ -76,10 +68,8 @@ define <vscale x 4 x i32> @binop_reverse_splat_elim(<vscale x 4 x i32> %a, i32 %
 
 define <vscale x 4 x i32> @binop_reverse_splat_elim2(<vscale x 4 x i32> %a, i32 %evl) {
 ; CHECK-LABEL: @binop_reverse_splat_elim2(
-; CHECK-NEXT: [[A:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[A1:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL:%.*]])
-; CHECK-NEXT: [[ADD1:%.*]] = add nsw <vscale x 4 x i32> [[A]], splat (i32 22)
-; CHECK-NEXT: [[ADD_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[ADD1]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD_REV]]
+; CHECK-NEXT: [[ADD1:%.*]] = add nsw <vscale x 4 x i32> [[A:%.*]], splat (i32 22)
+; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD1]]
 ;
 %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %a, <vscale x 4 x i1> splat (i1 true), i32 %evl)
 %add = add nsw <vscale x 4 x i32> splat (i32 22), %a.rev
@@ -89,10 +79,8 @@ define <vscale x 4 x i32> @binop_reverse_splat_elim2(<vscale x 4 x i32> %a, i32
 
 define <vscale x 4 x float> @unop_reverse_splat_elim(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i32 %evl) {
 ; CHECK-LABEL: @unop_reverse_splat_elim(
-; CHECK-NEXT: [[A_REV:%.*]] = tail call <vscale x 4 x float> @llvm.experimental.vp.reverse.nxv4f32(<vscale x 4 x float> [[A:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL:%.*]])
-; CHECK-NEXT: [[OP:%.*]] = fneg <vscale x 4 x float> [[A_REV]]
-; CHECK-NEXT: [[OP_REV:%.*]] = tail call <vscale x 4 x float> @llvm.experimental.vp.reverse.nxv4f32(<vscale x 4 x float> [[OP]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL]])
-; CHECK-NEXT: ret <vscale x 4 x float> [[OP_REV]]
+; CHECK-NEXT: [[OP:%.*]] = fneg <vscale x 4 x float> [[A_REV:%.*]]
+; CHECK-NEXT: ret <vscale x 4 x float> [[OP]]
 ;
 %a.rev = tail call <vscale x 4 x float> @llvm.experimental.vp.reverse.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x i1> splat (i1 true), i32 %evl)
 %op = fneg <vscale x 4 x float> %a.rev
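Not shown in these hunks is the file's RUN line, which sits above the first hunk. As an assumption (the header is outside the diff context): InstCombine tests like this one are typically driven by something along the lines of `opt < %s -passes=instcombine -S | FileCheck %s`, with the CHECK lines regenerated by utils/update_test_checks.py.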
