Skip to content

Commit 7f686e0

Browse files
committed
[RISCV] Match (ext (op a, b)) to (wop a, b)
This suboptimal case was found while trying to optimize ABD/ABDS operations. Adding ISel patterns is the simplest way to optimize it. We could add DAGCombine cases for `ISD::SIGN_EXTEND`/`ISD::ZERO_EXTEND` instead, but that may need a lot of manual handling.
1 parent edb05c4 commit 7f686e0

File tree

2 files changed

+26
-29
lines changed

2 files changed

+26
-29
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -467,6 +467,17 @@ multiclass VPatWidenBinarySDNode_VV_VX<SDNode op, PatFrags extop1, PatFrags exto
467467
(!cast<Instruction>(instruction_name#"_VX_"#vti.LMul.MX)
468468
(wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2,
469469
GPR:$rs1, vti.AVL, vti.Log2SEW, TA_MA)>;
470+
if !eq(extop1, extop2) then
471+
def : Pat<(wti.Vector (extop1 (op (vti.Vector vti.RegClass:$rs2),
472+
(vti.Vector vti.RegClass:$rs1)))),
473+
(!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
474+
(wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2,
475+
vti.RegClass:$rs1, vti.AVL, vti.Log2SEW, TA_MA)>;
476+
def : Pat<(wti.Vector (extop1 (op (vti.Vector vti.RegClass:$rs2),
477+
(vti.Vector (SplatPat (XLenVT GPR:$rs1)))))),
478+
(!cast<Instruction>(instruction_name#"_VX_"#vti.LMul.MX)
479+
(wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2,
480+
GPR:$rs1, vti.AVL, vti.Log2SEW, TA_MA)>;
470481
}
471482
}
472483
}

llvm/test/CodeGen/RISCV/rvv/abd.ll

Lines changed: 15 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,8 @@ define <vscale x 8 x i16> @sabd_h_promoted_ops(<vscale x 8 x i8> %a, <vscale x 8
5858
; CHECK: # %bb.0:
5959
; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
6060
; CHECK-NEXT: vmin.vv v10, v8, v9
61-
; CHECK-NEXT: vmax.vv v8, v8, v9
62-
; CHECK-NEXT: vsub.vv v10, v8, v10
63-
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
64-
; CHECK-NEXT: vzext.vf2 v8, v10
61+
; CHECK-NEXT: vmax.vv v11, v8, v9
62+
; CHECK-NEXT: vwsubu.vv v8, v11, v10
6563
; CHECK-NEXT: ret
6664
%a.sext = sext <vscale x 8 x i8> %a to <vscale x 8 x i16>
6765
%b.sext = sext <vscale x 8 x i8> %b to <vscale x 8 x i16>
@@ -91,10 +89,8 @@ define <vscale x 4 x i32> @sabd_s_promoted_ops(<vscale x 4 x i16> %a, <vscale x
9189
; CHECK: # %bb.0:
9290
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
9391
; CHECK-NEXT: vmin.vv v10, v8, v9
94-
; CHECK-NEXT: vmax.vv v8, v8, v9
95-
; CHECK-NEXT: vsub.vv v10, v8, v10
96-
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
97-
; CHECK-NEXT: vzext.vf2 v8, v10
92+
; CHECK-NEXT: vmax.vv v11, v8, v9
93+
; CHECK-NEXT: vwsubu.vv v8, v11, v10
9894
; CHECK-NEXT: ret
9995
%a.sext = sext <vscale x 4 x i16> %a to <vscale x 4 x i32>
10096
%b.sext = sext <vscale x 4 x i16> %b to <vscale x 4 x i32>
@@ -124,10 +120,8 @@ define <vscale x 2 x i64> @sabd_d_promoted_ops(<vscale x 2 x i32> %a, <vscale x
124120
; CHECK: # %bb.0:
125121
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
126122
; CHECK-NEXT: vmin.vv v10, v8, v9
127-
; CHECK-NEXT: vmax.vv v8, v8, v9
128-
; CHECK-NEXT: vsub.vv v10, v8, v10
129-
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
130-
; CHECK-NEXT: vzext.vf2 v8, v10
123+
; CHECK-NEXT: vmax.vv v11, v8, v9
124+
; CHECK-NEXT: vwsubu.vv v8, v11, v10
131125
; CHECK-NEXT: ret
132126
%a.sext = sext <vscale x 2 x i32> %a to <vscale x 2 x i64>
133127
%b.sext = sext <vscale x 2 x i32> %b to <vscale x 2 x i64>
@@ -192,10 +186,8 @@ define <vscale x 8 x i16> @uabd_h_promoted_ops(<vscale x 8 x i8> %a, <vscale x 8
192186
; CHECK: # %bb.0:
193187
; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
194188
; CHECK-NEXT: vminu.vv v10, v8, v9
195-
; CHECK-NEXT: vmaxu.vv v8, v8, v9
196-
; CHECK-NEXT: vsub.vv v10, v8, v10
197-
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
198-
; CHECK-NEXT: vzext.vf2 v8, v10
189+
; CHECK-NEXT: vmaxu.vv v11, v8, v9
190+
; CHECK-NEXT: vwsubu.vv v8, v11, v10
199191
; CHECK-NEXT: ret
200192
%a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i16>
201193
%b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i16>
@@ -225,10 +217,8 @@ define <vscale x 4 x i32> @uabd_s_promoted_ops(<vscale x 4 x i16> %a, <vscale x
225217
; CHECK: # %bb.0:
226218
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
227219
; CHECK-NEXT: vminu.vv v10, v8, v9
228-
; CHECK-NEXT: vmaxu.vv v8, v8, v9
229-
; CHECK-NEXT: vsub.vv v10, v8, v10
230-
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
231-
; CHECK-NEXT: vzext.vf2 v8, v10
220+
; CHECK-NEXT: vmaxu.vv v11, v8, v9
221+
; CHECK-NEXT: vwsubu.vv v8, v11, v10
232222
; CHECK-NEXT: ret
233223
%a.zext = zext <vscale x 4 x i16> %a to <vscale x 4 x i32>
234224
%b.zext = zext <vscale x 4 x i16> %b to <vscale x 4 x i32>
@@ -258,10 +248,8 @@ define <vscale x 2 x i64> @uabd_d_promoted_ops(<vscale x 2 x i32> %a, <vscale x
258248
; CHECK: # %bb.0:
259249
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
260250
; CHECK-NEXT: vminu.vv v10, v8, v9
261-
; CHECK-NEXT: vmaxu.vv v8, v8, v9
262-
; CHECK-NEXT: vsub.vv v10, v8, v10
263-
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
264-
; CHECK-NEXT: vzext.vf2 v8, v10
251+
; CHECK-NEXT: vmaxu.vv v11, v8, v9
252+
; CHECK-NEXT: vwsubu.vv v8, v11, v10
265253
; CHECK-NEXT: ret
266254
%a.zext = zext <vscale x 2 x i32> %a to <vscale x 2 x i64>
267255
%b.zext = zext <vscale x 2 x i32> %b to <vscale x 2 x i64>
@@ -296,11 +284,9 @@ define <vscale x 4 x i32> @uabd_non_matching_promoted_ops(<vscale x 4 x i8> %a,
296284
; CHECK: # %bb.0:
297285
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
298286
; CHECK-NEXT: vzext.vf2 v10, v8
299-
; CHECK-NEXT: vminu.vv v8, v10, v9
300-
; CHECK-NEXT: vmaxu.vv v9, v10, v9
301-
; CHECK-NEXT: vsub.vv v10, v9, v8
302-
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
303-
; CHECK-NEXT: vzext.vf2 v8, v10
287+
; CHECK-NEXT: vminu.vv v11, v10, v9
288+
; CHECK-NEXT: vmaxu.vv v10, v10, v9
289+
; CHECK-NEXT: vwsubu.vv v8, v10, v11
304290
; CHECK-NEXT: ret
305291
%a.zext = zext <vscale x 4 x i8> %a to <vscale x 4 x i32>
306292
%b.zext = zext <vscale x 4 x i16> %b to <vscale x 4 x i32>

0 commit comments

Comments
 (0)