Skip to content

Commit 53726c4

Browse files
[RISCV][VLOPT] Add support for 11.14 widening integer mul-add instructions
1 parent 2f09c72 commit 53726c4

File tree

2 files changed

+129
-20
lines changed

2 files changed

+129
-20
lines changed

llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -422,8 +422,8 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
422422
case RISCV::VWSUB_WX:
423423
// Vector Widening Integer Multiply-Add Instructions
424424
// Destination EEW=2*SEW and EMUL=2*LMUL. Source EEW=SEW and EMUL=LMUL.
425-
// Even though the add is a 2*SEW addition, the operands of the add are the
426-
// Dest which is 2*SEW and the result of the multiply which is 2*SEW.
425+
// A SEW-bit*SEW-bit multiply of the sources forms a 2*SEW-bit value, which
426+
// is then added to the 2*SEW-bit Dest.
427427
case RISCV::VWMACCU_VV:
428428
case RISCV::VWMACCU_VX:
429429
case RISCV::VWMACC_VV:
@@ -567,9 +567,13 @@ static bool isSupportedInstr(const MachineInstr &MI) {
567567
// Vector Single-Width Integer Multiply-Add Instructions
568568
// FIXME: Add support
569569
// Vector Widening Integer Multiply-Add Instructions
570-
// FIXME: Add support
571-
case RISCV::VWMACC_VX:
570+
case RISCV::VWMACCU_VV:
572571
case RISCV::VWMACCU_VX:
572+
case RISCV::VWMACC_VV:
573+
case RISCV::VWMACC_VX:
574+
case RISCV::VWMACCSU_VV:
575+
case RISCV::VWMACCSU_VX:
576+
case RISCV::VWMACCUS_VX:
573577
// Vector Integer Merge Instructions
574578
// FIXME: Add support
575579
// Vector Integer Move Instructions

llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll

Lines changed: 121 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1122,44 +1122,149 @@ define <vscale x 4 x i32> @vrem_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
11221122
ret <vscale x 4 x i32> %2
11231123
}
11241124

1125-
define <vscale x 4 x i32> @vwmacc_vx(<vscale x 4 x i16> %a, i16 %b, iXLen %vl) {
1125+
define <vscale x 4 x i32> @vwmacc_vv(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen %vl) {
1126+
; NOVLOPT-LABEL: vwmacc_vv:
1127+
; NOVLOPT: # %bb.0:
1128+
; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, tu, ma
1129+
; NOVLOPT-NEXT: vwmacc.vv v8, v10, v11
1130+
; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
1131+
; NOVLOPT-NEXT: vadd.vv v8, v8, v8
1132+
; NOVLOPT-NEXT: ret
1133+
;
1134+
; VLOPT-LABEL: vwmacc_vv:
1135+
; VLOPT: # %bb.0:
1136+
; VLOPT-NEXT: vsetvli zero, a0, e16, m1, tu, ma
1137+
; VLOPT-NEXT: vwmacc.vv v8, v10, v11
1138+
; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
1139+
; VLOPT-NEXT: vadd.vv v8, v8, v8
1140+
; VLOPT-NEXT: ret
1141+
%1 = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.nxv4i16(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
1142+
%2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
1143+
ret <vscale x 4 x i32> %2
1144+
}
1145+
1146+
define <vscale x 4 x i32> @vwmacc_vx(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen %vl) {
11261147
; NOVLOPT-LABEL: vwmacc_vx:
11271148
; NOVLOPT: # %bb.0:
1128-
; NOVLOPT-NEXT: vsetvli a2, zero, e16, m1, ta, ma
1129-
; NOVLOPT-NEXT: vwmacc.vx v10, a0, v8
1149+
; NOVLOPT-NEXT: vsetvli a2, zero, e16, m1, tu, ma
1150+
; NOVLOPT-NEXT: vwmacc.vx v8, a0, v10
11301151
; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1131-
; NOVLOPT-NEXT: vadd.vv v8, v10, v10
1152+
; NOVLOPT-NEXT: vadd.vv v8, v8, v8
11321153
; NOVLOPT-NEXT: ret
11331154
;
11341155
; VLOPT-LABEL: vwmacc_vx:
11351156
; VLOPT: # %bb.0:
1136-
; VLOPT-NEXT: vsetvli zero, a1, e16, m1, ta, ma
1137-
; VLOPT-NEXT: vwmacc.vx v10, a0, v8
1157+
; VLOPT-NEXT: vsetvli zero, a1, e16, m1, tu, ma
1158+
; VLOPT-NEXT: vwmacc.vx v8, a0, v10
11381159
; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
1139-
; VLOPT-NEXT: vadd.vv v8, v10, v10
1160+
; VLOPT-NEXT: vadd.vv v8, v8, v8
11401161
; VLOPT-NEXT: ret
1141-
%1 = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.i16(<vscale x 4 x i32> poison, i16 %b, <vscale x 4 x i16> %a, iXLen -1, iXLen 0)
1162+
%1 = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.i16(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
11421163
%2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
11431164
ret <vscale x 4 x i32> %2
11441165
}
11451166

1146-
define <vscale x 4 x i32> @vwmaccu_vx(<vscale x 4 x i16> %a, i16 %b, iXLen %vl) {
1167+
define <vscale x 4 x i32> @vwmaccu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen %vl) {
1168+
; NOVLOPT-LABEL: vwmaccu_vv:
1169+
; NOVLOPT: # %bb.0:
1170+
; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, tu, ma
1171+
; NOVLOPT-NEXT: vwmaccu.vv v8, v10, v11
1172+
; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
1173+
; NOVLOPT-NEXT: vadd.vv v8, v8, v8
1174+
; NOVLOPT-NEXT: ret
1175+
;
1176+
; VLOPT-LABEL: vwmaccu_vv:
1177+
; VLOPT: # %bb.0:
1178+
; VLOPT-NEXT: vsetvli zero, a0, e16, m1, tu, ma
1179+
; VLOPT-NEXT: vwmaccu.vv v8, v10, v11
1180+
; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
1181+
; VLOPT-NEXT: vadd.vv v8, v8, v8
1182+
; VLOPT-NEXT: ret
1183+
%1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccu.nxv4i32.nxv4i16(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
1184+
%2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
1185+
ret <vscale x 4 x i32> %2
1186+
}
1187+
1188+
define <vscale x 4 x i32> @vwmaccu_vx(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen %vl) {
11471189
; NOVLOPT-LABEL: vwmaccu_vx:
11481190
; NOVLOPT: # %bb.0:
1149-
; NOVLOPT-NEXT: vsetvli a2, zero, e16, m1, ta, ma
1150-
; NOVLOPT-NEXT: vwmaccu.vx v10, a0, v8
1191+
; NOVLOPT-NEXT: vsetvli a2, zero, e16, m1, tu, ma
1192+
; NOVLOPT-NEXT: vwmaccu.vx v8, a0, v10
11511193
; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1152-
; NOVLOPT-NEXT: vadd.vv v8, v10, v10
1194+
; NOVLOPT-NEXT: vadd.vv v8, v8, v8
11531195
; NOVLOPT-NEXT: ret
11541196
;
11551197
; VLOPT-LABEL: vwmaccu_vx:
11561198
; VLOPT: # %bb.0:
1157-
; VLOPT-NEXT: vsetvli zero, a1, e16, m1, ta, ma
1158-
; VLOPT-NEXT: vwmaccu.vx v10, a0, v8
1199+
; VLOPT-NEXT: vsetvli zero, a1, e16, m1, tu, ma
1200+
; VLOPT-NEXT: vwmaccu.vx v8, a0, v10
11591201
; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
1160-
; VLOPT-NEXT: vadd.vv v8, v10, v10
1202+
; VLOPT-NEXT: vadd.vv v8, v8, v8
1203+
; VLOPT-NEXT: ret
1204+
%1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccu.nxv4i32.i16(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
1205+
%2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
1206+
ret <vscale x 4 x i32> %2
1207+
}
1208+
1209+
define <vscale x 4 x i32> @vwmaccsu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen %vl) {
1210+
; NOVLOPT-LABEL: vwmaccsu_vv:
1211+
; NOVLOPT: # %bb.0:
1212+
; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, tu, ma
1213+
; NOVLOPT-NEXT: vwmaccsu.vv v8, v10, v11
1214+
; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
1215+
; NOVLOPT-NEXT: vadd.vv v8, v8, v8
1216+
; NOVLOPT-NEXT: ret
1217+
;
1218+
; VLOPT-LABEL: vwmaccsu_vv:
1219+
; VLOPT: # %bb.0:
1220+
; VLOPT-NEXT: vsetvli zero, a0, e16, m1, tu, ma
1221+
; VLOPT-NEXT: vwmaccsu.vv v8, v10, v11
1222+
; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
1223+
; VLOPT-NEXT: vadd.vv v8, v8, v8
1224+
; VLOPT-NEXT: ret
1225+
%1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccsu.nxv4i32.nxv4i16(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
1226+
%2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
1227+
ret <vscale x 4 x i32> %2
1228+
}
1229+
1230+
define <vscale x 4 x i32> @vwmaccsu_vx(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen %vl) {
1231+
; NOVLOPT-LABEL: vwmaccsu_vx:
1232+
; NOVLOPT: # %bb.0:
1233+
; NOVLOPT-NEXT: vsetvli a2, zero, e16, m1, tu, ma
1234+
; NOVLOPT-NEXT: vwmaccsu.vx v8, a0, v10
1235+
; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1236+
; NOVLOPT-NEXT: vadd.vv v8, v8, v8
1237+
; NOVLOPT-NEXT: ret
1238+
;
1239+
; VLOPT-LABEL: vwmaccsu_vx:
1240+
; VLOPT: # %bb.0:
1241+
; VLOPT-NEXT: vsetvli zero, a1, e16, m1, tu, ma
1242+
; VLOPT-NEXT: vwmaccsu.vx v8, a0, v10
1243+
; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
1244+
; VLOPT-NEXT: vadd.vv v8, v8, v8
1245+
; VLOPT-NEXT: ret
1246+
%1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccsu.nxv4i32.i16(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
1247+
%2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
1248+
ret <vscale x 4 x i32> %2
1249+
}
1250+
1251+
define <vscale x 4 x i32> @vwmaccus_vx(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen %vl) {
1252+
; NOVLOPT-LABEL: vwmaccus_vx:
1253+
; NOVLOPT: # %bb.0:
1254+
; NOVLOPT-NEXT: vsetvli a2, zero, e16, m1, tu, ma
1255+
; NOVLOPT-NEXT: vwmaccus.vx v8, a0, v10
1256+
; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1257+
; NOVLOPT-NEXT: vadd.vv v8, v8, v8
1258+
; NOVLOPT-NEXT: ret
1259+
;
1260+
; VLOPT-LABEL: vwmaccus_vx:
1261+
; VLOPT: # %bb.0:
1262+
; VLOPT-NEXT: vsetvli zero, a1, e16, m1, tu, ma
1263+
; VLOPT-NEXT: vwmaccus.vx v8, a0, v10
1264+
; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
1265+
; VLOPT-NEXT: vadd.vv v8, v8, v8
11611266
; VLOPT-NEXT: ret
1162-
%1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccu.nxv4i32.i16(<vscale x 4 x i32> poison, i16 %b, <vscale x 4 x i16> %a, iXLen -1, iXLen 0)
1267+
%1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccus.nxv4i32.i16(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
11631268
%2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
11641269
ret <vscale x 4 x i32> %2
11651270
}

0 commit comments

Comments
 (0)