Skip to content

Commit 91b423d

Browse files
authored
[DAG][RISCV] Use vp.<binop> when widening illegal types for binops which can trap (#105214)
This allows the use of a single wider operation with a restricted EVL instead of having to split and cover via decreasing powers-of-two sizes. On RISCV, this avoids the need for a bunch of vslidedown and vslideup instructions to extract subvectors, and VL toggles to switch between the various widths. Note there is a potential downside of using vp nodes; we lose any generic DAG combines which might have applied to the split form.
1 parent 8f30506 commit 91b423d

File tree

5 files changed

+52
-91
lines changed

5 files changed

+52
-91
lines changed

llvm/include/llvm/CodeGen/ISDOpcodes.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1519,7 +1519,7 @@ std::optional<unsigned> getVPExplicitVectorLengthIdx(unsigned Opcode);
15191519
std::optional<unsigned> getBaseOpcodeForVP(unsigned Opcode, bool hasFPExcept);
15201520

15211521
/// Translate this non-VP Opcode to its corresponding VP Opcode.
1522-
unsigned getVPForBaseOpcode(unsigned Opcode);
1522+
std::optional<unsigned> getVPForBaseOpcode(unsigned Opcode);
15231523

15241524
//===--------------------------------------------------------------------===//
15251525
/// MemIndexedMode enum - This enum defines the load / store indexed

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4845,6 +4845,26 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
48454845
return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, Flags);
48464846
}
48474847

4848+
// Generate a vp.op if it is custom/legal for the target. This avoids need
4849+
// to split and tile the subvectors (below), because the inactive lanes can
4850+
// simply be disabled. To avoid possible recursion, only do this if the
4851+
// widened mask type is legal.
4852+
if (auto VPOpcode = ISD::getVPForBaseOpcode(Opcode);
4853+
VPOpcode && TLI.isOperationLegalOrCustom(*VPOpcode, WidenVT)) {
4854+
if (EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
4855+
WidenVT.getVectorElementCount());
4856+
TLI.isTypeLegal(WideMaskVT)) {
4857+
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
4858+
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
4859+
SDValue Mask = DAG.getAllOnesConstant(dl, WideMaskVT);
4860+
SDValue EVL =
4861+
DAG.getElementCount(dl, TLI.getVPExplicitVectorLengthTy(),
4862+
N->getValueType(0).getVectorElementCount());
4863+
return DAG.getNode(*VPOpcode, dl, WidenVT, InOp1, InOp2, Mask, EVL,
4864+
Flags);
4865+
}
4866+
}
4867+
48484868
// FIXME: Improve support for scalable vectors.
48494869
assert(!VT.isScalableVector() && "Scalable vectors not handled yet.");
48504870

llvm/lib/CodeGen/SelectionDAG/MatchContext.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ class VPMatchContext {
110110
// SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT) { return
111111
// DAG.getNode(Opcode, DL, VT); }
112112
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand) {
113-
unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
113+
unsigned VPOpcode = *ISD::getVPForBaseOpcode(Opcode);
114114
assert(ISD::getVPMaskIdx(VPOpcode) == 1 &&
115115
ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 2);
116116
return DAG.getNode(VPOpcode, DL, VT,
@@ -119,15 +119,15 @@ class VPMatchContext {
119119

120120
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
121121
SDValue N2) {
122-
unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
122+
unsigned VPOpcode = *ISD::getVPForBaseOpcode(Opcode);
123123
assert(ISD::getVPMaskIdx(VPOpcode) == 2 &&
124124
ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 3);
125125
return DAG.getNode(VPOpcode, DL, VT, {N1, N2, RootMaskOp, RootVectorLenOp});
126126
}
127127

128128
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
129129
SDValue N2, SDValue N3) {
130-
unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
130+
unsigned VPOpcode = *ISD::getVPForBaseOpcode(Opcode);
131131
assert(ISD::getVPMaskIdx(VPOpcode) == 3 &&
132132
ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 4);
133133
return DAG.getNode(VPOpcode, DL, VT,
@@ -136,7 +136,7 @@ class VPMatchContext {
136136

137137
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand,
138138
SDNodeFlags Flags) {
139-
unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
139+
unsigned VPOpcode = *ISD::getVPForBaseOpcode(Opcode);
140140
assert(ISD::getVPMaskIdx(VPOpcode) == 1 &&
141141
ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 2);
142142
return DAG.getNode(VPOpcode, DL, VT, {Operand, RootMaskOp, RootVectorLenOp},
@@ -145,7 +145,7 @@ class VPMatchContext {
145145

146146
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
147147
SDValue N2, SDNodeFlags Flags) {
148-
unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
148+
unsigned VPOpcode = *ISD::getVPForBaseOpcode(Opcode);
149149
assert(ISD::getVPMaskIdx(VPOpcode) == 2 &&
150150
ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 3);
151151
return DAG.getNode(VPOpcode, DL, VT, {N1, N2, RootMaskOp, RootVectorLenOp},
@@ -154,21 +154,21 @@ class VPMatchContext {
154154

155155
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
156156
SDValue N2, SDValue N3, SDNodeFlags Flags) {
157-
unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
157+
unsigned VPOpcode = *ISD::getVPForBaseOpcode(Opcode);
158158
assert(ISD::getVPMaskIdx(VPOpcode) == 3 &&
159159
ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 4);
160160
return DAG.getNode(VPOpcode, DL, VT,
161161
{N1, N2, N3, RootMaskOp, RootVectorLenOp}, Flags);
162162
}
163163

164164
bool isOperationLegal(unsigned Op, EVT VT) const {
165-
unsigned VPOp = ISD::getVPForBaseOpcode(Op);
165+
unsigned VPOp = *ISD::getVPForBaseOpcode(Op);
166166
return TLI.isOperationLegal(VPOp, VT);
167167
}
168168

169169
bool isOperationLegalOrCustom(unsigned Op, EVT VT,
170170
bool LegalOnly = false) const {
171-
unsigned VPOp = ISD::getVPForBaseOpcode(Op);
171+
unsigned VPOp = *ISD::getVPForBaseOpcode(Op);
172172
return TLI.isOperationLegalOrCustom(VPOp, VT, LegalOnly);
173173
}
174174

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -550,10 +550,10 @@ std::optional<unsigned> ISD::getBaseOpcodeForVP(unsigned VPOpcode,
550550
return std::nullopt;
551551
}
552552

553-
unsigned ISD::getVPForBaseOpcode(unsigned Opcode) {
553+
std::optional<unsigned> ISD::getVPForBaseOpcode(unsigned Opcode) {
554554
switch (Opcode) {
555555
default:
556-
llvm_unreachable("can not translate this Opcode to VP.");
556+
return std::nullopt;
557557
#define BEGIN_REGISTER_VP_SDNODE(VPOPC, ...) break;
558558
#define VP_PROPERTY_FUNCTIONAL_SDOPC(SDOPC) case ISD::SDOPC:
559559
#define END_REGISTER_VP_SDNODE(VPOPC) return ISD::VPOPC;

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll

Lines changed: 21 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -776,18 +776,9 @@ define void @sdiv_v6i16(ptr %x, ptr %y) {
776776
; CHECK-LABEL: sdiv_v6i16:
777777
; CHECK: # %bb.0:
778778
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
779-
; CHECK-NEXT: vle16.v v8, (a1)
780-
; CHECK-NEXT: vle16.v v9, (a0)
781-
; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma
782-
; CHECK-NEXT: vslidedown.vi v10, v8, 4
783-
; CHECK-NEXT: vslidedown.vi v11, v9, 4
784-
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
785-
; CHECK-NEXT: vdiv.vv v10, v11, v10
786-
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
787-
; CHECK-NEXT: vdiv.vv v8, v9, v8
788-
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
789-
; CHECK-NEXT: vslideup.vi v8, v10, 4
790-
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
779+
; CHECK-NEXT: vle16.v v8, (a0)
780+
; CHECK-NEXT: vle16.v v9, (a1)
781+
; CHECK-NEXT: vdiv.vv v8, v8, v9
791782
; CHECK-NEXT: vse16.v v8, (a0)
792783
; CHECK-NEXT: ret
793784
%a = load <6 x i16>, ptr %x
@@ -865,18 +856,9 @@ define void @srem_v6i16(ptr %x, ptr %y) {
865856
; CHECK-LABEL: srem_v6i16:
866857
; CHECK: # %bb.0:
867858
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
868-
; CHECK-NEXT: vle16.v v8, (a1)
869-
; CHECK-NEXT: vle16.v v9, (a0)
870-
; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma
871-
; CHECK-NEXT: vslidedown.vi v10, v8, 4
872-
; CHECK-NEXT: vslidedown.vi v11, v9, 4
873-
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
874-
; CHECK-NEXT: vrem.vv v10, v11, v10
875-
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
876-
; CHECK-NEXT: vrem.vv v8, v9, v8
877-
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
878-
; CHECK-NEXT: vslideup.vi v8, v10, 4
879-
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
859+
; CHECK-NEXT: vle16.v v8, (a0)
860+
; CHECK-NEXT: vle16.v v9, (a1)
861+
; CHECK-NEXT: vrem.vv v8, v8, v9
880862
; CHECK-NEXT: vse16.v v8, (a0)
881863
; CHECK-NEXT: ret
882864
%a = load <6 x i16>, ptr %x
@@ -954,18 +936,9 @@ define void @udiv_v6i16(ptr %x, ptr %y) {
954936
; CHECK-LABEL: udiv_v6i16:
955937
; CHECK: # %bb.0:
956938
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
957-
; CHECK-NEXT: vle16.v v8, (a1)
958-
; CHECK-NEXT: vle16.v v9, (a0)
959-
; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma
960-
; CHECK-NEXT: vslidedown.vi v10, v8, 4
961-
; CHECK-NEXT: vslidedown.vi v11, v9, 4
962-
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
963-
; CHECK-NEXT: vdivu.vv v10, v11, v10
964-
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
965-
; CHECK-NEXT: vdivu.vv v8, v9, v8
966-
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
967-
; CHECK-NEXT: vslideup.vi v8, v10, 4
968-
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
939+
; CHECK-NEXT: vle16.v v8, (a0)
940+
; CHECK-NEXT: vle16.v v9, (a1)
941+
; CHECK-NEXT: vdivu.vv v8, v8, v9
969942
; CHECK-NEXT: vse16.v v8, (a0)
970943
; CHECK-NEXT: ret
971944
%a = load <6 x i16>, ptr %x
@@ -1043,18 +1016,9 @@ define void @urem_v6i16(ptr %x, ptr %y) {
10431016
; CHECK-LABEL: urem_v6i16:
10441017
; CHECK: # %bb.0:
10451018
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
1046-
; CHECK-NEXT: vle16.v v8, (a1)
1047-
; CHECK-NEXT: vle16.v v9, (a0)
1048-
; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma
1049-
; CHECK-NEXT: vslidedown.vi v10, v8, 4
1050-
; CHECK-NEXT: vslidedown.vi v11, v9, 4
1051-
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
1052-
; CHECK-NEXT: vremu.vv v10, v11, v10
1053-
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
1054-
; CHECK-NEXT: vremu.vv v8, v9, v8
1055-
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1056-
; CHECK-NEXT: vslideup.vi v8, v10, 4
1057-
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
1019+
; CHECK-NEXT: vle16.v v8, (a0)
1020+
; CHECK-NEXT: vle16.v v9, (a1)
1021+
; CHECK-NEXT: vremu.vv v8, v8, v9
10581022
; CHECK-NEXT: vse16.v v8, (a0)
10591023
; CHECK-NEXT: ret
10601024
%a = load <6 x i16>, ptr %x
@@ -1192,23 +1156,12 @@ define void @mulhu_v6i16(ptr %x) {
11921156
; CHECK: # %bb.0:
11931157
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
11941158
; CHECK-NEXT: vle16.v v8, (a0)
1195-
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
1196-
; CHECK-NEXT: vid.v v9
1197-
; CHECK-NEXT: vadd.vi v9, v9, 12
1198-
; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma
1199-
; CHECK-NEXT: vslidedown.vi v10, v8, 4
1200-
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
1201-
; CHECK-NEXT: vdivu.vv v9, v10, v9
1202-
; CHECK-NEXT: lui a1, 45217
1203-
; CHECK-NEXT: addi a1, a1, -1785
1204-
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
1205-
; CHECK-NEXT: vmv.s.x v10, a1
1206-
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
1207-
; CHECK-NEXT: vsext.vf2 v11, v10
1208-
; CHECK-NEXT: vdivu.vv v8, v8, v11
1159+
; CHECK-NEXT: lui a1, %hi(.LCPI67_0)
1160+
; CHECK-NEXT: addi a1, a1, %lo(.LCPI67_0)
12091161
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1210-
; CHECK-NEXT: vslideup.vi v8, v9, 4
1162+
; CHECK-NEXT: vle16.v v9, (a1)
12111163
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
1164+
; CHECK-NEXT: vdivu.vv v8, v8, v9
12121165
; CHECK-NEXT: vse16.v v8, (a0)
12131166
; CHECK-NEXT: ret
12141167
%a = load <6 x i16>, ptr %x
@@ -1353,25 +1306,13 @@ define void @mulhs_v6i16(ptr %x) {
13531306
; CHECK: # %bb.0:
13541307
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
13551308
; CHECK-NEXT: vle16.v v8, (a0)
1356-
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
1357-
; CHECK-NEXT: vmv.v.i v9, 7
1358-
; CHECK-NEXT: vid.v v10
1359-
; CHECK-NEXT: li a1, -14
1360-
; CHECK-NEXT: vmadd.vx v10, a1, v9
1361-
; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma
1362-
; CHECK-NEXT: vslidedown.vi v9, v8, 4
1363-
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
1364-
; CHECK-NEXT: vdiv.vv v9, v9, v10
1365-
; CHECK-NEXT: lui a1, 1020016
1366-
; CHECK-NEXT: addi a1, a1, 2041
1367-
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
1368-
; CHECK-NEXT: vmv.s.x v10, a1
1369-
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
1370-
; CHECK-NEXT: vsext.vf2 v11, v10
1371-
; CHECK-NEXT: vdiv.vv v8, v8, v11
1309+
; CHECK-NEXT: li a1, 22
1310+
; CHECK-NEXT: vmv.s.x v0, a1
13721311
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1373-
; CHECK-NEXT: vslideup.vi v8, v9, 4
1312+
; CHECK-NEXT: vmv.v.i v9, -7
1313+
; CHECK-NEXT: vmerge.vim v9, v9, 7, v0
13741314
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
1315+
; CHECK-NEXT: vdiv.vv v8, v8, v9
13751316
; CHECK-NEXT: vse16.v v8, (a0)
13761317
; CHECK-NEXT: ret
13771318
%a = load <6 x i16>, ptr %x

0 commit comments

Comments
 (0)