Skip to content

Commit 5c89ec9

Browse files
frasercrmck authored and memfrob committed
[LegalizeTypes][VP] Add splitting support for binary VP ops
This patch extends D107904's introduction of vector-predicated (VP) operation legalization to include vector splitting. When the result of a binary VP operation needs splitting, all of its operands are split in kind. The two operands and the mask are split as usual, and the vector-length parameter EVL is "split" such that the low and high halves each execute the correct number of elements. Tests have been added to the RISC-V target to show splitting several scenarios for fixed- and scalable-vector types. Without support for `umax` (e.g. in the `B` extension) the generated code starts to branch. Ideally a cost model would prevent their insertion in the first place. Through these tests many opportunities for better codegen can be seen: combining known-undef VP operations and for constant-folding operations on `ISD::VSCALE`, to name but a few. Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D107957
1 parent f844321 commit 5c89ec9

File tree

4 files changed

+529
-6
lines changed

4 files changed

+529
-6
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h

Lines changed: 1 addition & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -818,7 +818,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
818818

819819
// Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>.
820820
void SplitVectorResult(SDNode *N, unsigned ResNo);
821-
void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi);
821+
void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi, bool IsVP);
822822
void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
823823
void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
824824
void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi);

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Lines changed: 58 additions & 5 deletions
Original file line number · Diff line number · Diff line change
@@ -1069,7 +1069,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
10691069
case ISD::USHLSAT:
10701070
case ISD::ROTL:
10711071
case ISD::ROTR:
1072-
SplitVecRes_BinOp(N, Lo, Hi);
1072+
SplitVecRes_BinOp(N, Lo, Hi, /*IsVP*/ false);
10731073
break;
10741074
case ISD::FMA:
10751075
case ISD::FSHL:
@@ -1106,6 +1106,26 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
11061106
case ISD::UDIVFIXSAT:
11071107
SplitVecRes_FIX(N, Lo, Hi);
11081108
break;
1109+
case ISD::VP_ADD:
1110+
case ISD::VP_AND:
1111+
case ISD::VP_MUL:
1112+
case ISD::VP_OR:
1113+
case ISD::VP_SUB:
1114+
case ISD::VP_XOR:
1115+
case ISD::VP_SHL:
1116+
case ISD::VP_LSHR:
1117+
case ISD::VP_ASHR:
1118+
case ISD::VP_SDIV:
1119+
case ISD::VP_UDIV:
1120+
case ISD::VP_SREM:
1121+
case ISD::VP_UREM:
1122+
case ISD::VP_FADD:
1123+
case ISD::VP_FSUB:
1124+
case ISD::VP_FMUL:
1125+
case ISD::VP_FDIV:
1126+
case ISD::VP_FREM:
1127+
SplitVecRes_BinOp(N, Lo, Hi, /*IsVP*/ true);
1128+
break;
11091129
}
11101130

11111131
// If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -1137,8 +1157,8 @@ void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT,
11371157
}
11381158
}
11391159

1140-
void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,
1141-
SDValue &Hi) {
1160+
void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi,
1161+
bool IsVP) {
11421162
SDValue LHSLo, LHSHi;
11431163
GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
11441164
SDValue RHSLo, RHSHi;
@@ -1147,8 +1167,41 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,
11471167

11481168
const SDNodeFlags Flags = N->getFlags();
11491169
unsigned Opcode = N->getOpcode();
1150-
Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags);
1151-
Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags);
1170+
if (!IsVP) {
1171+
Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags);
1172+
Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags);
1173+
return;
1174+
}
1175+
1176+
// Split the mask.
1177+
SDValue MaskLo, MaskHi;
1178+
SDValue Mask = N->getOperand(2);
1179+
EVT MaskVT = Mask.getValueType();
1180+
if (getTypeAction(MaskVT) == TargetLowering::TypeSplitVector)
1181+
GetSplitVector(Mask, MaskLo, MaskHi);
1182+
else
1183+
std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, SDLoc(Mask));
1184+
1185+
// Split the vector length parameter.
1186+
// %evl -> umin(%evl, %halfnumelts) and usubsat(%evl - %halfnumelts).
1187+
SDValue EVL = N->getOperand(3);
1188+
EVT VecVT = N->getValueType(0);
1189+
EVT EVLVT = EVL.getValueType();
1190+
assert(VecVT.getVectorElementCount().isKnownEven() &&
1191+
"Expecting the mask to be an evenly-sized vector");
1192+
unsigned HalfMinNumElts = VecVT.getVectorMinNumElements() / 2;
1193+
SDValue HalfNumElts =
1194+
VecVT.isFixedLengthVector()
1195+
? DAG.getConstant(HalfMinNumElts, dl, EVLVT)
1196+
: DAG.getVScale(dl, EVLVT,
1197+
APInt(EVLVT.getScalarSizeInBits(), HalfMinNumElts));
1198+
SDValue EVLLo = DAG.getNode(ISD::UMIN, dl, EVLVT, EVL, HalfNumElts);
1199+
SDValue EVLHi = DAG.getNode(ISD::USUBSAT, dl, EVLVT, EVL, HalfNumElts);
1200+
1201+
Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(),
1202+
{LHSLo, RHSLo, MaskLo, EVLLo}, Flags);
1203+
Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(),
1204+
{LHSHi, RHSHi, MaskHi, EVLHi}, Flags);
11521205
}
11531206

11541207
void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo,

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll

Lines changed: 270 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -384,6 +384,103 @@ define <16 x i8> @vadd_vi_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) {
384384
ret <16 x i8> %v
385385
}
386386

387+
declare <256 x i8> @llvm.vp.add.v258i8(<256 x i8>, <256 x i8>, <256 x i1>, i32)
388+
389+
define <256 x i8> @vadd_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %evl) {
390+
; CHECK-LABEL: vadd_vi_v258i8:
391+
; CHECK: # %bb.0:
392+
; CHECK-NEXT: addi a2, zero, 128
393+
; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, mu
394+
; CHECK-NEXT: vle1.v v25, (a0)
395+
; CHECK-NEXT: addi a0, a1, -128
396+
; CHECK-NEXT: vmv1r.v v26, v0
397+
; CHECK-NEXT: mv a3, zero
398+
; CHECK-NEXT: bltu a1, a0, .LBB30_2
399+
; CHECK-NEXT: # %bb.1:
400+
; CHECK-NEXT: mv a3, a0
401+
; CHECK-NEXT: .LBB30_2:
402+
; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, mu
403+
; CHECK-NEXT: vmv1r.v v0, v25
404+
; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t
405+
; CHECK-NEXT: bltu a1, a2, .LBB30_4
406+
; CHECK-NEXT: # %bb.3:
407+
; CHECK-NEXT: addi a1, zero, 128
408+
; CHECK-NEXT: .LBB30_4:
409+
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu
410+
; CHECK-NEXT: vmv1r.v v0, v26
411+
; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
412+
; CHECK-NEXT: ret
413+
%elt.head = insertelement <256 x i8> undef, i8 -1, i32 0
414+
%vb = shufflevector <256 x i8> %elt.head, <256 x i8> undef, <256 x i32> zeroinitializer
415+
%v = call <256 x i8> @llvm.vp.add.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl)
416+
ret <256 x i8> %v
417+
}
418+
419+
define <256 x i8> @vadd_vi_v258i8_unmasked(<256 x i8> %va, i32 zeroext %evl) {
420+
; CHECK-LABEL: vadd_vi_v258i8_unmasked:
421+
; CHECK: # %bb.0:
422+
; CHECK-NEXT: addi a1, a0, -128
423+
; CHECK-NEXT: mv a2, zero
424+
; CHECK-NEXT: bltu a0, a1, .LBB31_2
425+
; CHECK-NEXT: # %bb.1:
426+
; CHECK-NEXT: mv a2, a1
427+
; CHECK-NEXT: .LBB31_2:
428+
; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, mu
429+
; CHECK-NEXT: addi a1, zero, 128
430+
; CHECK-NEXT: vadd.vi v16, v16, -1
431+
; CHECK-NEXT: bltu a0, a1, .LBB31_4
432+
; CHECK-NEXT: # %bb.3:
433+
; CHECK-NEXT: addi a0, zero, 128
434+
; CHECK-NEXT: .LBB31_4:
435+
; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu
436+
; CHECK-NEXT: vadd.vi v8, v8, -1
437+
; CHECK-NEXT: ret
438+
%elt.head = insertelement <256 x i8> undef, i8 -1, i32 0
439+
%vb = shufflevector <256 x i8> %elt.head, <256 x i8> undef, <256 x i32> zeroinitializer
440+
%head = insertelement <256 x i1> undef, i1 true, i32 0
441+
%m = shufflevector <256 x i1> %head, <256 x i1> undef, <256 x i32> zeroinitializer
442+
%v = call <256 x i8> @llvm.vp.add.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl)
443+
ret <256 x i8> %v
444+
}
445+
446+
; Test splitting when the %evl is a known constant.
447+
448+
define <256 x i8> @vadd_vi_v258i8_evl129(<256 x i8> %va, <256 x i1> %m) {
449+
; CHECK-LABEL: vadd_vi_v258i8_evl129:
450+
; CHECK: # %bb.0:
451+
; CHECK-NEXT: addi a1, zero, 128
452+
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu
453+
; CHECK-NEXT: vle1.v v25, (a0)
454+
; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
455+
; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, mu
456+
; CHECK-NEXT: vmv1r.v v0, v25
457+
; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t
458+
; CHECK-NEXT: ret
459+
%elt.head = insertelement <256 x i8> undef, i8 -1, i32 0
460+
%vb = shufflevector <256 x i8> %elt.head, <256 x i8> undef, <256 x i32> zeroinitializer
461+
%v = call <256 x i8> @llvm.vp.add.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 129)
462+
ret <256 x i8> %v
463+
}
464+
465+
; FIXME: The upper half is doing nothing.
466+
467+
define <256 x i8> @vadd_vi_v258i8_evl128(<256 x i8> %va, <256 x i1> %m) {
468+
; CHECK-LABEL: vadd_vi_v258i8_evl128:
469+
; CHECK: # %bb.0:
470+
; CHECK-NEXT: addi a1, zero, 128
471+
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu
472+
; CHECK-NEXT: vle1.v v25, (a0)
473+
; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
474+
; CHECK-NEXT: vsetivli zero, 0, e8, m8, ta, mu
475+
; CHECK-NEXT: vmv1r.v v0, v25
476+
; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t
477+
; CHECK-NEXT: ret
478+
%elt.head = insertelement <256 x i8> undef, i8 -1, i32 0
479+
%vb = shufflevector <256 x i8> %elt.head, <256 x i8> undef, <256 x i32> zeroinitializer
480+
%v = call <256 x i8> @llvm.vp.add.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 128)
481+
ret <256 x i8> %v
482+
}
483+
387484
declare <2 x i16> @llvm.vp.add.v2i16(<2 x i16>, <2 x i16>, <2 x i1>, i32)
388485

389486
define <2 x i16> @vadd_vv_v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 zeroext %evl) {
@@ -1407,3 +1504,176 @@ define <16 x i64> @vadd_vi_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
14071504
%v = call <16 x i64> @llvm.vp.add.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
14081505
ret <16 x i64> %v
14091506
}
1507+
1508+
; Test that split-legalization works as expected.
1509+
1510+
declare <32 x i64> @llvm.vp.add.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32)
1511+
1512+
define <32 x i64> @vadd_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) {
1513+
; RV32-LABEL: vadd_vx_v32i64:
1514+
; RV32: # %bb.0:
1515+
; RV32-NEXT: mv a1, zero
1516+
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
1517+
; RV32-NEXT: vmv1r.v v1, v0
1518+
; RV32-NEXT: vslidedown.vi v0, v0, 2
1519+
; RV32-NEXT: addi a2, zero, 32
1520+
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
1521+
; RV32-NEXT: addi a2, a0, -16
1522+
; RV32-NEXT: vmv.v.i v24, -1
1523+
; RV32-NEXT: bltu a0, a2, .LBB106_2
1524+
; RV32-NEXT: # %bb.1:
1525+
; RV32-NEXT: mv a1, a2
1526+
; RV32-NEXT: .LBB106_2:
1527+
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu
1528+
; RV32-NEXT: addi a1, zero, 16
1529+
; RV32-NEXT: vadd.vv v16, v16, v24, v0.t
1530+
; RV32-NEXT: bltu a0, a1, .LBB106_4
1531+
; RV32-NEXT: # %bb.3:
1532+
; RV32-NEXT: addi a0, zero, 16
1533+
; RV32-NEXT: .LBB106_4:
1534+
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, mu
1535+
; RV32-NEXT: vmv1r.v v0, v1
1536+
; RV32-NEXT: vadd.vv v8, v8, v24, v0.t
1537+
; RV32-NEXT: ret
1538+
;
1539+
; RV64-LABEL: vadd_vx_v32i64:
1540+
; RV64: # %bb.0:
1541+
; RV64-NEXT: mv a1, zero
1542+
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
1543+
; RV64-NEXT: addi a2, a0, -16
1544+
; RV64-NEXT: vmv1r.v v25, v0
1545+
; RV64-NEXT: vslidedown.vi v0, v0, 2
1546+
; RV64-NEXT: bltu a0, a2, .LBB106_2
1547+
; RV64-NEXT: # %bb.1:
1548+
; RV64-NEXT: mv a1, a2
1549+
; RV64-NEXT: .LBB106_2:
1550+
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
1551+
; RV64-NEXT: addi a1, zero, 16
1552+
; RV64-NEXT: vadd.vi v16, v16, -1, v0.t
1553+
; RV64-NEXT: bltu a0, a1, .LBB106_4
1554+
; RV64-NEXT: # %bb.3:
1555+
; RV64-NEXT: addi a0, zero, 16
1556+
; RV64-NEXT: .LBB106_4:
1557+
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, mu
1558+
; RV64-NEXT: vmv1r.v v0, v25
1559+
; RV64-NEXT: vadd.vi v8, v8, -1, v0.t
1560+
; RV64-NEXT: ret
1561+
%elt.head = insertelement <32 x i64> undef, i64 -1, i32 0
1562+
%vb = shufflevector <32 x i64> %elt.head, <32 x i64> undef, <32 x i32> zeroinitializer
1563+
%v = call <32 x i64> @llvm.vp.add.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl)
1564+
ret <32 x i64> %v
1565+
}
1566+
1567+
define <32 x i64> @vadd_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
1568+
; RV32-LABEL: vadd_vi_v32i64_unmasked:
1569+
; RV32: # %bb.0:
1570+
; RV32-NEXT: mv a1, zero
1571+
; RV32-NEXT: addi a2, zero, 32
1572+
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
1573+
; RV32-NEXT: addi a2, a0, -16
1574+
; RV32-NEXT: vmv.v.i v24, -1
1575+
; RV32-NEXT: bltu a0, a2, .LBB107_2
1576+
; RV32-NEXT: # %bb.1:
1577+
; RV32-NEXT: mv a1, a2
1578+
; RV32-NEXT: .LBB107_2:
1579+
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu
1580+
; RV32-NEXT: addi a1, zero, 16
1581+
; RV32-NEXT: vadd.vv v16, v16, v24
1582+
; RV32-NEXT: bltu a0, a1, .LBB107_4
1583+
; RV32-NEXT: # %bb.3:
1584+
; RV32-NEXT: addi a0, zero, 16
1585+
; RV32-NEXT: .LBB107_4:
1586+
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, mu
1587+
; RV32-NEXT: vadd.vv v8, v8, v24
1588+
; RV32-NEXT: ret
1589+
;
1590+
; RV64-LABEL: vadd_vi_v32i64_unmasked:
1591+
; RV64: # %bb.0:
1592+
; RV64-NEXT: addi a1, a0, -16
1593+
; RV64-NEXT: mv a2, zero
1594+
; RV64-NEXT: bltu a0, a1, .LBB107_2
1595+
; RV64-NEXT: # %bb.1:
1596+
; RV64-NEXT: mv a2, a1
1597+
; RV64-NEXT: .LBB107_2:
1598+
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, mu
1599+
; RV64-NEXT: addi a1, zero, 16
1600+
; RV64-NEXT: vadd.vi v16, v16, -1
1601+
; RV64-NEXT: bltu a0, a1, .LBB107_4
1602+
; RV64-NEXT: # %bb.3:
1603+
; RV64-NEXT: addi a0, zero, 16
1604+
; RV64-NEXT: .LBB107_4:
1605+
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, mu
1606+
; RV64-NEXT: vadd.vi v8, v8, -1
1607+
; RV64-NEXT: ret
1608+
%elt.head = insertelement <32 x i64> undef, i64 -1, i32 0
1609+
%vb = shufflevector <32 x i64> %elt.head, <32 x i64> undef, <32 x i32> zeroinitializer
1610+
%head = insertelement <32 x i1> undef, i1 true, i32 0
1611+
%m = shufflevector <32 x i1> %head, <32 x i1> undef, <32 x i32> zeroinitializer
1612+
%v = call <32 x i64> @llvm.vp.add.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl)
1613+
ret <32 x i64> %v
1614+
}
1615+
1616+
; FIXME: After splitting, the "high" vadd.vv is doing nothing; could be
1617+
; replaced by undef.
1618+
1619+
define <32 x i64> @vadd_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) {
1620+
; RV32-LABEL: vadd_vx_v32i64_evl12:
1621+
; RV32: # %bb.0:
1622+
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
1623+
; RV32-NEXT: vslidedown.vi v1, v0, 2
1624+
; RV32-NEXT: addi a0, zero, 32
1625+
; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, mu
1626+
; RV32-NEXT: vmv.v.i v24, -1
1627+
; RV32-NEXT: vsetivli zero, 12, e64, m8, ta, mu
1628+
; RV32-NEXT: vadd.vv v8, v8, v24, v0.t
1629+
; RV32-NEXT: vsetivli zero, 0, e64, m8, ta, mu
1630+
; RV32-NEXT: vmv1r.v v0, v1
1631+
; RV32-NEXT: vadd.vv v16, v16, v24, v0.t
1632+
; RV32-NEXT: ret
1633+
;
1634+
; RV64-LABEL: vadd_vx_v32i64_evl12:
1635+
; RV64: # %bb.0:
1636+
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
1637+
; RV64-NEXT: vslidedown.vi v25, v0, 2
1638+
; RV64-NEXT: vsetivli zero, 12, e64, m8, ta, mu
1639+
; RV64-NEXT: vadd.vi v8, v8, -1, v0.t
1640+
; RV64-NEXT: vsetivli zero, 0, e64, m8, ta, mu
1641+
; RV64-NEXT: vmv1r.v v0, v25
1642+
; RV64-NEXT: vadd.vi v16, v16, -1, v0.t
1643+
; RV64-NEXT: ret
1644+
%elt.head = insertelement <32 x i64> undef, i64 -1, i32 0
1645+
%vb = shufflevector <32 x i64> %elt.head, <32 x i64> undef, <32 x i32> zeroinitializer
1646+
%v = call <32 x i64> @llvm.vp.add.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 12)
1647+
ret <32 x i64> %v
1648+
}
1649+
1650+
define <32 x i64> @vadd_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) {
1651+
; RV32-LABEL: vadd_vx_v32i64_evl27:
1652+
; RV32: # %bb.0:
1653+
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
1654+
; RV32-NEXT: vslidedown.vi v1, v0, 2
1655+
; RV32-NEXT: addi a0, zero, 32
1656+
; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, mu
1657+
; RV32-NEXT: vmv.v.i v24, -1
1658+
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu
1659+
; RV32-NEXT: vadd.vv v8, v8, v24, v0.t
1660+
; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, mu
1661+
; RV32-NEXT: vmv1r.v v0, v1
1662+
; RV32-NEXT: vadd.vv v16, v16, v24, v0.t
1663+
; RV32-NEXT: ret
1664+
;
1665+
; RV64-LABEL: vadd_vx_v32i64_evl27:
1666+
; RV64: # %bb.0:
1667+
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
1668+
; RV64-NEXT: vslidedown.vi v25, v0, 2
1669+
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
1670+
; RV64-NEXT: vadd.vi v8, v8, -1, v0.t
1671+
; RV64-NEXT: vsetivli zero, 11, e64, m8, ta, mu
1672+
; RV64-NEXT: vmv1r.v v0, v25
1673+
; RV64-NEXT: vadd.vi v16, v16, -1, v0.t
1674+
; RV64-NEXT: ret
1675+
%elt.head = insertelement <32 x i64> undef, i64 -1, i32 0
1676+
%vb = shufflevector <32 x i64> %elt.head, <32 x i64> undef, <32 x i32> zeroinitializer
1677+
%v = call <32 x i64> @llvm.vp.add.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 27)
1678+
ret <32 x i64> %v
1679+
}

0 commit comments

Comments
 (0)