Commit 1648852: [RISCV][RVV] Fix vslide1up/down intrinsics overflow bug for SEW=64 on RV32

Reviewed By: craig.topper, kito-cheng
Differential Revision: https://reviews.llvm.org/D120899

1 parent: 62bcfcb
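
Background, with a worked example: on RV32 there is no 64-bit scalar register, so a SEW=64 vslide1up/vslide1down is lowered as a pair of SEW=32 slides run at twice the vector length. The old lowering computed that doubled length as AVL << 1 straight from the requested AVL, but the vl a machine actually grants is capped at VLMAX, and for very large AVL values (such as an "all elements" request) the shift itself wraps around XLEN. A minimal standalone sketch of the miscomputation, assuming a hypothetical VLEN of 128:

#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t VLEN = 128;             // hypothetical implementation VLEN
  const uint32_t VLMAX_e64 = VLEN / 64;  // e64, LMUL=1 -> VLMAX = 2
  const uint32_t AVL = 0xFFFFFFFF;       // "as many elements as possible"

  // Old lowering: double the raw AVL; the shift wraps on RV32.
  uint32_t Buggy = AVL << 1;             // 0xFFFFFFFE, nowhere near 2 * vl
  // Fixed lowering: clamp to the granted vl first (what vsetvli returns
  // when AVL >= 2 * VLMAX), then double.
  uint32_t Fixed = std::min(AVL, VLMAX_e64) * 2;  // = 4
  std::printf("buggy I32VL = %u, fixed I32VL = %u\n", Buggy, Fixed);
}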

File tree: 11 files changed, +690 -25 lines

llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp

Lines changed: 1 addition & 1 deletion

@@ -131,7 +131,7 @@ unsigned RISCVVType::encodeVTYPE(RISCVII::VLMUL VLMUL, unsigned SEW,
                                  bool TailAgnostic, bool MaskAgnostic) {
   assert(isValidSEW(SEW) && "Invalid SEW");
   unsigned VLMULBits = static_cast<unsigned>(VLMUL);
-  unsigned VSEWBits = Log2_32(SEW) - 3;
+  unsigned VSEWBits = encodeSEW(SEW);
   unsigned VTypeI = (VSEWBits << 3) | (VLMULBits & 0x7);
   if (TailAgnostic)
     VTypeI |= 0x40;

llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h

Lines changed: 5 additions & 0 deletions

@@ -377,6 +377,11 @@ inline static unsigned decodeVSEW(unsigned VSEW) {
   return 1 << (VSEW + 3);
 }
 
+inline static unsigned encodeSEW(unsigned SEW) {
+  assert(isValidSEW(SEW) && "Unexpected SEW value");
+  return Log2_32(SEW) - 3;
+}
+
 inline static unsigned getSEW(unsigned VType) {
   unsigned VSEW = (VType >> 3) & 0x7;
   return decodeVSEW(VSEW);
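
The new encodeSEW is the inverse of decodeVSEW just above it: vtype stores SEW as vsew with SEW = 8 << vsew, hence vsew = log2(SEW) - 3 (for example, SEW=64 encodes as vsew=3). A standalone round-trip check, reimplemented here for illustration rather than taken from the LLVM sources:

#include <cassert>

// SEW = 8 << vsew  <=>  vsew = log2(SEW) - 3, for SEW in {8, 16, 32, 64}.
// __builtin_ctz stands in for LLVM's Log2_32, since every valid SEW is a
// power of two.
static unsigned encodeSEW(unsigned SEW) { return __builtin_ctz(SEW) - 3; }
static unsigned decodeVSEW(unsigned VSEW) { return 1u << (VSEW + 3); }

int main() {
  for (unsigned SEW : {8u, 16u, 32u, 64u})
    assert(decodeVSEW(encodeSEW(SEW)) == SEW);
}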

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 55 additions & 7 deletions

@@ -4659,12 +4659,58 @@ static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
                                  DAG.getConstant(1, DL, XLenVT));
 
     // Double the VL since we halved SEW.
-    SDValue VL = getVLOperand(Op);
-    SDValue I32VL =
-        DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
+    SDValue AVL = getVLOperand(Op);
+    SDValue I32VL;
+
+    // Optimize for constant AVL
+    if (isa<ConstantSDNode>(AVL)) {
+      unsigned EltSize = VT.getScalarSizeInBits();
+      unsigned MinSize = VT.getSizeInBits().getKnownMinValue();
+
+      unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
+      unsigned MaxVLMAX =
+          RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
+
+      unsigned VectorBitsMin = Subtarget.getRealMinVLen();
+      unsigned MinVLMAX =
+          RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
+
+      uint64_t AVLInt = cast<ConstantSDNode>(AVL)->getZExtValue();
+      if (AVLInt <= MinVLMAX) {
+        I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
+      } else if (AVLInt >= 2 * MaxVLMAX) {
+        // Just set vl to VLMAX in this situation
+        RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(I32VT);
+        SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
+        unsigned Sew = RISCVVType::encodeSEW(I32VT.getScalarSizeInBits());
+        SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
+        SDValue SETVLMAX = DAG.getTargetConstant(
+            Intrinsic::riscv_vsetvlimax_opt, DL, MVT::i32);
+        I32VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVLMAX, SEW,
+                            LMUL);
+      } else {
+        // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
+        // is related to the hardware implementation.
+        // So let the following code handle it.
+      }
+    }
+    if (!I32VL) {
+      RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
+      SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
+      unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
+      SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
+      SDValue SETVL =
+          DAG.getTargetConstant(Intrinsic::riscv_vsetvli_opt, DL, MVT::i32);
+      // Use a vsetvli instruction to get the actually used length, which
+      // depends on the hardware implementation.
+      SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
+                               SEW, LMUL);
+      I32VL =
+          DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
+    }
 
     MVT I32MaskVT = MVT::getVectorVT(MVT::i1, I32VT.getVectorElementCount());
-    SDValue I32Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, I32MaskVT, VL);
+    SDValue I32Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, I32MaskVT, I32VL);
 
     // Shift the two scalar parts in using SEW=32 slide1up/slide1down
     // instructions.
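
The constant-AVL fast path above is a three-way split on how AVL compares with the VLMAX bounds the subtarget can guarantee. A condensed sketch of just that decision, with an enum and function invented for illustration:

#include <cstdint>

enum class I32VLSource { Constant, VsetvliMax, Vsetvli };

I32VLSource classifyAVL(uint64_t AVLInt, uint64_t MinVLMAX, uint64_t MaxVLMAX) {
  // Every conforming implementation grants vl == AVL here, so the doubled
  // VL folds to the constant 2 * AVLInt.
  if (AVLInt <= MinVLMAX)
    return I32VLSource::Constant;
  // The spec pins vl to VLMAX here, so a vsetvlimax on the i32 type suffices.
  if (AVLInt >= 2 * MaxVLMAX)
    return I32VLSource::VsetvliMax;
  // In between, vl is implementation-defined: read it back with vsetvli
  // and double it at run time.
  return I32VLSource::Vsetvli;
}

Note that the vsetvlimax branch in the diff queries I32VT, whose VLMAX is already twice the e64 VLMAX, which is why that path needs no extra doubling.
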
@@ -4704,10 +4750,11 @@ static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
     // TAMU
     if (Policy == RISCVII::TAIL_AGNOSTIC)
       return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff,
-                         VL);
+                         AVL);
     // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
     // It's fine because vmerge does not care about mask policy.
-    return DAG.getNode(RISCVISD::VP_MERGE_VL, DL, VT, Mask, Vec, MaskedOff, VL);
+    return DAG.getNode(RISCVISD::VP_MERGE_VL, DL, VT, Mask, Vec, MaskedOff,
+                       AVL);
   }
 }
 
@@ -5606,7 +5653,8 @@ SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
   unsigned MaxVLMAX = 0;
   unsigned VectorBitsMax = Subtarget.getMaxRVVVectorSizeInBits();
   if (VectorBitsMax != 0)
-    MaxVLMAX = ((VectorBitsMax / EltSize) * MinSize) / RISCV::RVVBitsPerBlock;
+    MaxVLMAX =
+        RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
 
   unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
   MVT IntVT = VecVT.changeVectorElementTypeToInteger();
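
The second hunk is fallout from the VL-to-AVL rename rather than a functional change: the closing merge selects whole e64 elements, so it keeps consuming the caller's AVL, while the doubled I32VL only ever feeds the e32 slide pair. A tiny sketch of the two element-count domains, with hypothetical numbers:

int main() {
  unsigned AVL_e64 = 2;          // hypothetical e64 element count for the merge
  unsigned I32VL = 2 * AVL_e64;  // e32 elements consumed by the slide pair
  // One e64 element is two e32 halves, hence the distinct VL operands.
  return I32VL == 2 * AVL_e64 ? 0 : 1;
}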

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 14 additions & 6 deletions

@@ -317,6 +317,11 @@ enum NodeType : unsigned {
 };
 } // namespace RISCVISD
 
+namespace RISCV {
+// We use 64 bits as the known part in the scalable vector types.
+static constexpr unsigned RVVBitsPerBlock = 64;
+} // namespace RISCV
+
 class RISCVTargetLowering : public TargetLowering {
   const RISCVSubtarget &Subtarget;
 
@@ -531,6 +536,15 @@ class RISCVTargetLowering : public TargetLowering {
                Optional<CallingConv::ID> CC) const override;
 
   static RISCVII::VLMUL getLMUL(MVT VT);
+  inline static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize,
+                                      unsigned MinSize) {
+    // Original equation:
+    //   VLMAX = (VectorBits / EltSize) * LMUL
+    //   where LMUL = MinSize / RISCV::RVVBitsPerBlock
+    // The following equations have been reordered to prevent loss of precision
+    // when calculating fractional LMUL.
+    return ((VectorBits / EltSize) * MinSize) / RISCV::RVVBitsPerBlock;
+  };
   static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul);
   static unsigned getSubregIndexByMVT(MVT VT, unsigned Index);
   static unsigned getRegClassIDForVecVT(MVT VT);
@@ -671,12 +685,6 @@ class RISCVTargetLowering : public TargetLowering {
     return false;
   };
 };
-
-namespace RISCV {
-// We use 64 bits as the known part in the scalable vector types.
-static constexpr unsigned RVVBitsPerBlock = 64;
-} // namespace RISCV
-
 namespace RISCVVIntrinsicsTable {
 
 struct RISCVVIntrinsicInfo {
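
The reordering note inside computeVLMAX deserves a concrete number: with fractional LMUL the naive grouping truncates to zero in integer arithmetic. A standalone check with hypothetical inputs:

#include <cassert>

constexpr unsigned RVVBitsPerBlock = 64;

int main() {
  // Hypothetical query: VLEN = 128, e32, MinSize = 32, i.e. LMUL = 32/64 = 1/2,
  // so the true VLMAX is (128 / 32) * (1 / 2) = 2.
  unsigned VectorBits = 128, EltSize = 32, MinSize = 32;

  // Naive grouping computes LMUL first: 32 / 64 == 0 in integer math.
  unsigned Naive = (VectorBits / EltSize) * (MinSize / RVVBitsPerBlock);
  // The patch's grouping multiplies before the final divide.
  unsigned Reordered = ((VectorBits / EltSize) * MinSize) / RVVBitsPerBlock;

  assert(Naive == 0 && Reordered == 2);
}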

llvm/lib/Target/RISCV/RISCVSubtarget.h

Lines changed: 9 additions & 0 deletions

@@ -206,6 +206,15 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
     return 0;
   }
   unsigned getMinVLen() const { return ZvlLen; }
+  unsigned getMaxVLen() const { return Zvl65536b; }
+  unsigned getRealMinVLen() const {
+    unsigned VLen = getMinRVVVectorSizeInBits();
+    return VLen == 0 ? getMinVLen() : VLen;
+  }
+  unsigned getRealMaxVLen() const {
+    unsigned VLen = getMaxRVVVectorSizeInBits();
+    return VLen == 0 ? getMaxVLen() : VLen;
+  }
   RISCVABI::ABI getTargetABI() const { return TargetABI; }
   bool isRegisterReservedByUser(Register i) const {
     assert(i < RISCV::NUM_TARGET_REGS && "Register out of range");
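
The new accessors spell out the fallback rule the constant-AVL path relies on: prefer the user-specified RVV vector-size bounds when present, otherwise fall back to what the enabled Zvl* extensions guarantee (the RVV spec caps VLEN at 65536 bits). A behavioral sketch, assuming a return of 0 encodes "no user bound":

#include <cassert>

// Sketch of the getRealMinVLen/getRealMaxVLen fallback, assuming 0 = unset.
unsigned realBound(unsigned UserBound, unsigned ArchBound) {
  return UserBound == 0 ? ArchBound : UserBound;
}

int main() {
  assert(realBound(0, 128) == 128);   // no user bound: use the Zvl* guarantee
  assert(realBound(256, 128) == 256); // an explicit user bound wins
}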

llvm/test/CodeGen/RISCV/rvv/masked-vslide1down-rv32.ll

Lines changed: 8 additions & 3 deletions

@@ -13,7 +13,8 @@ declare <vscale x 1 x i64> @llvm.riscv.vslide1down.mask.nxv1i64.i64(
 define <vscale x 1 x i64> @intrinsic_vslide1down_mask_tumu_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vslide1down_mask_tumu_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    slli a3, a2, 1
+; CHECK-NEXT:    vsetvli a3, a2, e64, m1, ta, mu
+; CHECK-NEXT:    slli a3, a3, 1
 ; CHECK-NEXT:    vsetvli zero, a3, e32, m1, ta, mu
 ; CHECK-NEXT:    vslide1down.vx v9, v9, a0
 ; CHECK-NEXT:    vslide1down.vx v9, v9, a1
@@ -34,7 +35,8 @@ entry:
 define <vscale x 1 x i64> @intrinsic_vslide1down_mask_tamu_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vslide1down_mask_tamu_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    slli a3, a2, 1
+; CHECK-NEXT:    vsetvli a3, a2, e64, m1, ta, mu
+; CHECK-NEXT:    slli a3, a3, 1
 ; CHECK-NEXT:    vsetvli zero, a3, e32, m1, ta, mu
 ; CHECK-NEXT:    vslide1down.vx v9, v9, a0
 ; CHECK-NEXT:    vslide1down.vx v9, v9, a1
@@ -57,7 +59,8 @@ entry:
 define <vscale x 1 x i64> @intrinsic_vslide1down_mask_tuma_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vslide1down_mask_tuma_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    slli a3, a2, 1
+; CHECK-NEXT:    vsetvli a3, a2, e64, m1, ta, mu
+; CHECK-NEXT:    slli a3, a3, 1
 ; CHECK-NEXT:    vsetvli zero, a3, e32, m1, ta, mu
 ; CHECK-NEXT:    vslide1down.vx v9, v9, a0
 ; CHECK-NEXT:    vslide1down.vx v9, v9, a1
@@ -79,6 +82,7 @@ entry:
 define <vscale x 1 x i64> @intrinsic_vslide1down_mask_tama_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vslide1down_mask_tama_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a2, a2, e64, m1, ta, mu
 ; CHECK-NEXT:    slli a2, a2, 1
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
 ; CHECK-NEXT:    vslide1down.vx v8, v8, a0
@@ -98,6 +102,7 @@ entry:
 define <vscale x 1 x i64> @intrinsic_vslide1down_mask_tama_undef_mask_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vslide1down_mask_tama_undef_mask_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a2, a2, e64, m1, ta, mu
 ; CHECK-NEXT:    slli a2, a2, 1
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
 ; CHECK-NEXT:    vslide1down.vx v8, v8, a0

llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll

Lines changed: 2 additions & 0 deletions

@@ -886,6 +886,7 @@ declare <vscale x 1 x i64> @llvm.riscv.vslide1down.nxv1i64(
 define <vscale x 1 x i64> @intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, iXLen %3) nounwind {
 ; RV32-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64:
 ; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli a2, a2, e64, m1, ta, mu
 ; RV32-NEXT:    slli a2, a2, 1
 ; RV32-NEXT:    vsetvli zero, a2, e32, m1, tu, mu
 ; RV32-NEXT:    vmv1r.v v10, v8
@@ -917,6 +918,7 @@ declare <vscale x 1 x i64> @llvm.riscv.vslide1up.nxv1i64.i64(
 define <vscale x 1 x i64> @intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, iXLen %3) nounwind {
 ; RV32-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64:
 ; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli a2, a2, e64, m1, ta, mu
 ; RV32-NEXT:    slli a2, a2, 1
 ; RV32-NEXT:    vsetvli zero, a2, e32, m1, tu, mu
 ; RV32-NEXT:    vmv1r.v v10, v8
