Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 61 additions & 3 deletions llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,12 @@ using namespace llvm;
STATISTIC(NumInsertedVSETVL, "Number of VSETVL inst inserted");
STATISTIC(NumCoalescedVSETVL, "Number of VSETVL inst coalesced");

static cl::opt<bool> EnsureWholeVectorRegisterMoveValidVTYPE(
DEBUG_TYPE "-whole-vector-register-move-valid-vtype", cl::Hidden,
cl::desc("Insert vsetvlis before vmvNr.vs to ensure vtype is valid and "
"vill is cleared"),
cl::init(true));

namespace {

/// Given a virtual register \p Reg, return the corresponding VNInfo for it.
Expand Down Expand Up @@ -195,6 +201,14 @@ static bool hasUndefinedPassthru(const MachineInstr &MI) {
return UseMO.getReg() == RISCV::NoRegister || UseMO.isUndef();
}

/// Return true if \p MI is a copy that will be lowered to one or more vmvNr.vs.
static bool isVectorCopy(const TargetRegisterInfo *TRI,
const MachineInstr &MI) {
return MI.isCopy() && MI.getOperand(0).getReg().isPhysical() &&
RISCVRegisterInfo::isRVVRegClass(
TRI->getMinimalPhysRegClass(MI.getOperand(0).getReg()));
}

/// Which subfields of VL or VTYPE have values we need to preserve?
struct DemandedFields {
// Some unknown property of VL is used. If demanded, must preserve entire
Expand All @@ -221,10 +235,13 @@ struct DemandedFields {
bool SEWLMULRatio = false;
bool TailPolicy = false;
bool MaskPolicy = false;
// If this is true, we demand that VTYPE is set to some legal state, i.e. that
// vill is unset.
bool VILL = false;

// Return true if any part of VTYPE was used
bool usedVTYPE() const {
return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy || VILL;
}

// Return true if any property of VL was used
Expand All @@ -239,6 +256,7 @@ struct DemandedFields {
SEWLMULRatio = true;
TailPolicy = true;
MaskPolicy = true;
VILL = true;
}

// Mark all VL properties as demanded
Expand All @@ -263,6 +281,7 @@ struct DemandedFields {
SEWLMULRatio |= B.SEWLMULRatio;
TailPolicy |= B.TailPolicy;
MaskPolicy |= B.MaskPolicy;
VILL |= B.VILL;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
Expand Down Expand Up @@ -308,7 +327,8 @@ struct DemandedFields {
OS << ", ";
OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
OS << "TailPolicy=" << TailPolicy << ", ";
OS << "MaskPolicy=" << MaskPolicy;
OS << "MaskPolicy=" << MaskPolicy << ", ";
OS << "VILL=" << VILL;
OS << "}";
}
#endif
Expand Down Expand Up @@ -503,6 +523,21 @@ DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
}
}

// In §32.16.6, whole vector register moves have a dependency on SEW. At the
// MIR level though we don't encode the element type, and it gives the same
// result whatever the SEW may be.
//
// However it does need valid SEW, i.e. vill must be cleared. The entry to a
// function, calls and inline assembly may all set it, so make sure we clear
// it for whole register copies. Do this by leaving VILL demanded.
if (isVectorCopy(ST->getRegisterInfo(), MI)) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a cl::opt for testing? I think simple having it conditional here would effectively disable this change, and having a quick way to rule in/out this change might be useful going forward.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've added it under -riscv-insert-vsetvli-whole-vector-register-move-valid-vtype=false. I ended up having to put the check in emitVSETVLIs because it adds an implicit use on VTYPE there, and getting rid of the implicit use seems to undo the change.

Res.LMUL = DemandedFields::LMULNone;
Res.SEW = DemandedFields::SEWNone;
Res.SEWLMULRatio = false;
Res.TailPolicy = false;
Res.MaskPolicy = false;
}

return Res;
}

Expand Down Expand Up @@ -1208,6 +1243,18 @@ static VSETVLIInfo adjustIncoming(VSETVLIInfo PrevInfo, VSETVLIInfo NewInfo,
// legal for MI, but may not be the state requested by MI.
void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
const MachineInstr &MI) const {
if (isVectorCopy(ST->getRegisterInfo(), MI) &&
(Info.isUnknown() || !Info.isValid() || Info.hasSEWLMULRatioOnly())) {
// Use an arbitrary but valid AVL and VTYPE so vill will be cleared. It may
// be coalesced into another vsetvli since we won't demand any fields.
VSETVLIInfo NewInfo; // Need a new VSETVLIInfo to clear SEWLMULRatioOnly
NewInfo.setAVLImm(1);
NewInfo.setVTYPE(RISCVII::VLMUL::LMUL_1, /*sew*/ 8, /*ta*/ true,
/*ma*/ true);
Info = NewInfo;
return;
}

if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
return;

Expand Down Expand Up @@ -1296,7 +1343,8 @@ bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB,
for (const MachineInstr &MI : MBB) {
transferBefore(Info, MI);

if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags))
if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags) ||
isVectorCopy(ST->getRegisterInfo(), MI))
HadVectorOp = true;

transferAfter(Info, MI);
Expand Down Expand Up @@ -1426,6 +1474,16 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
PrefixTransparent = false;
}

if (EnsureWholeVectorRegisterMoveValidVTYPE &&
isVectorCopy(ST->getRegisterInfo(), MI)) {
if (!PrevInfo.isCompatible(DemandedFields::all(), CurInfo, LIS)) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As a followup, we can reduce the number of vsetvli's emitted by integrating this with the needVSETVLIPHI logic below. Definitely non blocking.

insertVSETVLI(MBB, MI, MI.getDebugLoc(), CurInfo, PrevInfo);
PrefixTransparent = false;
}
MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
/*isImp*/ true));
}

uint64_t TSFlags = MI.getDesc().TSFlags;
if (RISCVII::hasSEWOp(TSFlags)) {
if (!PrevInfo.isCompatible(DemandedFields::all(), CurInfo, LIS)) {
Expand Down
2 changes: 2 additions & 0 deletions llvm/test/CodeGen/RISCV/inline-asm-v-constraint.ll
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ define <vscale x 1 x i8> @constraint_vd(<vscale x 1 x i8> %0, <vscale x 1 x i8>
define <vscale x 1 x i1> @constraint_vm(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1) nounwind {
; RV32I-LABEL: constraint_vm:
; RV32I: # %bb.0:
; RV32I-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV32I-NEXT: vmv1r.v v9, v0
; RV32I-NEXT: vmv1r.v v0, v8
; RV32I-NEXT: #APP
Expand All @@ -54,6 +55,7 @@ define <vscale x 1 x i1> @constraint_vm(<vscale x 1 x i1> %0, <vscale x 1 x i1>
;
; RV64I-LABEL: constraint_vm:
; RV64I: # %bb.0:
; RV64I-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64I-NEXT: vmv1r.v v9, v0
; RV64I-NEXT: vmv1r.v v0, v8
; RV64I-NEXT: #APP
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/RISCV/rvv/abs-vp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,7 @@ define <vscale x 16 x i64> @vp_abs_nxv16i64(<vscale x 16 x i64> %va, <vscale x 1
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
Expand All @@ -576,7 +577,6 @@ define <vscale x 16 x i64> @vp_abs_nxv16i64(<vscale x 16 x i64> %va, <vscale x 1
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a2, a1, 3
; CHECK-NEXT: sub a3, a0, a1
; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v0, a2
; CHECK-NEXT: sltu a2, a0, a3
; CHECK-NEXT: addi a2, a2, -1
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3075,6 +3075,7 @@ define <vscale x 64 x i16> @vp_bitreverse_nxv64i16(<vscale x 64 x i16> %va, <vsc
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
Expand All @@ -3086,7 +3087,6 @@ define <vscale x 64 x i16> @vp_bitreverse_nxv64i16(<vscale x 64 x i16> %va, <vsc
; CHECK-NEXT: lui a2, 3
; CHECK-NEXT: srli a4, a3, 1
; CHECK-NEXT: slli a3, a3, 2
; CHECK-NEXT: vsetvli a5, zero, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v0, a4
; CHECK-NEXT: sub a4, a0, a3
; CHECK-NEXT: sltu a5, a0, a4
Expand Down Expand Up @@ -3158,11 +3158,11 @@ define <vscale x 64 x i16> @vp_bitreverse_nxv64i16(<vscale x 64 x i16> %va, <vsc
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv64i16:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-ZVBB-NEXT: vmv1r.v v24, v0
; CHECK-ZVBB-NEXT: csrr a1, vlenb
; CHECK-ZVBB-NEXT: srli a2, a1, 1
; CHECK-ZVBB-NEXT: slli a1, a1, 2
; CHECK-ZVBB-NEXT: vsetvli a3, zero, e8, m1, ta, ma
; CHECK-ZVBB-NEXT: vslidedown.vx v0, v0, a2
; CHECK-ZVBB-NEXT: sub a2, a0, a1
; CHECK-ZVBB-NEXT: sltu a3, a0, a2
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1584,6 +1584,7 @@ define <vscale x 64 x i16> @vp_bswap_nxv64i16(<vscale x 64 x i16> %va, <vscale x
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
Expand All @@ -1593,7 +1594,6 @@ define <vscale x 64 x i16> @vp_bswap_nxv64i16(<vscale x 64 x i16> %va, <vscale x
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a2, a1, 1
; CHECK-NEXT: slli a1, a1, 2
; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v0, a2
; CHECK-NEXT: sub a2, a0, a1
; CHECK-NEXT: sltu a3, a0, a2
Expand Down Expand Up @@ -1631,11 +1631,11 @@ define <vscale x 64 x i16> @vp_bswap_nxv64i16(<vscale x 64 x i16> %va, <vscale x
;
; CHECK-ZVKB-LABEL: vp_bswap_nxv64i16:
; CHECK-ZVKB: # %bb.0:
; CHECK-ZVKB-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-ZVKB-NEXT: vmv1r.v v24, v0
; CHECK-ZVKB-NEXT: csrr a1, vlenb
; CHECK-ZVKB-NEXT: srli a2, a1, 1
; CHECK-ZVKB-NEXT: slli a1, a1, 2
; CHECK-ZVKB-NEXT: vsetvli a3, zero, e8, m1, ta, ma
; CHECK-ZVKB-NEXT: vslidedown.vx v0, v0, a2
; CHECK-ZVKB-NEXT: sub a2, a0, a1
; CHECK-ZVKB-NEXT: sltu a3, a0, a2
Expand Down
4 changes: 4 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,7 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_i32(<vsca
; RV32-NEXT: add a1, a3, a1
; RV32-NEXT: li a3, 2
; RV32-NEXT: vs8r.v v16, (a1)
; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT: vmv8r.v v8, v0
; RV32-NEXT: vmv8r.v v16, v24
; RV32-NEXT: call ext2
Expand Down Expand Up @@ -374,6 +375,7 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_i32(<vsca
; RV64-NEXT: add a1, a3, a1
; RV64-NEXT: li a3, 2
; RV64-NEXT: vs8r.v v16, (a1)
; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT: vmv8r.v v8, v0
; RV64-NEXT: vmv8r.v v16, v24
; RV64-NEXT: call ext2
Expand Down Expand Up @@ -451,6 +453,7 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 128
; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT: vmv8r.v v16, v0
; RV32-NEXT: call ext3
; RV32-NEXT: addi sp, s0, -144
Expand Down Expand Up @@ -523,6 +526,7 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 128
; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT: vmv8r.v v16, v0
; RV64-NEXT: call ext3
; RV64-NEXT: addi sp, s0, -144
Expand Down
4 changes: 4 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/calling-conv.ll
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @caller_tuple_return(
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: call callee_tuple_return
; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT: vmv2r.v v6, v8
; RV32-NEXT: vmv2r.v v8, v10
; RV32-NEXT: vmv2r.v v10, v6
Expand All @@ -119,6 +120,7 @@ define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @caller_tuple_return(
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: call callee_tuple_return
; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT: vmv2r.v v6, v8
; RV64-NEXT: vmv2r.v v8, v10
; RV64-NEXT: vmv2r.v v10, v6
Expand All @@ -144,6 +146,7 @@ define void @caller_tuple_argument(target("riscv.vector.tuple", <vscale x 16 x i
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT: vmv2r.v v6, v8
; RV32-NEXT: vmv2r.v v8, v10
; RV32-NEXT: vmv2r.v v10, v6
Expand All @@ -160,6 +163,7 @@ define void @caller_tuple_argument(target("riscv.vector.tuple", <vscale x 16 x i
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT: vmv2r.v v6, v8
; RV64-NEXT: vmv2r.v v8, v10
; RV64-NEXT: vmv2r.v v10, v6
Expand Down
Loading
Loading