87 changes: 21 additions & 66 deletions llvm/lib/Target/RISCV/RISCVVectorMaskDAGMutation.cpp
@@ -13,11 +13,8 @@
// The reason why we need to do this:
// 1. When tracking register pressure, we don't track physical registers.
// 2. We have a RegisterClass for mask register (which is `VMV0`), but we don't
// use it in most RVV pseudos (only used in inline asm constraint and add/sub
// with carry instructions). Instead, we use physical register V0 directly
// and insert a `$v0 = COPY ...` before the use. And, there is a fundamental
// issue in register allocator when handling RegisterClass with only one
// physical register, so we can't simply replace V0 with VMV0.
// use it by the time we reach scheduling. Instead, we use physical
// register V0 directly and insert a `$v0 = COPY ...` before the use.
// 3. For mask producers, we are using VR RegisterClass (we can allocate V0-V31
// to it). So if V0 is not available, there are still 31 available registers
// out there.
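
For readers skimming the new heuristic, the COPY-to-V0 pattern described in this comment looks roughly like the following before register allocation. This is a hand-written illustration, not MIR taken from this patch; the pseudo names and elided operands are schematic:

    %mask:vr = PseudoVMSEQ_VV_M1 ...
    $v0 = COPY %mask
    %res:vrnov0 = PseudoVADD_VV_M1_MASK ..., $v0, ...

The mask is produced into a virtual VR register, copied into the physical V0, and only then read by the masked pseudo, which is why the scheduler can key off the copy instead of a VMV0 register class by the time this mutation runs.
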
@@ -43,66 +43,24 @@

namespace llvm {

static inline bool isVectorMaskProducer(const MachineInstr *MI) {
switch (RISCV::getRVVMCOpcode(MI->getOpcode())) {
// Vector Mask Instructions
case RISCV::VMAND_MM:
case RISCV::VMNAND_MM:
case RISCV::VMANDN_MM:
case RISCV::VMXOR_MM:
case RISCV::VMOR_MM:
case RISCV::VMNOR_MM:
case RISCV::VMORN_MM:
case RISCV::VMXNOR_MM:
case RISCV::VMSBF_M:
case RISCV::VMSIF_M:
case RISCV::VMSOF_M:
// Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
case RISCV::VMADC_VV:
case RISCV::VMADC_VX:
case RISCV::VMADC_VI:
case RISCV::VMADC_VVM:
case RISCV::VMADC_VXM:
case RISCV::VMADC_VIM:
case RISCV::VMSBC_VV:
case RISCV::VMSBC_VX:
case RISCV::VMSBC_VVM:
case RISCV::VMSBC_VXM:
// Vector Integer Compare Instructions
case RISCV::VMSEQ_VV:
case RISCV::VMSEQ_VX:
case RISCV::VMSEQ_VI:
case RISCV::VMSNE_VV:
case RISCV::VMSNE_VX:
case RISCV::VMSNE_VI:
case RISCV::VMSLT_VV:
case RISCV::VMSLT_VX:
case RISCV::VMSLTU_VV:
case RISCV::VMSLTU_VX:
case RISCV::VMSLE_VV:
case RISCV::VMSLE_VX:
case RISCV::VMSLE_VI:
case RISCV::VMSLEU_VV:
case RISCV::VMSLEU_VX:
case RISCV::VMSLEU_VI:
case RISCV::VMSGTU_VX:
case RISCV::VMSGTU_VI:
case RISCV::VMSGT_VX:
case RISCV::VMSGT_VI:
// Vector Floating-Point Compare Instructions
case RISCV::VMFEQ_VV:
case RISCV::VMFEQ_VF:
case RISCV::VMFNE_VV:
case RISCV::VMFNE_VF:
case RISCV::VMFLT_VV:
case RISCV::VMFLT_VF:
case RISCV::VMFLE_VV:
case RISCV::VMFLE_VF:
case RISCV::VMFGT_VF:
case RISCV::VMFGE_VF:
return true;
}
return false;
static bool isCopyToV0(const MachineInstr &MI) {
return MI.isCopy() && MI.getOperand(0).getReg() == RISCV::V0 &&
MI.getOperand(1).getReg().isVirtual() &&
MI.getOperand(1).getSubReg() == RISCV::NoSubRegister;
}

static bool isSoleUseCopyToV0(SUnit &SU) {
if (SU.Succs.size() != 1)
return false;
SDep &Dep = SU.Succs[0];
// Ignore dependencies other than data or strong ordering.
if (Dep.isWeak())
return false;

SUnit &DepSU = *Dep.getSUnit();
if (DepSU.isBoundaryNode())
return false;
return isCopyToV0(*DepSU.getInstr());
}

class RISCVVectorMaskDAGMutation : public ScheduleDAGMutation {
@@ -119,7 +74,7 @@ class RISCVVectorMaskDAGMutation : public ScheduleDAGMutation {
if (MI->findRegisterUseOperand(RISCV::V0, TRI))
NearestUseV0SU = &SU;

if (NearestUseV0SU && NearestUseV0SU != &SU && isVectorMaskProducer(MI) &&
if (NearestUseV0SU && NearestUseV0SU != &SU && isSoleUseCopyToV0(SU) &&
// For LMUL=8 cases, there will be more possibilities to spill.
// FIXME: We should use RegPressureTracker to do fine-grained
// controls.
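
To make the truncated hunk above easier to follow, here is a minimal C++ sketch of how a ScheduleDAGMutation of this shape typically wires the helpers together. It is a paraphrase for illustration, not the exact code under the fold: the apply() body, the TRI member, and the edge-insertion call are assumptions based on the usual ScheduleDAGInstrs/SDep APIs, and the actual condition also carries the LMUL=8 guard mentioned in the FIXME above.

    // Sketch only: TRI is assumed to be a TargetRegisterInfo* member of the
    // mutation class, initialized from the DAG's target.
    void apply(ScheduleDAGInstrs *DAG) override {
      SUnit *NearestUseV0SU = nullptr;
      for (SUnit &SU : DAG->SUnits) {
        const MachineInstr *MI = SU.getInstr();
        // Remember the most recent scheduling unit that reads V0.
        if (MI->findRegisterUseOperand(RISCV::V0, TRI))
          NearestUseV0SU = &SU;

        // For a mask producer whose sole user is a copy into V0 (see
        // isSoleUseCopyToV0 above), add an artificial edge so the producer is
        // scheduled after the previous V0 use, keeping V0-bound mask live
        // ranges from overlapping.
        if (NearestUseV0SU && NearestUseV0SU != &SU && isSoleUseCopyToV0(SU))
          DAG->addEdge(&SU, SDep(NearestUseV0SU, SDep::Artificial));
      }
    }
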
5 changes: 3 additions & 2 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -46,10 +46,11 @@ define <4 x float> @hang_when_merging_stores_after_legalization(<8 x float> %x,
; CHECK-NEXT: vslideup.vi v12, v10, 2, v0.t
; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT: vmv.v.i v0, 2
; CHECK-NEXT: vmv.v.i v10, 12
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; CHECK-NEXT: vslidedown.vi v8, v8, 6, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT: vmv.v.i v0, 12
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0
; CHECK-NEXT: ret
%z = shufflevector <8 x float> %x, <8 x float> %y, <4 x i32> <i32 0, i32 7, i32 8, i32 15>
79 changes: 39 additions & 40 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -698,15 +698,16 @@ define void @buildvec_seq_v9i8(ptr %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 73
; CHECK-NEXT: vsetivli zero, 9, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, 3
; CHECK-NEXT: vmv.v.i v8, 3
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: li a1, 146
; CHECK-NEXT: vmv.s.x v8, a1
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmerge.vim v8, v9, 2, v0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
Comment on lines -701 to +710 (Contributor):
This case is handled by moving RISCVVMV0Elimination to after pre-ra scheduling: lukel97@c00e2b7#diff-19fd7ee1004c9fe02050701bc140c5c670f89b7b143ffae3c9a649106c92628bR700-R711

; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
store <9 x i8> <i8 1, i8 2, i8 3, i8 1, i8 2, i8 3, i8 1, i8 2, i8 3>, ptr %x
@@ -973,27 +974,27 @@ define <512 x i8> @buildvec_not_vid_v512i8_indices_overflow_2() vscale_range(16,
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 16, e32, mf2, ta, ma
; RV32-NEXT: vmv.v.i v0, 15
; RV32-NEXT: vmv.v.i v9, 0
; RV32-NEXT: vmv.v.i v8, 0
; RV32-NEXT: li a0, 512
; RV32-NEXT: li a1, 240
; RV32-NEXT: vmv.s.x v8, a1
; RV32-NEXT: li a1, 15
; RV32-NEXT: vmerge.vim v10, v9, -1, v0
; RV32-NEXT: vmerge.vim v9, v8, -1, v0
; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV32-NEXT: vmv.v.i v12, 3
; RV32-NEXT: slli a1, a1, 8
; RV32-NEXT: vmv1r.v v0, v10
; RV32-NEXT: vmv1r.v v0, v9
; RV32-NEXT: vmerge.vim v12, v12, 0, v0
; RV32-NEXT: vmv1r.v v0, v8
; RV32-NEXT: vsetvli zero, zero, e16, m8, ta, ma
; RV32-NEXT: vmv.s.x v0, a1
; RV32-NEXT: li a1, 15
; RV32-NEXT: vsetivli zero, 16, e32, mf2, ta, ma
; RV32-NEXT: vmerge.vim v10, v9, -1, v0
; RV32-NEXT: vmv.s.x v8, a1
; RV32-NEXT: vmv1r.v v0, v10
; RV32-NEXT: vmerge.vim v9, v8, -1, v0
; RV32-NEXT: slli a1, a1, 8
; RV32-NEXT: vmv1r.v v0, v9
; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV32-NEXT: vmerge.vim v12, v12, 1, v0
; RV32-NEXT: vmv1r.v v0, v8
; RV32-NEXT: vsetvli zero, zero, e16, m8, ta, ma
; RV32-NEXT: vmv.s.x v0, a1
; RV32-NEXT: vsetivli zero, 16, e32, mf2, ta, ma
; RV32-NEXT: vmerge.vim v8, v9, -1, v0
; RV32-NEXT: vmerge.vim v8, v8, -1, v0
; RV32-NEXT: vmv1r.v v0, v8
; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV32-NEXT: vmerge.vim v8, v12, 2, v0
@@ -1003,25 +1004,23 @@ define <512 x i8> @buildvec_not_vid_v512i8_indices_overflow_2() vscale_range(16,
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m1, ta, ma
; RV64V-NEXT: vmv.v.i v0, 3
; RV64V-NEXT: vmv.v.i v9, 0
; RV64V-NEXT: vmv.v.i v8, 0
; RV64V-NEXT: li a0, 512
; RV64V-NEXT: vmv.v.i v8, 12
; RV64V-NEXT: li a1, 48
; RV64V-NEXT: vmerge.vim v10, v9, -1, v0
; RV64V-NEXT: vmerge.vim v9, v8, -1, v0
; RV64V-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV64V-NEXT: vmv.v.i v12, 3
; RV64V-NEXT: vmv1r.v v0, v10
; RV64V-NEXT: vmv1r.v v0, v9
; RV64V-NEXT: vmerge.vim v12, v12, 0, v0
; RV64V-NEXT: vmv1r.v v0, v8
; RV64V-NEXT: vsetivli zero, 8, e64, m1, ta, ma
; RV64V-NEXT: vmerge.vim v10, v9, -1, v0
; RV64V-NEXT: vmv.s.x v8, a1
; RV64V-NEXT: vmv.v.v v0, v10
; RV64V-NEXT: vmv.v.i v0, 12
; RV64V-NEXT: vmerge.vim v9, v8, -1, v0
; RV64V-NEXT: li a1, 48
; RV64V-NEXT: vmv.v.v v0, v9
; RV64V-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV64V-NEXT: vmerge.vim v12, v12, 1, v0
; RV64V-NEXT: vmv1r.v v0, v8
; RV64V-NEXT: vmv.s.x v0, a1
; RV64V-NEXT: vsetivli zero, 8, e64, m1, ta, ma
; RV64V-NEXT: vmerge.vim v8, v9, -1, v0
; RV64V-NEXT: vmerge.vim v8, v8, -1, v0
; RV64V-NEXT: vmv.v.v v0, v8
; RV64V-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV64V-NEXT: vmerge.vim v8, v12, 2, v0
@@ -1031,27 +1030,27 @@ define <512 x i8> @buildvec_not_vid_v512i8_indices_overflow_2() vscale_range(16,
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: vsetivli zero, 16, e32, m1, ta, ma
; RV64ZVE32-NEXT: vmv.v.i v0, 15
; RV64ZVE32-NEXT: vmv.v.i v9, 0
; RV64ZVE32-NEXT: vmv.v.i v8, 0
; RV64ZVE32-NEXT: li a0, 512
; RV64ZVE32-NEXT: li a1, 240
; RV64ZVE32-NEXT: vmv.s.x v8, a1
; RV64ZVE32-NEXT: li a1, 15
; RV64ZVE32-NEXT: vmerge.vim v10, v9, -1, v0
; RV64ZVE32-NEXT: vmerge.vim v9, v8, -1, v0
; RV64ZVE32-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV64ZVE32-NEXT: vmv.v.i v12, 3
; RV64ZVE32-NEXT: slli a1, a1, 8
; RV64ZVE32-NEXT: vmv1r.v v0, v10
; RV64ZVE32-NEXT: vmv1r.v v0, v9
; RV64ZVE32-NEXT: vmerge.vim v12, v12, 0, v0
; RV64ZVE32-NEXT: vmv1r.v v0, v8
; RV64ZVE32-NEXT: vsetvli zero, zero, e16, m8, ta, ma
; RV64ZVE32-NEXT: vmv.s.x v0, a1
; RV64ZVE32-NEXT: li a1, 15
; RV64ZVE32-NEXT: vsetivli zero, 16, e32, m1, ta, ma
; RV64ZVE32-NEXT: vmerge.vim v10, v9, -1, v0
; RV64ZVE32-NEXT: vmv.s.x v8, a1
; RV64ZVE32-NEXT: vmv.v.v v0, v10
; RV64ZVE32-NEXT: vmerge.vim v9, v8, -1, v0
; RV64ZVE32-NEXT: slli a1, a1, 8
; RV64ZVE32-NEXT: vmv.v.v v0, v9
; RV64ZVE32-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV64ZVE32-NEXT: vmerge.vim v12, v12, 1, v0
; RV64ZVE32-NEXT: vmv1r.v v0, v8
; RV64ZVE32-NEXT: vsetvli zero, zero, e16, m8, ta, ma
; RV64ZVE32-NEXT: vmv.s.x v0, a1
; RV64ZVE32-NEXT: vsetivli zero, 16, e32, m1, ta, ma
; RV64ZVE32-NEXT: vmerge.vim v8, v9, -1, v0
; RV64ZVE32-NEXT: vmerge.vim v8, v8, -1, v0
; RV64ZVE32-NEXT: vmv.v.v v0, v8
; RV64ZVE32-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV64ZVE32-NEXT: vmerge.vim v8, v12, 2, v0
16 changes: 7 additions & 9 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -105,11 +105,10 @@ define <4 x i16> @vrgather_shuffle_vx_v4i16(<4 x i16> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
; CHECK-NEXT: vmv.v.i v0, 2
; CHECK-NEXT: vmv.v.i v9, 3
; CHECK-NEXT: vslidedown.vi v8, v8, 2, v0.t
; CHECK-NEXT: vmv.v.i v10, 5
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT: vmv.v.i v0, 3
; CHECK-NEXT: vmv.v.i v9, 5
; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT: ret
%s = shufflevector <4 x i16> %x, <4 x i16> <i16 5, i16 5, i16 5, i16 5>, <4 x i32> <i32 0, i32 3, i32 6, i32 5>
ret <4 x i16> %s
@@ -971,13 +970,12 @@ define <8 x i32> @shuffle_repeat3_singlesrc_e32(<8 x i32> %v) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.i v0, 7
; CHECK-NEXT: vmv.v.i v10, 1
; CHECK-NEXT: vmv.v.i v9, 1
; CHECK-NEXT: li a0, 192
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vmerge.vim v9, v9, 0, v0
; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: vmerge.vim v10, v10, 0, v0
; CHECK-NEXT: vmv.v.v v0, v9
; CHECK-NEXT: vmerge.vim v9, v10, 2, v0
; CHECK-NEXT: vmerge.vim v9, v9, 2, v0
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: vslidedown.vx v10, v9, a0
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma