Merged
Changes from 2 commits
41 changes: 40 additions & 1 deletion llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -62,7 +62,7 @@ struct DemandedVL {
};

class RISCVVLOptimizer : public MachineFunctionPass {
const MachineRegisterInfo *MRI;
MachineRegisterInfo *MRI;
const MachineDominatorTree *MDT;
const TargetInstrInfo *TII;

@@ -1392,6 +1392,41 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
return true;
}

/// Given a vslidedown.vx like:
///
/// %slideamt = ADDI %x, -1
/// %v = PseudoVSLIDEDOWN_VX %passthru, %src, %slideamt, avl=1
///
/// %v will only read the first %slideamt + 1 lanes of %src, which = %x.
/// This is a common case when lowering extractelement.
///
/// Note that if %x is 0, %slideamt will be all ones. In this case %src will be
/// completely slid down and none of its lanes will be read (since %slideamt is
/// greater than the largest VLMAX of 65536) so we can demand any minimum VL.
static std::optional<DemandedVL>
getMinimumVLForVSLIDEDOWN_VX(const MachineOperand &UserOp,
const MachineRegisterInfo *MRI) {
const MachineInstr &MI = *UserOp.getParent();
if (RISCV::getRVVMCOpcode(MI.getOpcode()) != RISCV::VSLIDEDOWN_VX)
return std::nullopt;
// We're looking at what lanes are used from the src operand.
if (UserOp.getOperandNo() != 2)
return std::nullopt;
// For now, the AVL must be 1.
const MachineOperand &AVL = MI.getOperand(4);
if (!AVL.isImm() || AVL.getImm() != 1)
return std::nullopt;
// The slide amount must be %x - 1.
const MachineOperand &SlideAmt = MI.getOperand(3);
if (!SlideAmt.getReg().isVirtual())
return std::nullopt;
MachineInstr *SlideAmtDef = MRI->getUniqueVRegDef(SlideAmt.getReg());
if (SlideAmtDef->getOpcode() != RISCV::ADDI ||
topperc (Collaborator) commented on Nov 17, 2025:

Is it possible for this ADDI to be an LI? In which case Operand 1 is X0 and not a virtual register.

The PR author (Contributor) replied:

Woops yes, fixed in 0fa31a4
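For context, an LI of a constant is materialized as an ADDI whose first source operand is the physical register X0 (e.g. %slideamt:gpr = ADDI $x0, 4), so getOperand(1) is not a virtual register in that case. The fix in 0fa31a4 is not shown in this diff; the following is only a minimal sketch of the kind of guard it presumably adds (the name Base is just for illustration):

// Hypothetical guard, not necessarily the actual fix in 0fa31a4: bail out if
// the ADDI's first source operand is not a virtual register (e.g. it is X0
// because the slide amount was materialized as an LI).
const MachineOperand &Base = SlideAmtDef->getOperand(1);
if (!Base.isReg() || !Base.getReg().isVirtual())
  return std::nullopt;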

SlideAmtDef->getOperand(2).getImm() != -AVL.getImm())
return std::nullopt;
return SlideAmtDef->getOperand(1);
}

DemandedVL
RISCVVLOptimizer::getMinimumVLForUser(const MachineOperand &UserOp) const {
const MachineInstr &UserMI = *UserOp.getParent();
@@ -1406,6 +1441,9 @@ RISCVVLOptimizer::getMinimumVLForUser(const MachineOperand &UserOp) const {
return DemandedVL::vlmax();
}

if (auto VL = getMinimumVLForVSLIDEDOWN_VX(UserOp, MRI))
return *VL;

if (RISCVII::readsPastVL(
TII->get(RISCV::getRVVMCOpcode(UserMI.getOpcode())).TSFlags)) {
LLVM_DEBUG(dbgs() << " Abort because used by unsafe instruction\n");
@@ -1624,6 +1662,7 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) const {

// All our checks passed. We can reduce VL.
VLOp.ChangeToRegister(CommonVL->getReg(), false);
MRI->constrainRegClass(CommonVL->getReg(), &RISCV::GPRNoX0RegClass);
The PR author (Contributor) commented:

Because we're taking the demanded VL from an ADDI's operands, the AVL may be a plain GPR virtual register now. So we need to constrain it to GPRNoX0. This doesn't seem to affect any existing AVLs that are reduced.
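As an aside on the API used here (not part of the patch): MachineRegisterInfo::constrainRegClass returns the resulting register class, or nullptr if the virtual register cannot be constrained to the requested class. A plain GPR virtual register can normally be narrowed to GPRNoX0, which is presumably why the return value is ignored above; a more defensive variant (an assumption, not the author's code) would check it before rewriting the VL operand:

// Hypothetical defensive form (reordered relative to the patch): constrain
// the new AVL register before rewriting the VL operand, and bail out if the
// register class change fails.
if (!MRI->constrainRegClass(CommonVL->getReg(), &RISCV::GPRNoX0RegClass))
  return false;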

return true;
}

44 changes: 44 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/vl-opt-live-out.ll
@@ -0,0 +1,44 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s

define i32 @loop_live_out(ptr %p, i64 %n) {
; CHECK-LABEL: loop_live_out:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mv a2, a0
; CHECK-NEXT: .LBB0_1: # %loop
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vsetvli a3, a1, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v8, (a2)
; CHECK-NEXT: sub a1, a1, a3
; CHECK-NEXT: vadd.vi v8, v8, 1
; CHECK-NEXT: vse32.v v8, (a2)
; CHECK-NEXT: slli a2, a3, 2
; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: bnez a1, .LBB0_1
; CHECK-NEXT: # %bb.2: # %exit
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; CHECK-NEXT: vslidedown.vx v8, v8, a3
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
entry:
br label %loop

loop:
%avl = phi i64 [%n, %entry], [%avl.next, %loop]
%gep = phi ptr [%p, %entry], [%gep.next, %loop]
%vl = call i32 @llvm.experimental.get.vector.length(i64 %avl, i32 4, i1 true)
%x = call <vscale x 4 x i32> @llvm.vp.load(ptr %gep, <vscale x 4 x i1> splat (i1 true), i32 %vl)
%y = add <vscale x 4 x i32> %x, splat (i32 1)
call void @llvm.vp.store(<vscale x 4 x i32> %y, ptr %gep, <vscale x 4 x i1> splat (i1 true), i32 %vl)
%vl.zext = zext i32 %vl to i64
%avl.next = sub i64 %avl, %vl.zext
%gep.next = getelementptr i32, ptr %p, i32 %vl
%ec = icmp eq i64 %avl.next, 0
br i1 %ec, label %exit, label %loop

exit:
%lastidx = sub i64 %vl.zext, 1
%lastelt = extractelement <vscale x 4 x i32> %y, i64 %lastidx
ret i32 %lastelt
}
18 changes: 18 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
@@ -778,3 +778,21 @@ body: |
; CHECK: DBG_VALUE %0:vr
DBG_VALUE %0:vr
...
---
name: vslidedown_vx
tracksRegLiveness: true
body: |
bb.0:
liveins: $x8
; CHECK-LABEL: name: vslidedown_vx
; CHECK: liveins: $x8
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %x:gprnox0 = COPY $x8
; CHECK-NEXT: %y:gprnox0 = ADDI %x, -1
; CHECK-NEXT: %v:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, %x, 5 /* e32 */, 0 /* tu, mu */
; CHECK-NEXT: %w:vr = PseudoVSLIDEDOWN_VX_M1 $noreg, %v, %y, 1, 5 /* e32 */, 0 /* tu, mu */
%x:gpr = COPY $x8
%y:gprnox0 = ADDI %x, -1
%v:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 5 /* e32 */, 0 /* tu, mu */
%w:vr = PseudoVSLIDEDOWN_VX_M1 $noreg, %v, %y, 1, 5 /* e32 */, 0 /* tu, mu */
...