102 changes: 98 additions & 4 deletions llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -178,6 +178,19 @@ static unsigned getIntegerExtensionOperandEEW(unsigned Factor,
return Log2EEW;
}

#define VSEG_CASES(Prefix, EEW) \
RISCV::Prefix##SEG2E##EEW##_V: \
case RISCV::Prefix##SEG3E##EEW##_V: \
case RISCV::Prefix##SEG4E##EEW##_V: \
case RISCV::Prefix##SEG5E##EEW##_V: \
case RISCV::Prefix##SEG6E##EEW##_V: \
case RISCV::Prefix##SEG7E##EEW##_V: \
case RISCV::Prefix##SEG8E##EEW##_V
#define VSSEG_CASES(EEW) VSEG_CASES(VS, EEW)
#define VSSSEG_CASES(EEW) VSEG_CASES(VSS, EEW)
#define VSUXSEG_CASES(EEW) VSEG_CASES(VSUX, I##EEW)
#define VSOXSEG_CASES(EEW) VSEG_CASES(VSOX, I##EEW)

Member Author: Instead of using macros, we could also use the search tables we have for segmented loads/stores here. I'm open to either way.

Contributor: The macros are fine by me; I think the search tables are on the pseudos anyway, so it would be weird to mix the non-pseudo and pseudo opcodes.
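For readers not used to the token pasting, a minimal expansion sketch derived from the macro definition above (not text from the patch): a use such as "case VSSEG_CASES(8):" covers all seven unit-stride segmented-store opcodes for EEW=8.

// Expansion of "case VSSEG_CASES(8):", i.e. VSEG_CASES(VS, 8):
case RISCV::VSSEG2E8_V:
case RISCV::VSSEG3E8_V:
case RISCV::VSSEG4E8_V:
case RISCV::VSSEG5E8_V:
case RISCV::VSSEG6E8_V:
case RISCV::VSSEG7E8_V:
case RISCV::VSSEG8E8_V: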

static std::optional<unsigned>
getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
const MachineInstr &MI = *MO.getParent();
@@ -225,21 +238,29 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
case RISCV::VSE8_V:
case RISCV::VLSE8_V:
case RISCV::VSSE8_V:
case VSSEG_CASES(8):
case VSSSEG_CASES(8):
return 3;
case RISCV::VLE16_V:
case RISCV::VSE16_V:
case RISCV::VLSE16_V:
case RISCV::VSSE16_V:
case VSSEG_CASES(16):
case VSSSEG_CASES(16):
return 4;
case RISCV::VLE32_V:
case RISCV::VSE32_V:
case RISCV::VLSE32_V:
case RISCV::VSSE32_V:
case VSSEG_CASES(32):
case VSSSEG_CASES(32):
return 5;
case RISCV::VLE64_V:
case RISCV::VSE64_V:
case RISCV::VLSE64_V:
case RISCV::VSSE64_V:
case VSSEG_CASES(64):
case VSSSEG_CASES(64):
return 6;

// Vector Indexed Instructions
@@ -248,31 +269,39 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
case RISCV::VLUXEI8_V:
case RISCV::VLOXEI8_V:
case RISCV::VSUXEI8_V:
case RISCV::VSOXEI8_V: {
case RISCV::VSOXEI8_V:
case VSUXSEG_CASES(8):
case VSOXSEG_CASES(8): {
if (MO.getOperandNo() == 0)
return MILog2SEW;
return 3;
}
case RISCV::VLUXEI16_V:
case RISCV::VLOXEI16_V:
case RISCV::VSUXEI16_V:
case RISCV::VSOXEI16_V: {
case RISCV::VSOXEI16_V:
case VSUXSEG_CASES(16):
case VSOXSEG_CASES(16): {
if (MO.getOperandNo() == 0)
return MILog2SEW;
return 4;
}
case RISCV::VLUXEI32_V:
case RISCV::VLOXEI32_V:
case RISCV::VSUXEI32_V:
case RISCV::VSOXEI32_V: {
case RISCV::VSOXEI32_V:
case VSUXSEG_CASES(32):
case VSOXSEG_CASES(32): {
if (MO.getOperandNo() == 0)
return MILog2SEW;
return 5;
}
case RISCV::VLUXEI64_V:
case RISCV::VLOXEI64_V:
case RISCV::VSUXEI64_V:
case RISCV::VSOXEI64_V: {
case RISCV::VSOXEI64_V:
case VSUXSEG_CASES(64):
case VSOXSEG_CASES(64): {
if (MO.getOperandNo() == 0)
return MILog2SEW;
return 6;
@@ -1375,6 +1404,54 @@ RISCVVLOptimizer::getMinimumVLForUser(const MachineOperand &UserOp) const {
return VLOp;
}

/// Return true if MI is an instruction used for assembling registers
/// for segmented store instructions, namely, RISCVISD::TUPLE_INSERT.
/// Currently it's lowered to INSERT_SUBREG.
static bool isTupleInsertInstr(const MachineInstr &MI,
const MachineRegisterInfo &MRI) {
if (MI.getOpcode() != RISCV::INSERT_SUBREG)
return false;

const TargetRegisterClass *DstRC = MRI.getRegClass(MI.getOperand(0).getReg());
const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
if (!RISCVRI::isVRegClass(DstRC->TSFlags))
return false;
unsigned NF = RISCVRI::getNF(DstRC->TSFlags);
if (NF < 2)
return false;

// Check whether INSERT_SUBREG has the correct subreg index for tuple inserts.
auto VLMul = RISCVRI::getLMul(DstRC->TSFlags);
unsigned SubRegIdx = MI.getOperand(3).getImm();
[[maybe_unused]] auto [LMul, IsFractional] = RISCVVType::decodeVLMUL(VLMul);
assert(!IsFractional && "unexpected LMUL for tuple register classes");
return TRI->getSubRegIdxSize(SubRegIdx) == RISCV::RVVBitsPerBlock * LMul;
}
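A worked example of the size check above, assuming RVVBitsPerBlock is 64 as in the current backend: for a tuple class such as VRN2M2 (NF = 2, LMUL = 2), each field inserted by INSERT_SUBREG must span 2 * 64 = 128 bits, so a sub_vrm2_* subregister index satisfies the check while a 64-bit sub_vrm1_* index does not.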

Contributor: Maybe we can just add this info to TSFlags, like those in RISCVII?

Member Author: Yeah, we probably should; there is still plenty of space in TSFlags. I'll do that in a follow-up patch.

Contributor: Or a predicate in RISCVInstrPredicates?

static bool isSegmentedStoreInstr(const MachineInstr &MI) {
switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
case VSSEG_CASES(8):
case VSSSEG_CASES(8):
case VSUXSEG_CASES(8):
case VSOXSEG_CASES(8):
case VSSEG_CASES(16):
case VSSSEG_CASES(16):
case VSUXSEG_CASES(16):
case VSOXSEG_CASES(16):
case VSSEG_CASES(32):
case VSSSEG_CASES(32):
case VSUXSEG_CASES(32):
case VSOXSEG_CASES(32):
case VSSEG_CASES(64):
case VSSSEG_CASES(64):
case VSUXSEG_CASES(64):
case VSOXSEG_CASES(64):
return true;
default:
return false;
}
}
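To make the follow-up direction from the review thread above concrete, here is a minimal sketch of what a TSFlags-based predicate could look like. The flag and mask names are hypothetical; no such bit exists in RISCVII today, and the patch as merged uses the opcode switch above.

// Hypothetical sketch only: assumes a future IsRVVSegmentedStore bit is
// added to the RISCVII TSFlags encoding, as suggested in the review thread.
static bool isSegmentedStoreInstrViaTSFlags(const MachineInstr &MI) {
  const uint64_t TSFlags = MI.getDesc().TSFlags;
  return TSFlags & RISCVII::IsRVVSegmentedStoreMask; // assumed new mask
}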

std::optional<MachineOperand>
RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
std::optional<MachineOperand> CommonVL;
@@ -1395,6 +1472,23 @@ RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
continue;
}

if (isTupleInsertInstr(UserMI, *MRI)) {
LLVM_DEBUG(dbgs().indent(4) << "Peeking through uses of INSERT_SUBREG\n");
for (MachineOperand &UseOp :
MRI->use_operands(UserMI.getOperand(0).getReg())) {
const MachineInstr &CandidateMI = *UseOp.getParent();
// We should not propagate the VL if the user is not a segmented store
// or another INSERT_SUBREG, since VL just works differently
// between segmented operations (per-field) vs. other RVV ops (on the
// whole register group).
if (!isTupleInsertInstr(CandidateMI, *MRI) &&
!isSegmentedStoreInstr(CandidateMI))
return std::nullopt;
Worklist.insert(&UseOp);
}
continue;
}

if (UserMI.isPHI()) {
// Don't follow PHI cycles
if (!PHISeen.insert(&UserMI).second)
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/RISCV/rvv/pr141907.ll
@@ -12,7 +12,7 @@ define void @pr141907(ptr %0) nounwind {
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmclr.m v0
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: vsetvli a5, zero, e16, mf2, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: addi a3, sp, 20
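A likely reading of this test update (an interpretation, not taken from the PR description): vsetvli a5, zero resets VL to VLMAX, whereas vsetvli zero, zero keeps the VL already in effect and only updates VTYPE, so after this patch the vmv.v.i defining v10 no longer demands VLMAX, presumably because its VL requirement is now reduced through the new segmented-store handling.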