From 5df264046fedd0e543b9499f286d14b098ef87d3 Mon Sep 17 00:00:00 2001
From: Min Hsu
Date: Tue, 26 Aug 2025 10:40:13 -0700
Subject: [PATCH 1/9] [RISCV][VLOPT] Support segmented store instructions

---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp    |  93 +++++++-
 llvm/test/CodeGen/RISCV/rvv/pr141907.ll       |   2 +-
 .../test/CodeGen/RISCV/rvv/vl-opt-op-info.mir | 216 ++++++++++++++++++
 3 files changed, 306 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index b7c93ffea41f7..c57b08172c821 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -178,6 +178,19 @@ static unsigned getIntegerExtensionOperandEEW(unsigned Factor,
   return Log2EEW;
 }
 
+#define VSEG_CASES(Prefix, EEW)                                                \
+  RISCV::Prefix##SEG2E##EEW##_V:                                               \
+  case RISCV::Prefix##SEG3E##EEW##_V:                                          \
+  case RISCV::Prefix##SEG4E##EEW##_V:                                          \
+  case RISCV::Prefix##SEG5E##EEW##_V:                                          \
+  case RISCV::Prefix##SEG6E##EEW##_V:                                          \
+  case RISCV::Prefix##SEG7E##EEW##_V:                                          \
+  case RISCV::Prefix##SEG8E##EEW##_V
+#define VSSEG_CASES(EEW) VSEG_CASES(VS, EEW)
+#define VSSSEG_CASES(EEW) VSEG_CASES(VSS, EEW)
+#define VSUXSEG_CASES(EEW) VSEG_CASES(VSUX, I##EEW)
+#define VSOXSEG_CASES(EEW) VSEG_CASES(VSOX, I##EEW)
+
 static std::optional<unsigned>
 getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
   const MachineInstr &MI = *MO.getParent();
@@ -225,21 +238,29 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
   case RISCV::VSE8_V:
   case RISCV::VLSE8_V:
   case RISCV::VSSE8_V:
+  case VSSEG_CASES(8):
+  case VSSSEG_CASES(8):
     return 3;
   case RISCV::VLE16_V:
   case RISCV::VSE16_V:
   case RISCV::VLSE16_V:
   case RISCV::VSSE16_V:
+  case VSSEG_CASES(16):
+  case VSSSEG_CASES(16):
     return 4;
   case RISCV::VLE32_V:
   case RISCV::VSE32_V:
   case RISCV::VLSE32_V:
   case RISCV::VSSE32_V:
+  case VSSEG_CASES(32):
+  case VSSSEG_CASES(32):
     return 5;
   case RISCV::VLE64_V:
   case RISCV::VSE64_V:
   case RISCV::VLSE64_V:
   case RISCV::VSSE64_V:
+  case VSSEG_CASES(64):
+  case VSSSEG_CASES(64):
     return 6;
 
   // Vector Indexed Instructions
@@ -248,7 +269,9 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
   case RISCV::VLUXEI8_V:
   case RISCV::VLOXEI8_V:
   case RISCV::VSUXEI8_V:
-  case RISCV::VSOXEI8_V: {
+  case RISCV::VSOXEI8_V:
+  case VSUXSEG_CASES(8):
+  case VSOXSEG_CASES(8): {
     if (MO.getOperandNo() == 0)
       return MILog2SEW;
     return 3;
@@ -256,7 +279,9 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
   case RISCV::VLUXEI16_V:
   case RISCV::VLOXEI16_V:
   case RISCV::VSUXEI16_V:
-  case RISCV::VSOXEI16_V: {
+  case RISCV::VSOXEI16_V:
+  case VSUXSEG_CASES(16):
+  case VSOXSEG_CASES(16): {
     if (MO.getOperandNo() == 0)
       return MILog2SEW;
     return 4;
@@ -264,7 +289,9 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
   case RISCV::VLUXEI32_V:
   case RISCV::VLOXEI32_V:
   case RISCV::VSUXEI32_V:
-  case RISCV::VSOXEI32_V: {
+  case RISCV::VSOXEI32_V:
+  case VSUXSEG_CASES(32):
+  case VSOXSEG_CASES(32): {
     if (MO.getOperandNo() == 0)
       return MILog2SEW;
     return 5;
@@ -272,7 +299,9 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
   case RISCV::VLUXEI64_V:
   case RISCV::VLOXEI64_V:
   case RISCV::VSUXEI64_V:
-  case RISCV::VSOXEI64_V: {
+  case RISCV::VSOXEI64_V:
+  case VSUXSEG_CASES(64):
+  case VSOXSEG_CASES(64): {
     if (MO.getOperandNo() == 0)
       return MILog2SEW;
     return 6;
@@ -1375,6 +1404,55 @@ RISCVVLOptimizer::getMinimumVLForUser(const MachineOperand &UserOp) const {
   return VLOp;
 }
 
+/// Return true if MI assembles a register tuple for a segmented store
+/// instruction, i.e. it implements RISCVISD::TUPLE_INSERT, which is
+/// currently lowered to INSERT_SUBREG.
+static bool isTupleInsertInstr(const MachineInstr &MI,
+                               const MachineRegisterInfo &MRI) {
+  if (MI.getOpcode() != RISCV::INSERT_SUBREG)
+    return false;
+
+  const TargetRegisterClass *DstRC = MRI.getRegClass(MI.getOperand(0).getReg());
+  // Check whether it was lowered with the correct subreg index.
+  [[maybe_unused]] const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+  [[maybe_unused]] unsigned SubRegIdx = MI.getOperand(3).getImm();
+  switch (DstRC->getID()) {
+  case RISCV::VRN2M1RegClassID:
+  case RISCV::VRN2M1NoV0RegClassID:
+  case RISCV::VRN3M1RegClassID:
+  case RISCV::VRN3M1NoV0RegClassID:
+  case RISCV::VRN4M1RegClassID:
+  case RISCV::VRN4M1NoV0RegClassID:
+  case RISCV::VRN5M1RegClassID:
+  case RISCV::VRN5M1NoV0RegClassID:
+  case RISCV::VRN6M1RegClassID:
+  case RISCV::VRN6M1NoV0RegClassID:
+  case RISCV::VRN7M1RegClassID:
+  case RISCV::VRN7M1NoV0RegClassID:
+  case RISCV::VRN8M1RegClassID:
+  case RISCV::VRN8M1NoV0RegClassID:
+    assert(TRI->getSubRegIdxSize(SubRegIdx) == RISCV::RVVBitsPerBlock &&
+           "unexpected subreg index for VRM1 sub-register");
+    return true;
+  case RISCV::VRN2M2RegClassID:
+  case RISCV::VRN2M2NoV0RegClassID:
+  case RISCV::VRN3M2RegClassID:
+  case RISCV::VRN3M2NoV0RegClassID:
+  case RISCV::VRN4M2RegClassID:
+  case RISCV::VRN4M2NoV0RegClassID:
+    assert(TRI->getSubRegIdxSize(SubRegIdx) == RISCV::RVVBitsPerBlock * 2 &&
+           "unexpected subreg index for VRM2 sub-register");
+    return true;
+  case RISCV::VRN2M4RegClassID:
+  case RISCV::VRN2M4NoV0RegClassID:
+    assert(TRI->getSubRegIdxSize(SubRegIdx) == RISCV::RVVBitsPerBlock * 4 &&
+           "unexpected subreg index for VRM4 sub-register");
+    return true;
+  default:
+    return false;
+  }
+}
+
 std::optional<MachineOperand>
 RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
   std::optional<MachineOperand> CommonVL;
@@ -1395,6 +1473,13 @@ RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
       continue;
     }
 
+    if (isTupleInsertInstr(UserMI, *MRI)) {
+      LLVM_DEBUG(dbgs().indent(4) << "Peeking through uses of INSERT_SUBREG\n");
+      Worklist.insert_range(llvm::make_pointer_range(
+          MRI->use_operands(UserMI.getOperand(0).getReg())));
+      continue;
+    }
+
     if (UserMI.isPHI()) {
       // Don't follow PHI cycles
       if (!PHISeen.insert(&UserMI).second)
diff --git a/llvm/test/CodeGen/RISCV/rvv/pr141907.ll b/llvm/test/CodeGen/RISCV/rvv/pr141907.ll
index f93f88a5bc06c..1f485ea348396 100644
--- a/llvm/test/CodeGen/RISCV/rvv/pr141907.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/pr141907.ll
@@ -12,7 +12,7 @@ define void @pr141907(ptr %0) nounwind {
 ; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:    vmclr.m v0
 ; CHECK-NEXT:    li a1, 0
-; CHECK-NEXT:    vsetvli a5, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vmv.v.i v10, 0
 ; CHECK-NEXT:    addi a2, sp, 16
 ; CHECK-NEXT:    addi a3, sp, 20
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir
index 4e428fd1a6cea..e6d2f133ed7fd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir
@@ -2192,3 +2192,219 @@ body: |
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 4 /* e16 */, 0
     %y:vr = PseudoVRGATHEREI16_VV_M1_E32_MF2 $noreg, $noreg, %x, 1, 5 /* e32 */, 0
     $v8 = COPY %y
+...
+--- +name: vsseg3e32_v +body: | + bb.0: + liveins: $v8 + + ; CHECK-LABEL: name: vsseg3e32_v + ; CHECK: liveins: $v8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 5 /* e32 */, 3 /* ta, ma */ + ; CHECK-NEXT: [[PseudoVADD_VV_M1_1:%[0-9]+]]:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 5 /* e32 */, 3 /* ta, ma */ + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vrn3m1 = IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.sub_vrm1_0 + ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoVADD_VV_M1_]], %subreg.sub_vrm1_1 + ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoVADD_VV_M1_1]], %subreg.sub_vrm1_2 + ; CHECK-NEXT: PseudoVSSEG3E32_V_M1 killed [[INSERT_SUBREG2]], $noreg, 1, 5 /* e32 */ + %0:vr = COPY $v8 + %1:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 10, 5 /* e32 */, 3 /* ta, ma */ + %2:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 5 /* e32 */, 3 /* ta, ma */ + %6:vrn3m1 = IMPLICIT_DEF + %5:vrn3m1 = INSERT_SUBREG %6, %0, %subreg.sub_vrm1_0 + %7:vrn3m1 = INSERT_SUBREG %5, %1, %subreg.sub_vrm1_1 + %8:vrn3m1 = INSERT_SUBREG %7, %2, %subreg.sub_vrm1_2 + PseudoVSSEG3E32_V_M1 killed %8, $noreg, 1, 5 /* e32 */ +... +--- +name: vsseg3e64_v_incompatible_eew +body: | + bb.0: + liveins: $v8 + + ; CHECK-LABEL: name: vsseg3e64_v_incompatible_eew + ; CHECK: liveins: $v8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 10, 5 /* e32 */, 3 /* ta, ma */ + ; CHECK-NEXT: [[PseudoVADD_VV_M1_1:%[0-9]+]]:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 5 /* e32 */, 3 /* ta, ma */ + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vrn3m1 = IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.sub_vrm1_0 + ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoVADD_VV_M1_]], %subreg.sub_vrm1_1 + ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoVADD_VV_M1_1]], %subreg.sub_vrm1_2 + ; CHECK-NEXT: PseudoVSSEG3E64_V_M1 killed [[INSERT_SUBREG2]], $noreg, 1, 6 /* e64 */ + %0:vr = COPY $v8 + %1:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 10, 5 /* e32 */, 3 /* ta, ma */ + %2:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 5 /* e32 */, 3 /* ta, ma */ + %6:vrn3m1 = IMPLICIT_DEF + %5:vrn3m1 = INSERT_SUBREG %6, %0, %subreg.sub_vrm1_0 + %7:vrn3m1 = INSERT_SUBREG %5, %1, %subreg.sub_vrm1_1 + %8:vrn3m1 = INSERT_SUBREG %7, %2, %subreg.sub_vrm1_2 + PseudoVSSEG3E64_V_M1 killed %8, $noreg, 1, 6 /* e64 */ +... 
+--- +name: vsseg3e32_v_incompatible_emul +body: | + bb.0: + liveins: $v8 + + ; CHECK-LABEL: name: vsseg3e32_v_incompatible_emul + ; CHECK: liveins: $v8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 10, 5 /* e32 */, 3 /* ta, ma */ + ; CHECK-NEXT: [[PseudoVADD_VV_M1_1:%[0-9]+]]:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 5 /* e32 */, 3 /* ta, ma */ + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vrn3m1 = IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.sub_vrm1_0 + ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoVADD_VV_M1_]], %subreg.sub_vrm1_1 + ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoVADD_VV_M1_1]], %subreg.sub_vrm1_2 + ; CHECK-NEXT: PseudoVSSEG3E32_V_M1 killed [[INSERT_SUBREG2]], $noreg, 1, 6 /* e64 */ + %0:vr = COPY $v8 + %1:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 10, 5 /* e32 */, 3 /* ta, ma */ + %2:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 5 /* e32 */, 3 /* ta, ma */ + %6:vrn3m1 = IMPLICIT_DEF + %5:vrn3m1 = INSERT_SUBREG %6, %0, %subreg.sub_vrm1_0 + %7:vrn3m1 = INSERT_SUBREG %5, %1, %subreg.sub_vrm1_1 + %8:vrn3m1 = INSERT_SUBREG %7, %2, %subreg.sub_vrm1_2 + PseudoVSSEG3E32_V_M1 killed %8, $noreg, 1, 6 /* e64 */ +... +--- +name: vssseg3e32_v +body: | + bb.0: + liveins: $v8 + + ; CHECK-LABEL: name: vssseg3e32_v + ; CHECK: liveins: $v8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 5 /* e32 */, 3 /* ta, ma */ + ; CHECK-NEXT: [[PseudoVADD_VV_M1_1:%[0-9]+]]:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 5 /* e32 */, 3 /* ta, ma */ + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vrn3m1 = IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.sub_vrm1_0 + ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoVADD_VV_M1_]], %subreg.sub_vrm1_1 + ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoVADD_VV_M1_1]], %subreg.sub_vrm1_2 + ; CHECK-NEXT: PseudoVSSSEG3E32_V_M1 killed [[INSERT_SUBREG2]], $noreg, $noreg, 1, 5 /* e32 */ + %0:vr = COPY $v8 + %1:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 10, 5 /* e32 */, 3 /* ta, ma */ + %2:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 5 /* e32 */, 3 /* ta, ma */ + %6:vrn3m1 = IMPLICIT_DEF + %5:vrn3m1 = INSERT_SUBREG %6, %0, %subreg.sub_vrm1_0 + %7:vrn3m1 = INSERT_SUBREG %5, %1, %subreg.sub_vrm1_1 + %8:vrn3m1 = INSERT_SUBREG %7, %2, %subreg.sub_vrm1_2 + PseudoVSSSEG3E32_V_M1 killed %8, $noreg, $noreg, 1, 5 /* e32 */ +... 
+--- +name: vsuxseg3ei64_v +body: | + bb.0: + liveins: $v8 + + ; CHECK-LABEL: name: vsuxseg3ei64_v + ; CHECK: liveins: $v8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 5 /* e32 */, 3 /* ta, ma */ + ; CHECK-NEXT: [[PseudoVADD_VV_M1_1:%[0-9]+]]:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 5 /* e32 */, 3 /* ta, ma */ + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vrn3m1 = IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.sub_vrm1_0 + ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoVADD_VV_M1_]], %subreg.sub_vrm1_1 + ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoVADD_VV_M1_1]], %subreg.sub_vrm1_2 + ; CHECK-NEXT: PseudoVSUXSEG3EI64_V_M2_M1 killed [[INSERT_SUBREG2]], $noreg, $noreg, 1, 5 /* e32 */ + %0:vr = COPY $v8 + %1:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 10, 5 /* e32 */, 3 /* ta, ma */ + %2:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 5 /* e32 */, 3 /* ta, ma */ + %6:vrn3m1 = IMPLICIT_DEF + %5:vrn3m1 = INSERT_SUBREG %6, %0, %subreg.sub_vrm1_0 + %7:vrn3m1 = INSERT_SUBREG %5, %1, %subreg.sub_vrm1_1 + %8:vrn3m1 = INSERT_SUBREG %7, %2, %subreg.sub_vrm1_2 + PseudoVSUXSEG3EI64_V_M2_M1 killed %8, $noreg, $noreg, 1, 5 /* e32 */ +... +--- +name: vsuxseg3ei64_v_incompatible_data_eew +body: | + bb.0: + liveins: $v8 + + ; CHECK-LABEL: name: vsuxseg3ei64_v_incompatible_data_eew + ; CHECK: liveins: $v8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 10, 6 /* e64 */, 3 /* ta, ma */ + ; CHECK-NEXT: [[PseudoVADD_VV_M1_1:%[0-9]+]]:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 6 /* e64 */, 3 /* ta, ma */ + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vrn3m1 = IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.sub_vrm1_0 + ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoVADD_VV_M1_]], %subreg.sub_vrm1_1 + ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoVADD_VV_M1_1]], %subreg.sub_vrm1_2 + ; CHECK-NEXT: PseudoVSUXSEG3EI64_V_M2_M1 killed [[INSERT_SUBREG2]], $noreg, $noreg, 1, 5 /* e32 */ + %0:vr = COPY $v8 + %1:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 10, 6 /* e64 */, 3 /* ta, ma */ + %2:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 6 /* e64 */, 3 /* ta, ma */ + %6:vrn3m1 = IMPLICIT_DEF + %5:vrn3m1 = INSERT_SUBREG %6, %0, %subreg.sub_vrm1_0 + %7:vrn3m1 = INSERT_SUBREG %5, %1, %subreg.sub_vrm1_1 + %8:vrn3m1 = INSERT_SUBREG %7, %2, %subreg.sub_vrm1_2 + PseudoVSUXSEG3EI64_V_M2_M1 killed %8, $noreg, $noreg, 1, 5 /* e32 */ +... +--- +name: vsuxseg3ei32_v_index +body: | + bb.0: + + ; CHECK-LABEL: name: vsuxseg3ei32_v_index + ; CHECK: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 5 /* e32 */, 3 /* ta, ma */ + ; CHECK-NEXT: PseudoVSUXSEG3EI32_V_M1_M2 $noreg, $noreg, [[PseudoVADD_VV_M1_]], 1, 6 /* e64 */ + %2:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 5 /* e32 */, 3 /* ta, ma */ + PseudoVSUXSEG3EI32_V_M1_M2 $noreg, $noreg, %2, 1, 6 /* e64 */ +... 
+--- +name: vsuxseg3ei32_v_incompatible_index_eew +body: | + bb.0: + + ; CHECK-LABEL: name: vsuxseg3ei32_v_incompatible_index_eew + ; CHECK: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 6 /* e64 */, 3 /* ta, ma */ + ; CHECK-NEXT: PseudoVSUXSEG3EI32_V_M1_M2 $noreg, $noreg, [[PseudoVADD_VV_M1_]], 1, 6 /* e64 */ + %2:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 6 /* e64 */, 3 /* ta, ma */ + PseudoVSUXSEG3EI32_V_M1_M2 $noreg, $noreg, %2, 1, 6 /* e64 */ +... +--- +name: vsoxseg3ei64_v +body: | + bb.0: + liveins: $v8 + + ; CHECK-LABEL: name: vsoxseg3ei64_v + ; CHECK: liveins: $v8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 5 /* e32 */, 3 /* ta, ma */ + ; CHECK-NEXT: [[PseudoVADD_VV_M1_1:%[0-9]+]]:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 5 /* e32 */, 3 /* ta, ma */ + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vrn3m1 = IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.sub_vrm1_0 + ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoVADD_VV_M1_]], %subreg.sub_vrm1_1 + ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoVADD_VV_M1_1]], %subreg.sub_vrm1_2 + ; CHECK-NEXT: PseudoVSOXSEG3EI64_V_M2_M1 killed [[INSERT_SUBREG2]], $noreg, $noreg, 1, 5 /* e32 */ + %0:vr = COPY $v8 + %1:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 10, 5 /* e32 */, 3 /* ta, ma */ + %2:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 5 /* e32 */, 3 /* ta, ma */ + %6:vrn3m1 = IMPLICIT_DEF + %5:vrn3m1 = INSERT_SUBREG %6, %0, %subreg.sub_vrm1_0 + %7:vrn3m1 = INSERT_SUBREG %5, %1, %subreg.sub_vrm1_1 + %8:vrn3m1 = INSERT_SUBREG %7, %2, %subreg.sub_vrm1_2 + PseudoVSOXSEG3EI64_V_M2_M1 killed %8, $noreg, $noreg, 1, 5 /* e32 */ +... +--- +name: vsoxseg3ei32_v_index +body: | + bb.0: + + ; CHECK-LABEL: name: vsoxseg3ei32_v_index + ; CHECK: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 5 /* e32 */, 3 /* ta, ma */ + ; CHECK-NEXT: PseudoVSOXSEG3EI32_V_M1_M2 $noreg, $noreg, [[PseudoVADD_VV_M1_]], 1, 6 /* e64 */ + %2:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 5 /* e32 */, 3 /* ta, ma */ + PseudoVSOXSEG3EI32_V_M1_M2 $noreg, $noreg, %2, 1, 6 /* e64 */ +... From 9b74e9eeb0de0f242ec5cfde717b093ca2e47d80 Mon Sep 17 00:00:00 2001 From: Min Hsu Date: Tue, 26 Aug 2025 12:00:12 -0700 Subject: [PATCH 2/9] fixup! 
Update more test cases

---
 .../test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
index 63fd892058811..d394df954cbda 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
@@ -383,14 +383,13 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract( %
 ; RV32-NEXT:    vmsne.vi v0, v10, 0
 ; RV32-NEXT:    vle32.v v10, (a0), v0.t
 ; RV32-NEXT:    li a2, 32
-; RV32-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
+; RV32-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV32-NEXT:    vnsrl.wx v13, v10, a2
 ; RV32-NEXT:    vnsrl.wi v12, v10, 0
-; RV32-NEXT:    vmv.x.s a2, v10
+; RV32-NEXT:    vmv.x.s a1, v10
 ; RV32-NEXT:    vmv1r.v v0, v8
-; RV32-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV32-NEXT:    vsseg2e32.v v12, (a0), v0.t
-; RV32-NEXT:    mv a0, a2
+; RV32-NEXT:    mv a0, a1
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: masked_load_store_factor2_v2_shared_mask_extract:

From de5ed1a3a7b5fd8d69b9b8ba4043e601ccfa54b5 Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu
Date: Tue, 2 Sep 2025 11:55:49 -0700
Subject: [PATCH 3/9] fixup! Filter out some of the INSERT_SUBREG users

---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 37 ++++++++++++++++++++--
 llvm/test/CodeGen/RISCV/rvv/vl-opt.mir     | 32 +++++++++++++++++++
 2 files changed, 67 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index c57b08172c821..9cf769dd08c52 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -1453,6 +1453,34 @@ static bool isTupleInsertInstr(const MachineInstr &MI,
   }
 }
 
+static bool isSegmentedStoreInstr(const MachineInstr &MI) {
+  const RISCVVPseudosTable::PseudoInfo *RVV =
+      RISCVVPseudosTable::getPseudoInfo(MI.getOpcode());
+  if (!RVV)
+    return false;
+  switch (RVV->BaseInstr) {
+  case VSSEG_CASES(8):
+  case VSSSEG_CASES(8):
+  case VSUXSEG_CASES(8):
+  case VSOXSEG_CASES(8):
+  case VSSEG_CASES(16):
+  case VSSSEG_CASES(16):
+  case VSUXSEG_CASES(16):
+  case VSOXSEG_CASES(16):
+  case VSSEG_CASES(32):
+  case VSSSEG_CASES(32):
+  case VSUXSEG_CASES(32):
+  case VSOXSEG_CASES(32):
+  case VSSEG_CASES(64):
+  case VSSSEG_CASES(64):
+  case VSUXSEG_CASES(64):
+  case VSOXSEG_CASES(64):
+    return true;
+  default:
+    return false;
+  }
+}
+
 std::optional<MachineOperand>
 RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
   std::optional<MachineOperand> CommonVL;
@@ -1475,8 +1503,13 @@ RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
 
     if (isTupleInsertInstr(UserMI, *MRI)) {
       LLVM_DEBUG(dbgs().indent(4) << "Peeking through uses of INSERT_SUBREG\n");
-      Worklist.insert_range(llvm::make_pointer_range(
-          MRI->use_operands(UserMI.getOperand(0).getReg())));
+      for (MachineOperand &UseOp :
+           MRI->use_operands(UserMI.getOperand(0).getReg())) {
+        const MachineInstr &CandidateMI = *UseOp.getParent();
+        if (CandidateMI.getOpcode() == RISCV::INSERT_SUBREG ||
+            isSegmentedStoreInstr(CandidateMI))
+          Worklist.insert(&UseOp);
+      }
       continue;
     }
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
index 0acdca91ee84c..b5c17a21e00ff 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
@@ -661,3 +661,35 @@ body: |
     %y:vrnov0, %vl:gprnox0 = PseudoVLE8FF_V_M1_MASK $noreg, $noreg, $noreg, 1, 3 /* e8 */, 3 /* ta, ma */
     PseudoVSE8_V_M1 %x, $noreg, %vl, 3 /* e8 */
 ...
+---
+name: insert_subreg_bitcast_no_peekthru
+body: |
+  bb.0:
+    liveins: $v8, $v9, $v10
+
+    ; We should not peek through an INSERT_SUBREG if its user is not a segmented store or another INSERT_SUBREG.
+    ; CHECK-LABEL: name: insert_subreg_bitcast_no_peekthru
+    ; CHECK: liveins: $v8, $v9, $v10
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v9
+    ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 5 /* e32 */, 3 /* ta, ma */
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v10
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:vrn4m1 = IMPLICIT_DEF
+    ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrn4m1 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.sub_vrm1_0
+    ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrn4m1 = INSERT_SUBREG [[INSERT_SUBREG]], [[COPY1]], %subreg.sub_vrm1_1
+    ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrn4m1 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoVADD_VV_M1_]], %subreg.sub_vrm1_2
+    ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:vrn4m1 = INSERT_SUBREG [[INSERT_SUBREG2]], [[COPY2]], %subreg.sub_vrm1_3
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vrm4 = COPY [[INSERT_SUBREG3]]
+    ; CHECK-NEXT: PseudoVSE32_V_M4 [[COPY3]], $noreg, 1, 5 /* e32 */
+    %0:vr = COPY $v8
+    %1:vr = COPY $v9
+    %2:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 5 /* e32 */, 3 /* ta, ma */
+    %3:vr = COPY $v10
+    %6:vrn4m1 = IMPLICIT_DEF
+    %5:vrn4m1 = INSERT_SUBREG %6, %0, %subreg.sub_vrm1_0
+    %7:vrn4m1 = INSERT_SUBREG %5, %1, %subreg.sub_vrm1_1
+    %8:vrn4m1 = INSERT_SUBREG %7, %2, %subreg.sub_vrm1_2
+    %9:vrn4m1 = INSERT_SUBREG %8, %3, %subreg.sub_vrm1_3
+    %10:vrm4 = COPY %9
+    PseudoVSE32_V_M4 %10:vrm4, $noreg, 1, 5 /* e32 */

From 8df283bfda788b813d052cf371c11698080697f7 Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu
Date: Tue, 2 Sep 2025 12:09:25 -0700
Subject: [PATCH 4/9] fixup! fixup! Filter out some of the INSERT_SUBREG users

---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 9cf769dd08c52..29ffbe64fbd9d 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -1506,6 +1506,10 @@ RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
       for (MachineOperand &UseOp :
            MRI->use_operands(UserMI.getOperand(0).getReg())) {
         const MachineInstr &CandidateMI = *UseOp.getParent();
+        // We should not propagate the VL if the user is not a segmented store
+        // or another INSERT_SUBREG, since VL works differently between
+        // segmented operations (per-field) vs. other RVV ops (on the whole
+        // register group).
         if (CandidateMI.getOpcode() == RISCV::INSERT_SUBREG ||
             isSegmentedStoreInstr(CandidateMI))
           Worklist.insert(&UseOp);

From 3bfff33ab2728ded78eb93cc71f318bd3184235e Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu
Date: Tue, 2 Sep 2025 18:28:47 -0700
Subject: [PATCH 5/9] fixup! Use isTupleInsertInstr instead

---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 29ffbe64fbd9d..e0e9514a369ed 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -1510,7 +1510,7 @@ RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
         // or another INSERT_SUBREG, since VL works differently between
         // segmented operations (per-field) vs. other RVV ops (on the whole
         // register group).
-        if (CandidateMI.getOpcode() == RISCV::INSERT_SUBREG ||
+        if (isTupleInsertInstr(CandidateMI, *MRI) ||
             isSegmentedStoreInstr(CandidateMI))
           Worklist.insert(&UseOp);

From fdf82423fa6d1adba1535538dd0510146914c490 Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu
Date: Wed, 3 Sep 2025 09:32:27 -0700
Subject: [PATCH 6/9] fixup! Stop propagation if any of the INSERT_SUBREG users is not eligible

---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 7 ++++---
 llvm/test/CodeGen/RISCV/rvv/vl-opt.mir     | 8 +++++++-
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index e0e9514a369ed..37e997ec62875 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -1510,9 +1510,10 @@ RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
         // or another INSERT_SUBREG, since VL works differently between
         // segmented operations (per-field) vs. other RVV ops (on the whole
        // register group).
-        if (isTupleInsertInstr(CandidateMI, *MRI) ||
-            isSegmentedStoreInstr(CandidateMI))
-          Worklist.insert(&UseOp);
+        if (!isTupleInsertInstr(CandidateMI, *MRI) &&
+            !isSegmentedStoreInstr(CandidateMI))
+          return std::nullopt;
+        Worklist.insert(&UseOp);
       }
       continue;
     }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
index b5c17a21e00ff..086b3203ed5b0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
@@ -667,7 +667,7 @@ body: |
   bb.0:
     liveins: $v8, $v9, $v10
 
-    ; We should not peek through an INSERT_SUBREG if its user is not a segmented store or another INSERT_SUBREG.
+    ; We should not peek through an INSERT_SUBREG if any of its users is not a segmented store or another INSERT_SUBREG.
     ; CHECK-LABEL: name: insert_subreg_bitcast_no_peekthru
     ; CHECK: liveins: $v8, $v9, $v10
     ; CHECK-NEXT: {{ $}}
@@ -682,6 +682,9 @@ body: |
     ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:vrn4m1 = INSERT_SUBREG [[INSERT_SUBREG2]], [[COPY2]], %subreg.sub_vrm1_3
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vrm4 = COPY [[INSERT_SUBREG3]]
     ; CHECK-NEXT: PseudoVSE32_V_M4 [[COPY3]], $noreg, 1, 5 /* e32 */
+    ; CHECK-NEXT: [[PseudoVADD_VV_M1_1:%[0-9]+]]:vr = PseudoVADD_VV_M1 $noreg, [[PseudoVADD_VV_M1_]], $noreg, 10, 5 /* e32 */, 3 /* ta, ma */
+    ; CHECK-NEXT: $v10 = COPY [[PseudoVADD_VV_M1_1]]
+    ; CHECK-NEXT: PseudoRET implicit $v10
     %0:vr = COPY $v8
     %1:vr = COPY $v9
     %2:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 5 /* e32 */, 3 /* ta, ma */
@@ -693,3 +696,6 @@ body: |
     %9:vrn4m1 = INSERT_SUBREG %8, %3, %subreg.sub_vrm1_3
     %10:vrm4 = COPY %9
     PseudoVSE32_V_M4 %10:vrm4, $noreg, 1, 5 /* e32 */
+    %11:vr = PseudoVADD_VV_M1 $noreg, %2, $noreg, 10, 5 /* e32 */, 3 /* ta, ma */
+    $v10 = COPY %11
+    PseudoRET implicit $v10

From 39bc8d632aadb892f7520b1298a8e0bd8ca1d781 Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu
Date: Wed, 3 Sep 2025 10:08:26 -0700
Subject: [PATCH 7/9] fixup!
Check tuple register class using TSFlags --- llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 49 ++++++---------------- 1 file changed, 13 insertions(+), 36 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp index 37e997ec62875..101a0167e76b7 100644 --- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp @@ -1413,44 +1413,21 @@ static bool isTupleInsertInstr(const MachineInstr &MI, return false; const TargetRegisterClass *DstRC = MRI.getRegClass(MI.getOperand(0).getReg()); - // Check whether it was lowered with the correct subreg index. + if (!RISCVRI::isVRegClass(DstRC->TSFlags)) + return false; + unsigned NF = RISCVRI::getNF(DstRC->TSFlags); + if (NF < 2) + return false; + + // Check whether INSERT_SUBREG was lowered with the correct subreg index. + auto VLMul = RISCVRI::getLMul(DstRC->TSFlags); + [[maybe_unused]] auto [LMul, IsFractional] = RISCVVType::decodeVLMUL(VLMul); + assert(!IsFractional && "unexpected LMUL for tuple register classes"); [[maybe_unused]] const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); [[maybe_unused]] unsigned SubRegIdx = MI.getOperand(3).getImm(); - switch (DstRC->getID()) { - case RISCV::VRN2M1RegClassID: - case RISCV::VRN2M1NoV0RegClassID: - case RISCV::VRN3M1RegClassID: - case RISCV::VRN3M1NoV0RegClassID: - case RISCV::VRN4M1RegClassID: - case RISCV::VRN4M1NoV0RegClassID: - case RISCV::VRN5M1RegClassID: - case RISCV::VRN5M1NoV0RegClassID: - case RISCV::VRN6M1RegClassID: - case RISCV::VRN6M1NoV0RegClassID: - case RISCV::VRN7M1RegClassID: - case RISCV::VRN7M1NoV0RegClassID: - case RISCV::VRN8M1RegClassID: - case RISCV::VRN8M1NoV0RegClassID: - assert(TRI->getSubRegIdxSize(SubRegIdx) == RISCV::RVVBitsPerBlock && - "unexpected subreg index for VRM1 sub-register"); - return true; - case RISCV::VRN2M2RegClassID: - case RISCV::VRN2M2NoV0RegClassID: - case RISCV::VRN3M2RegClassID: - case RISCV::VRN3M2NoV0RegClassID: - case RISCV::VRN4M2RegClassID: - case RISCV::VRN4M2NoV0RegClassID: - assert(TRI->getSubRegIdxSize(SubRegIdx) == RISCV::RVVBitsPerBlock * 2 && - "unexpected subreg index for VRM2 sub-register"); - return true; - case RISCV::VRN2M4RegClassID: - case RISCV::VRN2M4NoV0RegClassID: - assert(TRI->getSubRegIdxSize(SubRegIdx) == RISCV::RVVBitsPerBlock * 4 && - "unexpected subreg index for VRM4 sub-register"); - return true; - default: - return false; - } + assert(TRI->getSubRegIdxSize(SubRegIdx) == RISCV::RVVBitsPerBlock * LMul && + "unexpected subreg index of tuple register class"); + return true; } static bool isSegmentedStoreInstr(const MachineInstr &MI) { From 06b75b882b832f5ba16126cdab1bec937a5b10ee Mon Sep 17 00:00:00 2001 From: Min-Yih Hsu Date: Thu, 4 Sep 2025 10:41:49 -0700 Subject: [PATCH 8/9] fixup! 
Address review comments --- llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 16 +++++----------- llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir | 15 +++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vl-opt.ll | 16 ++++++++++++++++ 3 files changed, 36 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp index 101a0167e76b7..4d4f1db215220 100644 --- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp @@ -1413,29 +1413,23 @@ static bool isTupleInsertInstr(const MachineInstr &MI, return false; const TargetRegisterClass *DstRC = MRI.getRegClass(MI.getOperand(0).getReg()); + const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); if (!RISCVRI::isVRegClass(DstRC->TSFlags)) return false; unsigned NF = RISCVRI::getNF(DstRC->TSFlags); if (NF < 2) return false; - // Check whether INSERT_SUBREG was lowered with the correct subreg index. + // Check whether INSERT_SUBREG has the correct subreg index for tuple inserts. auto VLMul = RISCVRI::getLMul(DstRC->TSFlags); + unsigned SubRegIdx = MI.getOperand(3).getImm(); [[maybe_unused]] auto [LMul, IsFractional] = RISCVVType::decodeVLMUL(VLMul); assert(!IsFractional && "unexpected LMUL for tuple register classes"); - [[maybe_unused]] const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); - [[maybe_unused]] unsigned SubRegIdx = MI.getOperand(3).getImm(); - assert(TRI->getSubRegIdxSize(SubRegIdx) == RISCV::RVVBitsPerBlock * LMul && - "unexpected subreg index of tuple register class"); - return true; + return TRI->getSubRegIdxSize(SubRegIdx) == RISCV::RVVBitsPerBlock * LMul; } static bool isSegmentedStoreInstr(const MachineInstr &MI) { - const RISCVVPseudosTable::PseudoInfo *RVV = - RISCVVPseudosTable::getPseudoInfo(MI.getOpcode()); - if (!RVV) - return false; - switch (RVV->BaseInstr) { + switch (RISCV::getRVVMCOpcode(MI.getOpcode())) { case VSSEG_CASES(8): case VSSSEG_CASES(8): case VSUXSEG_CASES(8): diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir index e6d2f133ed7fd..cd85853c2d12c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir @@ -2272,6 +2272,21 @@ body: | PseudoVSSEG3E32_V_M1 killed %8, $noreg, 1, 6 /* e64 */ ... --- +name: vsseg3e32_v_incompatible_insert_subreg +body: | + bb.0: + + ; CHECK-LABEL: name: vsseg3e32_v_incompatible_insert_subreg + ; CHECK: [[PseudoVADD_VV_M2_:%[0-9]+]]:vrm2 = PseudoVADD_VV_M2 $noreg, $noreg, $noreg, -1, 5 /* e32 */, 3 /* ta, ma */ + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vrn3m1 = IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrn3m1 = INSERT_SUBREG [[DEF]], [[PseudoVADD_VV_M2_]], %subreg.sub_vrm2_0 + ; CHECK-NEXT: PseudoVSSEG3E32_V_M1 killed [[INSERT_SUBREG]], $noreg, 1, 5 /* e32 */ + %2:vrm2 = PseudoVADD_VV_M2 $noreg, $noreg, $noreg, -1, 5 /* e32 */, 3 /* ta, ma */ + %6:vrn3m1 = IMPLICIT_DEF + %5:vrn3m1 = INSERT_SUBREG %6, %2, %subreg.sub_vrm2_0 + PseudoVSSEG3E32_V_M1 killed %5, $noreg, 1, 5 /* e32 */ +... +--- name: vssseg3e32_v body: | bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll index 6b5b984a48789..20608cd6bed87 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll @@ -222,3 +222,19 @@ define @vcompress_add( %a, @llvm.riscv.vcompress.nxv8i32( poison, %add, %c, iXLen %vl) ret %compress } + +; Make sure we peek through INSERT_SUBREG of tuple registers. 
+define void @segmented_store_insert_subreg(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, ptr %p, iXLen %vl) {
+; CHECK-LABEL: segmented_store_insert_subreg:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vfadd.vv v10, v8, v10
+; CHECK-NEXT:    vsseg3e32.v v8, (a0)
+; CHECK-NEXT:    ret
+  %fadd = fadd <vscale x 4 x float> %v0, %v1
+  %t0 = call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.tuple.insert(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) poison, <vscale x 4 x float> %v0, i32 0)
+  %t1 = call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.tuple.insert(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %t0, <vscale x 4 x float> %fadd, i32 1)
+  %t2 = call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.tuple.insert(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %t1, <vscale x 4 x float> %v2, i32 2)
+  call void @llvm.riscv.vsseg3(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %t2, ptr %p, iXLen %vl, iXLen 5)
+  ret void
+}

From f32e1f4e4b5b2b70cc766ec46ca5c5f717d33c20 Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu
Date: Thu, 4 Sep 2025 11:59:53 -0700
Subject: [PATCH 9/9] fixup! Update tests

---
 .../test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
index d394df954cbda..d7d767e600db5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
@@ -421,14 +421,13 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract( %
 ; RV64-NEXT:    vsetvli zero, a2, e32, m2, ta, ma
 ; RV64-NEXT:    vle32.v v10, (a0), v0.t
 ; RV64-NEXT:    li a2, 32
-; RV64-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
+; RV64-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV64-NEXT:    vnsrl.wx v13, v10, a2
 ; RV64-NEXT:    vnsrl.wi v12, v10, 0
-; RV64-NEXT:    vmv.x.s a2, v10
+; RV64-NEXT:    vmv.x.s a1, v10
 ; RV64-NEXT:    vmv1r.v v0, v8
-; RV64-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV64-NEXT:    vsseg2e32.v v12, (a0), v0.t
-; RV64-NEXT:    mv a0, a2
+; RV64-NEXT:    mv a0, a1
 ; RV64-NEXT:    ret
   %rvl = mul nuw i32 %evl, 2
   %interleaved.mask = tail call <vscale x 4 x i1> @llvm.vector.interleave2.nxv4i1(<vscale x 2 x i1> %mask, <vscale x 2 x i1> %mask)
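
Taken together, patches 3, 5, and 6 leave the tuple-insert handling in checkUsers in the following shape. This is a consolidated sketch assembled from the hunks above for readability (the comments are expository and not part of the patch); it is not a verbatim quote of the final file:

    // Inside RISCVVLOptimizer::checkUsers(MI): when a user of MI merely
    // inserts MI's result into a register tuple (RISCVISD::TUPLE_INSERT,
    // lowered to INSERT_SUBREG), look through the tuple -- but only if every
    // user of the tuple is itself a tuple insert or a segmented store.
    if (isTupleInsertInstr(UserMI, *MRI)) {
      LLVM_DEBUG(dbgs().indent(4) << "Peeking through uses of INSERT_SUBREG\n");
      for (MachineOperand &UseOp :
           MRI->use_operands(UserMI.getOperand(0).getReg())) {
        const MachineInstr &CandidateMI = *UseOp.getParent();
        // Segmented stores apply VL per field, so looking through the tuple
        // is sound for them; any other user reads the whole register group,
        // so give up on shrinking this instruction's VL entirely.
        if (!isTupleInsertInstr(CandidateMI, *MRI) &&
            !isSegmentedStoreInstr(CandidateMI))
          return std::nullopt;
        Worklist.insert(&UseOp);
      }
      continue;
    }

Returning std::nullopt, rather than merely skipping the offending use, is what the insert_subreg_bitcast_no_peekthru test pins down: once the tuple escapes to a whole-register-group user (the COPY to vrm4 feeding PseudoVSE32_V_M4), no VL derived from the segmented store is safe for the inserted value.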