@@ -178,6 +178,19 @@ static unsigned getIntegerExtensionOperandEEW(unsigned Factor,
178178 return Log2EEW;
179179}
180180
// Case-label helpers for the RVV segment store opcodes.
//
// VSEG_CASES(Prefix, EEW) expands to the seven opcodes
// RISCV::<Prefix>SEG<2..8>E<EEW>_V chained together with "case" labels. The
// leading "case" and the trailing ":" are intentionally left out of the
// expansion so that a use site reads like an ordinary label:
//
//   case VSSEG_CASES(8):
//
// NOTE: there must be no whitespace between a macro name and its "(" in the
// #define lines below. With a space the preprocessor defines an object-like
// macro and the parameter list becomes part of the replacement text.
#define VSEG_CASES(Prefix, EEW)                                                \
  RISCV::Prefix##SEG2E##EEW##_V:                                               \
  case RISCV::Prefix##SEG3E##EEW##_V:                                          \
  case RISCV::Prefix##SEG4E##EEW##_V:                                          \
  case RISCV::Prefix##SEG5E##EEW##_V:                                          \
  case RISCV::Prefix##SEG6E##EEW##_V:                                          \
  case RISCV::Prefix##SEG7E##EEW##_V:                                          \
  case RISCV::Prefix##SEG8E##EEW##_V
// Opcodes named VSSEG<n>E<EEW>_V and VSSSEG<n>E<EEW>_V.
#define VSSEG_CASES(EEW) VSEG_CASES(VS, EEW)
#define VSSSEG_CASES(EEW) VSEG_CASES(VSS, EEW)
// Indexed forms: an "I" is pasted in front of EEW, which yields
// VSUXSEG<n>EI<EEW>_V and VSOXSEG<n>EI<EEW>_V.
#define VSUXSEG_CASES(EEW) VSEG_CASES(VSUX, I##EEW)
#define VSOXSEG_CASES(EEW) VSEG_CASES(VSOX, I##EEW)
193+
181194static std::optional<unsigned >
182195getOperandLog2EEW (const MachineOperand &MO, const MachineRegisterInfo *MRI) {
183196 const MachineInstr &MI = *MO.getParent ();
@@ -225,21 +238,29 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
225238 case RISCV::VSE8_V:
226239 case RISCV::VLSE8_V:
227240 case RISCV::VSSE8_V:
241+ case VSSEG_CASES (8 ):
242+ case VSSSEG_CASES (8 ):
228243 return 3 ;
229244 case RISCV::VLE16_V:
230245 case RISCV::VSE16_V:
231246 case RISCV::VLSE16_V:
232247 case RISCV::VSSE16_V:
248+ case VSSEG_CASES (16 ):
249+ case VSSSEG_CASES (16 ):
233250 return 4 ;
234251 case RISCV::VLE32_V:
235252 case RISCV::VSE32_V:
236253 case RISCV::VLSE32_V:
237254 case RISCV::VSSE32_V:
255+ case VSSEG_CASES (32 ):
256+ case VSSSEG_CASES (32 ):
238257 return 5 ;
239258 case RISCV::VLE64_V:
240259 case RISCV::VSE64_V:
241260 case RISCV::VLSE64_V:
242261 case RISCV::VSSE64_V:
262+ case VSSEG_CASES (64 ):
263+ case VSSSEG_CASES (64 ):
243264 return 6 ;
244265
245266 // Vector Indexed Instructions
@@ -248,31 +269,39 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
248269 case RISCV::VLUXEI8_V:
249270 case RISCV::VLOXEI8_V:
250271 case RISCV::VSUXEI8_V:
251- case RISCV::VSOXEI8_V: {
272+ case RISCV::VSOXEI8_V:
273+ case VSUXSEG_CASES (8 ):
274+ case VSOXSEG_CASES (8 ): {
252275 if (MO.getOperandNo () == 0 )
253276 return MILog2SEW;
254277 return 3 ;
255278 }
256279 case RISCV::VLUXEI16_V:
257280 case RISCV::VLOXEI16_V:
258281 case RISCV::VSUXEI16_V:
259- case RISCV::VSOXEI16_V: {
282+ case RISCV::VSOXEI16_V:
283+ case VSUXSEG_CASES (16 ):
284+ case VSOXSEG_CASES (16 ): {
260285 if (MO.getOperandNo () == 0 )
261286 return MILog2SEW;
262287 return 4 ;
263288 }
264289 case RISCV::VLUXEI32_V:
265290 case RISCV::VLOXEI32_V:
266291 case RISCV::VSUXEI32_V:
267- case RISCV::VSOXEI32_V: {
292+ case RISCV::VSOXEI32_V:
293+ case VSUXSEG_CASES (32 ):
294+ case VSOXSEG_CASES (32 ): {
268295 if (MO.getOperandNo () == 0 )
269296 return MILog2SEW;
270297 return 5 ;
271298 }
272299 case RISCV::VLUXEI64_V:
273300 case RISCV::VLOXEI64_V:
274301 case RISCV::VSUXEI64_V:
275- case RISCV::VSOXEI64_V: {
302+ case RISCV::VSOXEI64_V:
303+ case VSUXSEG_CASES (64 ):
304+ case VSOXSEG_CASES (64 ): {
276305 if (MO.getOperandNo () == 0 )
277306 return MILog2SEW;
278307 return 6 ;
@@ -1375,6 +1404,54 @@ RISCVVLOptimizer::getMinimumVLForUser(const MachineOperand &UserOp) const {
13751404 return VLOp;
13761405}
13771406
1407+ // / Return true if MI is an instruction used for assembling registers
1408+ // / for segmented store instructions, namely, RISCVISD::TUPLE_INSERT.
1409+ // / Currently it's lowered to INSERT_SUBREG.
1410+ static bool isTupleInsertInstr (const MachineInstr &MI,
1411+ const MachineRegisterInfo &MRI) {
1412+ if (MI.getOpcode () != RISCV::INSERT_SUBREG)
1413+ return false ;
1414+
1415+ const TargetRegisterClass *DstRC = MRI.getRegClass (MI.getOperand (0 ).getReg ());
1416+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo ();
1417+ if (!RISCVRI::isVRegClass (DstRC->TSFlags ))
1418+ return false ;
1419+ unsigned NF = RISCVRI::getNF (DstRC->TSFlags );
1420+ if (NF < 2 )
1421+ return false ;
1422+
1423+ // Check whether INSERT_SUBREG has the correct subreg index for tuple inserts.
1424+ auto VLMul = RISCVRI::getLMul (DstRC->TSFlags );
1425+ unsigned SubRegIdx = MI.getOperand (3 ).getImm ();
1426+ [[maybe_unused]] auto [LMul, IsFractional] = RISCVVType::decodeVLMUL (VLMul);
1427+ assert (!IsFractional && " unexpected LMUL for tuple register classes" );
1428+ return TRI->getSubRegIdxSize (SubRegIdx) == RISCV::RVVBitsPerBlock * LMul;
1429+ }
1430+
1431+ static bool isSegmentedStoreInstr (const MachineInstr &MI) {
1432+ switch (RISCV::getRVVMCOpcode (MI.getOpcode ())) {
1433+ case VSSEG_CASES (8 ):
1434+ case VSSSEG_CASES (8 ):
1435+ case VSUXSEG_CASES (8 ):
1436+ case VSOXSEG_CASES (8 ):
1437+ case VSSEG_CASES (16 ):
1438+ case VSSSEG_CASES (16 ):
1439+ case VSUXSEG_CASES (16 ):
1440+ case VSOXSEG_CASES (16 ):
1441+ case VSSEG_CASES (32 ):
1442+ case VSSSEG_CASES (32 ):
1443+ case VSUXSEG_CASES (32 ):
1444+ case VSOXSEG_CASES (32 ):
1445+ case VSSEG_CASES (64 ):
1446+ case VSSSEG_CASES (64 ):
1447+ case VSUXSEG_CASES (64 ):
1448+ case VSOXSEG_CASES (64 ):
1449+ return true ;
1450+ default :
1451+ return false ;
1452+ }
1453+ }
1454+
13781455std::optional<MachineOperand>
13791456RISCVVLOptimizer::checkUsers (const MachineInstr &MI) const {
13801457 std::optional<MachineOperand> CommonVL;
@@ -1395,6 +1472,23 @@ RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
13951472 continue ;
13961473 }
13971474
1475+ if (isTupleInsertInstr (UserMI, *MRI)) {
1476+ LLVM_DEBUG (dbgs ().indent (4 ) << " Peeking through uses of INSERT_SUBREG\n " );
1477+ for (MachineOperand &UseOp :
1478+ MRI->use_operands (UserMI.getOperand (0 ).getReg ())) {
1479+ const MachineInstr &CandidateMI = *UseOp.getParent ();
1480+ // We should not propagate the VL if the user is not a segmented store
1481+ // or another INSERT_SUBREG, since VL just works differently
1482+ // between segmented operations (per-field) v.s. other RVV ops (on the
1483+ // whole register group).
1484+ if (!isTupleInsertInstr (CandidateMI, *MRI) &&
1485+ !isSegmentedStoreInstr (CandidateMI))
1486+ return std::nullopt ;
1487+ Worklist.insert (&UseOp);
1488+ }
1489+ continue ;
1490+ }
1491+
13981492 if (UserMI.isPHI ()) {
13991493 // Don't follow PHI cycles
14001494 if (!PHISeen.insert (&UserMI).second )
0 commit comments