Commit b45582f

[RISCV][VLOPT] Support segmented store instructions (#155467)
Add RISCVVLOptimizer support for unit-stride, strided, and indexed segmented stores. The biggest change is the new capability to look through INSERT_SUBREG, which is used to compose segmented register class (tuple) values. Fixes #149350
1 parent fffd6da

File tree

6 files changed (+390 −13 lines)


llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp

Lines changed: 98 additions & 4 deletions
@@ -178,6 +178,19 @@ static unsigned getIntegerExtensionOperandEEW(unsigned Factor,
   return Log2EEW;
 }
 
+#define VSEG_CASES(Prefix, EEW)                                                \
+  RISCV::Prefix##SEG2E##EEW##_V:                                               \
+  case RISCV::Prefix##SEG3E##EEW##_V:                                          \
+  case RISCV::Prefix##SEG4E##EEW##_V:                                          \
+  case RISCV::Prefix##SEG5E##EEW##_V:                                          \
+  case RISCV::Prefix##SEG6E##EEW##_V:                                          \
+  case RISCV::Prefix##SEG7E##EEW##_V:                                          \
+  case RISCV::Prefix##SEG8E##EEW##_V
+#define VSSEG_CASES(EEW) VSEG_CASES(VS, EEW)
+#define VSSSEG_CASES(EEW) VSEG_CASES(VSS, EEW)
+#define VSUXSEG_CASES(EEW) VSEG_CASES(VSUX, I##EEW)
+#define VSOXSEG_CASES(EEW) VSEG_CASES(VSOX, I##EEW)
+
 static std::optional<unsigned>
 getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
   const MachineInstr &MI = *MO.getParent();
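A note on the macro shape: the first label in VSEG_CASES deliberately omits the leading `case`, so each helper must itself be written after a `case` keyword, as in `case VSSEG_CASES(8):`. That single label then covers all seven segment counts. A sketch of the (mechanical) expansion, following the definition above:

// `case VSSEG_CASES(8):`, i.e. `case VSEG_CASES(VS, 8):`, expands to:
case RISCV::VSSEG2E8_V:
case RISCV::VSSEG3E8_V:
case RISCV::VSSEG4E8_V:
case RISCV::VSSEG5E8_V:
case RISCV::VSSEG6E8_V:
case RISCV::VSSEG7E8_V:
case RISCV::VSSEG8E8_V: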
@@ -225,21 +238,29 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
   case RISCV::VSE8_V:
   case RISCV::VLSE8_V:
   case RISCV::VSSE8_V:
+  case VSSEG_CASES(8):
+  case VSSSEG_CASES(8):
     return 3;
   case RISCV::VLE16_V:
   case RISCV::VSE16_V:
   case RISCV::VLSE16_V:
   case RISCV::VSSE16_V:
+  case VSSEG_CASES(16):
+  case VSSSEG_CASES(16):
     return 4;
   case RISCV::VLE32_V:
   case RISCV::VSE32_V:
   case RISCV::VLSE32_V:
   case RISCV::VSSE32_V:
+  case VSSEG_CASES(32):
+  case VSSSEG_CASES(32):
     return 5;
   case RISCV::VLE64_V:
   case RISCV::VSE64_V:
   case RISCV::VLSE64_V:
   case RISCV::VSSE64_V:
+  case VSSEG_CASES(64):
+  case VSSSEG_CASES(64):
     return 6;
 
   // Vector Indexed Instructions
@@ -248,31 +269,39 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
   case RISCV::VLUXEI8_V:
   case RISCV::VLOXEI8_V:
   case RISCV::VSUXEI8_V:
-  case RISCV::VSOXEI8_V: {
+  case RISCV::VSOXEI8_V:
+  case VSUXSEG_CASES(8):
+  case VSOXSEG_CASES(8): {
     if (MO.getOperandNo() == 0)
       return MILog2SEW;
     return 3;
   }
   case RISCV::VLUXEI16_V:
   case RISCV::VLOXEI16_V:
   case RISCV::VSUXEI16_V:
-  case RISCV::VSOXEI16_V: {
+  case RISCV::VSOXEI16_V:
+  case VSUXSEG_CASES(16):
+  case VSOXSEG_CASES(16): {
     if (MO.getOperandNo() == 0)
       return MILog2SEW;
     return 4;
   }
   case RISCV::VLUXEI32_V:
   case RISCV::VLOXEI32_V:
   case RISCV::VSUXEI32_V:
-  case RISCV::VSOXEI32_V: {
+  case RISCV::VSOXEI32_V:
+  case VSUXSEG_CASES(32):
+  case VSOXSEG_CASES(32): {
     if (MO.getOperandNo() == 0)
       return MILog2SEW;
     return 5;
   }
   case RISCV::VLUXEI64_V:
   case RISCV::VLOXEI64_V:
   case RISCV::VSUXEI64_V:
-  case RISCV::VSOXEI64_V: {
+  case RISCV::VSOXEI64_V:
+  case VSUXSEG_CASES(64):
+  case VSOXSEG_CASES(64): {
     if (MO.getOperandNo() == 0)
       return MILog2SEW;
     return 6;
@@ -1375,6 +1404,54 @@ RISCVVLOptimizer::getMinimumVLForUser(const MachineOperand &UserOp) const {
   return VLOp;
 }
 
+/// Return true if MI is an instruction used for assembling registers
+/// for segmented store instructions, namely, RISCVISD::TUPLE_INSERT.
+/// Currently it's lowered to INSERT_SUBREG.
+static bool isTupleInsertInstr(const MachineInstr &MI,
+                               const MachineRegisterInfo &MRI) {
+  if (MI.getOpcode() != RISCV::INSERT_SUBREG)
+    return false;
+
+  const TargetRegisterClass *DstRC = MRI.getRegClass(MI.getOperand(0).getReg());
+  const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+  if (!RISCVRI::isVRegClass(DstRC->TSFlags))
+    return false;
+  unsigned NF = RISCVRI::getNF(DstRC->TSFlags);
+  if (NF < 2)
+    return false;
+
+  // Check whether INSERT_SUBREG has the correct subreg index for tuple inserts.
+  auto VLMul = RISCVRI::getLMul(DstRC->TSFlags);
+  unsigned SubRegIdx = MI.getOperand(3).getImm();
+  [[maybe_unused]] auto [LMul, IsFractional] = RISCVVType::decodeVLMUL(VLMul);
+  assert(!IsFractional && "unexpected LMUL for tuple register classes");
+  return TRI->getSubRegIdxSize(SubRegIdx) == RISCV::RVVBitsPerBlock * LMul;
+}
+
+static bool isSegmentedStoreInstr(const MachineInstr &MI) {
+  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
+  case VSSEG_CASES(8):
+  case VSSSEG_CASES(8):
+  case VSUXSEG_CASES(8):
+  case VSOXSEG_CASES(8):
+  case VSSEG_CASES(16):
+  case VSSSEG_CASES(16):
+  case VSUXSEG_CASES(16):
+  case VSOXSEG_CASES(16):
+  case VSSEG_CASES(32):
+  case VSSSEG_CASES(32):
+  case VSUXSEG_CASES(32):
+  case VSOXSEG_CASES(32):
+  case VSSEG_CASES(64):
+  case VSSSEG_CASES(64):
+  case VSUXSEG_CASES(64):
+  case VSOXSEG_CASES(64):
+    return true;
+  default:
+    return false;
+  }
+}
+
 std::optional<MachineOperand>
 RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
   std::optional<MachineOperand> CommonVL;
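The final comparison in isTupleInsertInstr guards against INSERT_SUBREGs that touch only part of a tuple field. A minimal standalone model of that size check (the block width and subregister sizes here are assumptions for illustration; the pass itself queries TargetRegisterInfo::getSubRegIdxSize):

#include <cassert>

// One RVV vector register is RVVBitsPerBlock bits wide (64 in the model
// assumed here, matching RISCV::RVVBitsPerBlock).
constexpr unsigned RVVBitsPerBlock = 64;

// A tuple insert must replace one whole field, i.e. a subregister exactly as
// wide as LMul vector registers. Anything narrower is a partial update of a
// field, which the optimizer refuses to look through.
static bool isWholeFieldSubReg(unsigned SubRegIdxSizeBits, unsigned LMul) {
  return SubRegIdxSizeBits == RVVBitsPerBlock * LMul;
}

int main() {
  assert(isWholeFieldSubReg(128, 2));  // m2 subreg into e.g. a VRN2M2 tuple
  assert(!isWholeFieldSubReg(64, 2));  // m1 subreg into the same tuple: reject
}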
@@ -1395,6 +1472,23 @@ RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
       continue;
     }
 
+    if (isTupleInsertInstr(UserMI, *MRI)) {
+      LLVM_DEBUG(dbgs().indent(4) << "Peeking through uses of INSERT_SUBREG\n");
+      for (MachineOperand &UseOp :
+           MRI->use_operands(UserMI.getOperand(0).getReg())) {
+        const MachineInstr &CandidateMI = *UseOp.getParent();
+        // We should not propagate the VL if the user is not a segmented store
+        // or another INSERT_SUBREG, since VL works differently between
+        // segmented operations (per-field) vs. other RVV ops (on the whole
+        // register group).
+        if (!isTupleInsertInstr(CandidateMI, *MRI) &&
+            !isSegmentedStoreInstr(CandidateMI))
+          return std::nullopt;
+        Worklist.insert(&UseOp);
+      }
+      continue;
+    }
+
     if (UserMI.isPHI()) {
       // Don't follow PHI cycles
       if (!PHISeen.insert(&UserMI).second)
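The comment about per-field VL semantics is the crux of why only tuple inserts and segmented stores are accepted as users here. For a segmented store, VL counts segments, so each of the NF fields moves VL elements; an ordinary RVV instruction reading the same register group would treat VL as a plain element count over the whole group. A toy model of that mismatch (numbers illustrative, not from the commit):

#include <cstdio>

int main() {
  unsigned NF = 2; // fields per segment (e.g. vsseg2e32.v)
  unsigned VL = 4; // the vector length being propagated

  // Segmented store: VL segments, each with NF fields -> NF * VL elements.
  std::printf("segmented store moves %u elements (%u per field)\n",
              NF * VL, VL);

  // A non-segmented op on the whole register group reads exactly VL elements,
  // indexed across the group, so a VL chosen for the store's per-field needs
  // would be wrong for it; checkUsers therefore bails with std::nullopt.
  std::printf("whole-group user touches %u elements\n", VL);
}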

llvm/test/CodeGen/RISCV/rvv/pr141907.ll

Lines changed: 1 addition & 1 deletion
@@ -12,7 +12,7 @@ define void @pr141907(ptr %0) nounwind {
 ; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:    vmclr.m v0
 ; CHECK-NEXT:    li a1, 0
-; CHECK-NEXT:    vsetvli a5, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vmv.v.i v10, 0
 ; CHECK-NEXT:    addi a2, sp, 16
 ; CHECK-NEXT:    addi a3, sp, 20
