[RISCV][VLOPT] Support segmented store instructions #155467
Changes from all commits:
```cpp
@@ -178,6 +178,19 @@ static unsigned getIntegerExtensionOperandEEW(unsigned Factor,
  return Log2EEW;
}

#define VSEG_CASES(Prefix, EEW)                                                \
  RISCV::Prefix##SEG2E##EEW##_V:                                               \
  case RISCV::Prefix##SEG3E##EEW##_V:                                          \
  case RISCV::Prefix##SEG4E##EEW##_V:                                          \
  case RISCV::Prefix##SEG5E##EEW##_V:                                          \
  case RISCV::Prefix##SEG6E##EEW##_V:                                          \
  case RISCV::Prefix##SEG7E##EEW##_V:                                          \
  case RISCV::Prefix##SEG8E##EEW##_V
#define VSSEG_CASES(EEW) VSEG_CASES(VS, EEW)
#define VSSSEG_CASES(EEW) VSEG_CASES(VSS, EEW)
#define VSUXSEG_CASES(EEW) VSEG_CASES(VSUX, I##EEW)
#define VSOXSEG_CASES(EEW) VSEG_CASES(VSOX, I##EEW)

static std::optional<unsigned>
getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
  const MachineInstr &MI = *MO.getParent();
```
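The macros deliberately omit the leading `case` on the first label so that call sites can write `case VSSEG_CASES(8):`. For illustration (not part of the patch), that use expands to the seven unit-stride segmented store opcodes with EEW=8:

```cpp
// Expansion of `case VSSEG_CASES(8):` — one label per segment count NF=2..8,
// i.e. the MC opcodes for vsseg2e8.v through vsseg8e8.v.
case RISCV::VSSEG2E8_V:
case RISCV::VSSEG3E8_V:
case RISCV::VSSEG4E8_V:
case RISCV::VSSEG5E8_V:
case RISCV::VSSEG6E8_V:
case RISCV::VSSEG7E8_V:
case RISCV::VSSEG8E8_V:
```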
```cpp
@@ -225,21 +238,29 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
  case RISCV::VSE8_V:
  case RISCV::VLSE8_V:
  case RISCV::VSSE8_V:
  case VSSEG_CASES(8):
  case VSSSEG_CASES(8):
    return 3;
  case RISCV::VLE16_V:
  case RISCV::VSE16_V:
  case RISCV::VLSE16_V:
  case RISCV::VSSE16_V:
  case VSSEG_CASES(16):
  case VSSSEG_CASES(16):
    return 4;
  case RISCV::VLE32_V:
  case RISCV::VSE32_V:
  case RISCV::VLSE32_V:
  case RISCV::VSSE32_V:
  case VSSEG_CASES(32):
  case VSSSEG_CASES(32):
    return 5;
  case RISCV::VLE64_V:
  case RISCV::VSE64_V:
  case RISCV::VLSE64_V:
  case RISCV::VSSE64_V:
  case VSSEG_CASES(64):
  case VSSSEG_CASES(64):
    return 6;

  // Vector Indexed Instructions
```
```cpp
@@ -248,31 +269,39 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
  case RISCV::VLUXEI8_V:
  case RISCV::VLOXEI8_V:
  case RISCV::VSUXEI8_V:
  case RISCV::VSOXEI8_V:
  case VSUXSEG_CASES(8):
  case VSOXSEG_CASES(8): {
    if (MO.getOperandNo() == 0)
      return MILog2SEW;
    return 3;
  }
  case RISCV::VLUXEI16_V:
  case RISCV::VLOXEI16_V:
  case RISCV::VSUXEI16_V:
  case RISCV::VSOXEI16_V:
  case VSUXSEG_CASES(16):
  case VSOXSEG_CASES(16): {
    if (MO.getOperandNo() == 0)
      return MILog2SEW;
    return 4;
  }
  case RISCV::VLUXEI32_V:
  case RISCV::VLOXEI32_V:
  case RISCV::VSUXEI32_V:
  case RISCV::VSOXEI32_V:
  case VSUXSEG_CASES(32):
  case VSOXSEG_CASES(32): {
    if (MO.getOperandNo() == 0)
      return MILog2SEW;
    return 5;
  }
  case RISCV::VLUXEI64_V:
  case RISCV::VLOXEI64_V:
  case RISCV::VSUXEI64_V:
  case RISCV::VSOXEI64_V:
  case VSUXSEG_CASES(64):
  case VSOXSEG_CASES(64): {
    if (MO.getOperandNo() == 0)
      return MILog2SEW;
    return 6;
```
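As a concrete reading of the indexed cases (an illustration, not part of the patch): for `vsuxseg2ei16.v` executed with SEW=32, the stored tuple — operand 0 — uses EEW = SEW = 32, while the index vector uses the EEW encoded in the mnemonic, 16. A minimal sketch of that rule, with the non-zero operand number chosen purely for illustration:

```cpp
#include <cassert>

int main() {
  // vsuxseg2ei16.v at SEW=32: log2(SEW) = 5, log2(index EEW) = 4.
  const unsigned MILog2SEW = 5;
  const unsigned IdxLog2EEW = 4;
  // Mirrors the switch above: operand 0 (the stored data) takes the
  // instruction's SEW; any other vector operand is the index vector.
  auto log2EEWForOperand = [&](unsigned OperandNo) {
    return OperandNo == 0 ? MILog2SEW : IdxLog2EEW;
  };
  assert(log2EEWForOperand(0) == 5); // data tuple
  assert(log2EEWForOperand(2) == 4); // index operand (hypothetical position)
  return 0;
}
```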
```cpp
@@ -1375,6 +1404,54 @@ RISCVVLOptimizer::getMinimumVLForUser(const MachineOperand &UserOp) const {
  return VLOp;
}

/// Return true if MI is an instruction used to assemble the register tuple
/// operand of a segmented store, namely RISCVISD::TUPLE_INSERT, which is
/// currently lowered to INSERT_SUBREG.
static bool isTupleInsertInstr(const MachineInstr &MI,
                               const MachineRegisterInfo &MRI) {
  if (MI.getOpcode() != RISCV::INSERT_SUBREG)
    return false;

  const TargetRegisterClass *DstRC =
      MRI.getRegClass(MI.getOperand(0).getReg());
  const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
  if (!RISCVRI::isVRegClass(DstRC->TSFlags))
    return false;
  unsigned NF = RISCVRI::getNF(DstRC->TSFlags);
  if (NF < 2)
    return false;

  // Check whether INSERT_SUBREG has the correct subreg index for tuple
  // inserts.
  auto VLMul = RISCVRI::getLMul(DstRC->TSFlags);
  unsigned SubRegIdx = MI.getOperand(3).getImm();
  [[maybe_unused]] auto [LMul, IsFractional] = RISCVVType::decodeVLMUL(VLMul);
  assert(!IsFractional && "unexpected LMUL for tuple register classes");
  return TRI->getSubRegIdxSize(SubRegIdx) == RISCV::RVVBitsPerBlock * LMul;
}

static bool isSegmentedStoreInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  case VSSEG_CASES(8):
  case VSSSEG_CASES(8):
  case VSUXSEG_CASES(8):
  case VSOXSEG_CASES(8):
  case VSSEG_CASES(16):
  case VSSSEG_CASES(16):
  case VSUXSEG_CASES(16):
  case VSOXSEG_CASES(16):
  case VSSEG_CASES(32):
  case VSSSEG_CASES(32):
  case VSUXSEG_CASES(32):
  case VSOXSEG_CASES(32):
  case VSSEG_CASES(64):
  case VSSSEG_CASES(64):
  case VSUXSEG_CASES(64):
  case VSOXSEG_CASES(64):
    return true;
  default:
    return false;
  }
}

std::optional<MachineOperand>
RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
  std::optional<MachineOperand> CommonVL;
```
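To make the subregister-size check in `isTupleInsertInstr` concrete: take a VRN2M2 tuple class (NF=2 fields, LMUL=2 each). An INSERT_SUBREG counts as a tuple insert only if its subregister index spans exactly one field. A back-of-the-envelope sketch, assuming `RISCV::RVVBitsPerBlock` is 64 as in the RISC-V backend:

```cpp
// Standalone sketch of the arithmetic; the constants mirror the LLVM ones.
constexpr unsigned RVVBitsPerBlock = 64; // minimum bits per vector register
constexpr unsigned LMul = 2;             // LMUL of each field of a VRN2M2 tuple

// A whole-field index such as sub_vrm2_1 covers two vector registers, so its
// size is 2 * 64 = 128 bits (scaled by vscale at runtime); a narrower index
// like sub_vrm1_0 would fail the equality and be rejected.
static_assert(RVVBitsPerBlock * LMul == 128,
              "one field of an LMUL=2 tuple spans two vector registers");
```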
```cpp
@@ -1395,6 +1472,23 @@ RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
      continue;
    }

    if (isTupleInsertInstr(UserMI, *MRI)) {
      LLVM_DEBUG(dbgs().indent(4)
                 << "Peeking through uses of INSERT_SUBREG\n");
      for (MachineOperand &UseOp :
           MRI->use_operands(UserMI.getOperand(0).getReg())) {
        const MachineInstr &CandidateMI = *UseOp.getParent();
        // We should not propagate the VL if the user is not a segmented store
        // or another INSERT_SUBREG, since VL works differently for segmented
        // operations (per field) vs. other RVV ops (on the whole register
        // group).
        if (!isTupleInsertInstr(CandidateMI, *MRI) &&
            !isSegmentedStoreInstr(CandidateMI))
          return std::nullopt;
        Worklist.insert(&UseOp);
      }
      continue;
    }

    if (UserMI.isPHI()) {
      // Don't follow PHI cycles
      if (!PHISeen.insert(&UserMI).second)
```
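The per-field point in the comment above is the crux: for a segmented store, VL counts segments — i.e. elements of each field — not elements of the whole register group, so a VL that is safe for the store is also safe for every field feeding the tuple. A minimal model of `vsseg2e32.v` under that reading (illustration only, not LLVM code):

```cpp
#include <cstddef>
#include <cstdint>

// Models the memory effect of vsseg2e32.v with vector length `vl`: element i
// of field 0 and element i of field 1 are stored together as segment i, so
// exactly `vl` elements of *each* field are read.
static void modelVsseg2e32(const uint32_t *f0, const uint32_t *f1,
                           uint32_t *mem, size_t vl) {
  for (size_t i = 0; i < vl; ++i) {
    mem[2 * i + 0] = f0[i]; // field 0 of segment i
    mem[2 * i + 1] = f1[i]; // field 1 of segment i
  }
}
```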
Review discussion:

> Instead of using macros, we can also use the search tables we have for segmented loads/stores here. I'm open to either way.

> The macros are fine by me. I think the search tables are on the pseudos anyway, so it would be weird to mix the non-pseudo + pseudo opcodes.