Skip to content

Commit 645e0dc

Browse files
4vtomat and Copilot authored
[llvm][RISCV] Implement Zilsd load/store pair optimization (#158640)
This commit implements a complete load/store optimization pass for the RISC-V Zilsd extension, which combines pairs of 32-bit load/store instructions into single 64-bit LD/SD instructions when possible. The default required alignment is 8 bytes; the commit also provides a zilsd-4byte-align feature that relaxes this requirement. Related work: https://reviews.llvm.org/D144002 --------- Co-authored-by: Copilot <[email protected]>
1 parent cf837e2 commit 645e0dc

14 files changed

+2398
-11
lines changed

llvm/lib/Target/RISCV/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ add_llvm_target(RISCVCodeGen
7272
RISCVVLOptimizer.cpp
7373
RISCVVMV0Elimination.cpp
7474
RISCVZacasABIFix.cpp
75+
RISCVZilsdOptimizer.cpp
7576
GISel/RISCVCallLowering.cpp
7677
GISel/RISCVInstructionSelector.cpp
7778
GISel/RISCVLegalizerInfo.cpp

llvm/lib/Target/RISCV/RISCV.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,9 @@ void initializeRISCVPushPopOptPass(PassRegistry &);
102102
FunctionPass *createRISCVLoadStoreOptPass();
103103
void initializeRISCVLoadStoreOptPass(PassRegistry &);
104104

105+
FunctionPass *createRISCVPreAllocZilsdOptPass();
106+
void initializeRISCVPreAllocZilsdOptPass(PassRegistry &);
107+
105108
FunctionPass *createRISCVZacasABIFixPass();
106109
void initializeRISCVZacasABIFixPass(PassRegistry &);
107110

llvm/lib/Target/RISCV/RISCVFeatures.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,10 @@ def HasStdExtZilsd : Predicate<"Subtarget->hasStdExtZilsd()">,
194194
AssemblerPredicate<(all_of FeatureStdExtZilsd),
195195
"'Zilsd' (Load/Store pair instructions)">;
196196

197+
// Subtarget feature "zilsd-4byte-align": when enabled, the subtarget field
// AllowZilsd4ByteAlign is set to "true".
def FeatureZilsd4ByteAlign : SubtargetFeature<
    "zilsd-4byte-align", "AllowZilsd4ByteAlign", "true",
    "Allow 4-byte alignment for Zilsd LD/SD instructions">;
200+
197201
// Multiply Extensions
198202

199203
def FeatureStdExtZmmul

llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,23 @@ let Predicates = [HasStdExtZilsd, IsRV32] in {
4747
def PseudoLD_RV32 : PseudoLoad<"ld", GPRPairRV32>;
4848
def PseudoSD_RV32 : PseudoStore<"sd", GPRPairRV32>;
4949

50+
// Pseudo instructions for load/store optimization with 2 separate registers
51+
// Paired-load pseudo whose two results are ordinary GPRs rather than a
// GPRPairRV32 register: $rd1 and $rd2 are the two loaded values, $rs1 is the
// base address register and $imm12 the offset. It has no assembly string or
// selection pattern, so it must be rewritten before emission.
let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
def PseudoLD_RV32_OPT
    : Pseudo<(outs GPR:$rd1, GPR:$rd2),
             (ins GPR:$rs1, simm12_lo:$imm12), [], "", "">;
58+
59+
// Paired-store pseudo whose two data values are ordinary GPRs rather than a
// GPRPairRV32 register: $rs1 and $rs2 are the two values to store, $rs3 is
// the base address register and $imm12 the offset. It has no assembly string
// or selection pattern, so it must be rewritten before emission.
let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
def PseudoSD_RV32_OPT
    : Pseudo<(outs),
             (ins GPR:$rs1, GPR:$rs2, GPR:$rs3, simm12_lo:$imm12), [], "", "">;
66+
5067
def : InstAlias<"ld $rd, (${rs1})", (LD_RV32 GPRPairRV32:$rd, GPR:$rs1, 0), 0>;
5168
def : InstAlias<"sd $rs2, (${rs1})", (SD_RV32 GPRPairRV32:$rs2, GPR:$rs1, 0), 0>;
5269
}

llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp

Lines changed: 219 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@
1111
// paired instruction, leveraging hardware support for paired memory accesses.
1212
// Much of the pairing logic is adapted from the AArch64LoadStoreOpt pass.
1313
//
14+
// Post-allocation Zilsd decomposition: Fixes invalid LD/SD instructions if
15+
// register allocation didn't provide suitable consecutive registers.
16+
//
1417
// NOTE: The AArch64LoadStoreOpt pass performs additional optimizations such as
1518
// merging zero store instructions, promoting loads that read directly from a
1619
// preceding store, and merging base register updates with load/store
@@ -23,6 +26,7 @@
2326

2427
#include "RISCV.h"
2528
#include "RISCVTargetMachine.h"
29+
#include "llvm/ADT/Statistic.h"
2630
#include "llvm/Analysis/AliasAnalysis.h"
2731
#include "llvm/CodeGen/Passes.h"
2832
#include "llvm/MC/TargetRegistry.h"
@@ -38,6 +42,8 @@ using namespace llvm;
3842
// pairs.
3943
static cl::opt<unsigned> LdStLimit("riscv-load-store-scan-limit", cl::init(128),
4044
cl::Hidden);
45+
STATISTIC(NumLD2LW, "Number of LD instructions split back to LW");
46+
STATISTIC(NumSD2SW, "Number of SD instructions split back to SW");
4147

4248
namespace {
4349

@@ -75,6 +81,13 @@ struct RISCVLoadStoreOpt : public MachineFunctionPass {
7581
mergePairedInsns(MachineBasicBlock::iterator I,
7682
MachineBasicBlock::iterator Paired, bool MergeForward);
7783

84+
// Post reg-alloc zilsd part
85+
bool fixInvalidRegPairOp(MachineBasicBlock &MBB,
86+
MachineBasicBlock::iterator &MBBI);
87+
bool isValidZilsdRegPair(Register First, Register Second);
88+
void splitLdSdIntoTwo(MachineBasicBlock &MBB,
89+
MachineBasicBlock::iterator &MBBI, bool IsLoad);
90+
7891
private:
7992
AliasAnalysis *AA;
8093
MachineRegisterInfo *MRI;
@@ -92,8 +105,6 @@ bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
92105
if (skipFunction(Fn.getFunction()))
93106
return false;
94107
const RISCVSubtarget &Subtarget = Fn.getSubtarget<RISCVSubtarget>();
95-
if (!Subtarget.useMIPSLoadStorePairs())
96-
return false;
97108

98109
bool MadeChange = false;
99110
TII = Subtarget.getInstrInfo();
@@ -103,18 +114,34 @@ bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
103114
ModifiedRegUnits.init(*TRI);
104115
UsedRegUnits.init(*TRI);
105116

106-
for (MachineBasicBlock &MBB : Fn) {
107-
LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
117+
if (Subtarget.useMIPSLoadStorePairs()) {
118+
for (MachineBasicBlock &MBB : Fn) {
119+
LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
120+
121+
for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
122+
MBBI != E;) {
123+
if (TII->isPairableLdStInstOpc(MBBI->getOpcode()) &&
124+
tryToPairLdStInst(MBBI))
125+
MadeChange = true;
126+
else
127+
++MBBI;
128+
}
129+
}
130+
}
108131

109-
for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
110-
MBBI != E;) {
111-
if (TII->isPairableLdStInstOpc(MBBI->getOpcode()) &&
112-
tryToPairLdStInst(MBBI))
113-
MadeChange = true;
114-
else
115-
++MBBI;
132+
if (!Subtarget.is64Bit() && Subtarget.hasStdExtZilsd()) {
133+
for (auto &MBB : Fn) {
134+
for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E;) {
135+
if (fixInvalidRegPairOp(MBB, MBBI)) {
136+
MadeChange = true;
137+
// Iterator was updated by fixInvalidRegPairOp
138+
} else {
139+
++MBBI;
140+
}
141+
}
116142
}
117143
}
144+
118145
return MadeChange;
119146
}
120147

@@ -395,6 +422,187 @@ RISCVLoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
395422
return NextI;
396423
}
397424

425+
//===----------------------------------------------------------------------===//
426+
// Post reg-alloc zilsd pass implementation
427+
//===----------------------------------------------------------------------===//
428+
429+
/// Decide whether the physical registers \p First and \p Second can be used
/// together as the register pair of a Zilsd LD/SD.
///
/// \returns true if both registers are x0, or if \p First has an even
///          encoding and \p Second is the register immediately after it.
bool RISCVLoadStoreOpt::isValidZilsdRegPair(Register First, Register Second) {
  if (First != RISCV::X0) {
    // General case: an even-numbered register followed by its odd neighbour.
    const unsigned LoEnc = TRI->getEncodingValue(First);
    const unsigned HiEnc = TRI->getEncodingValue(Second);
    const bool LoIsEven = (LoEnc & 1u) == 0;
    return LoIsEven && HiEnc == LoEnc + 1;
  }

  // x0 as the first register is only acceptable when the second register is
  // x0 as well. Per the spec, an LD with destination x0 is processed like any
  // other load but the result is discarded entirely and x1 is not written;
  // an SD with source x0 stores a 64-bit zero and x1 is not accessed.
  return Second == RISCV::X0;
}
446+
447+
/// Expand a PseudoLD_RV32_OPT / PseudoSD_RV32_OPT back into two LW / SW
/// instructions.
///
/// The pseudo's operands are read as: 0 = first data register, 1 = second
/// data register, 2 = base register, 3 = offset (immediate or symbolic). The
/// first data register is accessed at the original offset and the second at
/// offset + 4.
///
/// \param MBB    Block containing the pseudo.
/// \param MBBI   Iterator at the pseudo. On return it holds the iterator
///               returned by erasing the pseudo.
/// \param IsLoad True to emit two LW, false to emit two SW.
void RISCVLoadStoreOpt::splitLdSdIntoTwo(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator &MBBI,
                                         bool IsLoad) {
  MachineInstr *MI = &*MBBI;
  DebugLoc DL = MI->getDebugLoc();

  const MachineOperand &FirstOp = MI->getOperand(0);
  const MachineOperand &SecondOp = MI->getOperand(1);
  const MachineOperand &BaseOp = MI->getOperand(2);
  const MachineOperand &OffsetOp = MI->getOperand(3);
  Register FirstReg = FirstOp.getReg();
  Register SecondReg = SecondOp.getReg();
  Register BaseReg = BaseOp.getReg();

  unsigned Opc = IsLoad ? RISCV::LW : RISCV::SW;
  MachineInstrBuilder MIB1, MIB2;

  // Create two separate instructions. MIB1 always accesses the first data
  // register (original offset), MIB2 the second one (offset + 4).
  if (IsLoad) {
    // It's possible that first register is same as base register, when we
    // split it becomes incorrect because base register is overwritten, e.g.
    //   X10, X13 = PseudoLD_RV32_OPT killed X10, 0
    // =>
    //   X10 = LW X10, 0
    //   X13 = LW killed X10, 4
    // we can just switch the order to resolve that:
    //   X13 = LW X10, 4
    //   X10 = LW killed X10, 0
    if (FirstReg == BaseReg) {
      MIB2 = BuildMI(MBB, MBBI, DL, TII->get(Opc))
                 .addReg(SecondReg,
                         RegState::Define | getDeadRegState(SecondOp.isDead()))
                 .addReg(BaseReg);
      MIB1 = BuildMI(MBB, MBBI, DL, TII->get(Opc))
                 .addReg(FirstReg,
                         RegState::Define | getDeadRegState(FirstOp.isDead()))
                 .addReg(BaseReg, getKillRegState(BaseOp.isKill()));
    } else {
      MIB1 = BuildMI(MBB, MBBI, DL, TII->get(Opc))
                 .addReg(FirstReg,
                         RegState::Define | getDeadRegState(FirstOp.isDead()))
                 .addReg(BaseReg);
      MIB2 = BuildMI(MBB, MBBI, DL, TII->get(Opc))
                 .addReg(SecondReg,
                         RegState::Define | getDeadRegState(SecondOp.isDead()))
                 .addReg(BaseReg, getKillRegState(BaseOp.isKill()));
    }

    ++NumLD2LW;
    LLVM_DEBUG(dbgs() << "Split LD back to two LW instructions\n");
  } else {
    assert(
        FirstReg != SecondReg &&
        "First register and second register is impossible to be same register");

    // If the first data register is also the base register, the second SW
    // still reads it as its base, so the first SW must not mark it killed.
    // Dropping a kill flag is always conservative.
    const bool KillFirst = FirstOp.isKill() && FirstReg != BaseReg;
    MIB1 = BuildMI(MBB, MBBI, DL, TII->get(Opc))
               .addReg(FirstReg, getKillRegState(KillFirst))
               .addReg(BaseReg);

    MIB2 = BuildMI(MBB, MBBI, DL, TII->get(Opc))
               .addReg(SecondReg, getKillRegState(SecondOp.isKill()))
               .addReg(BaseReg, getKillRegState(BaseOp.isKill()));

    ++NumSD2SW;
    LLVM_DEBUG(dbgs() << "Split SD back to two SW instructions\n");
  }

  // Add offset operands - preserve symbolic references. The first access
  // reuses the original operand unchanged; the second is the same operand
  // displaced by 4 bytes.
  MIB1.add(OffsetOp);
  if (OffsetOp.isImm()) {
    MIB2.addImm(OffsetOp.getImm() + 4);
  } else if (OffsetOp.isGlobal()) {
    MIB2.addGlobalAddress(OffsetOp.getGlobal(), OffsetOp.getOffset() + 4,
                          OffsetOp.getTargetFlags());
  } else if (OffsetOp.isCPI()) {
    MIB2.addConstantPoolIndex(OffsetOp.getIndex(),
                              static_cast<int>(OffsetOp.getOffset() + 4),
                              OffsetOp.getTargetFlags());
  } else if (OffsetOp.isBlockAddress()) {
    MIB2.addBlockAddress(OffsetOp.getBlockAddress(), OffsetOp.getOffset() + 4,
                         OffsetOp.getTargetFlags());
  } else {
    // Any other operand kind would leave the second instruction without an
    // offset operand; make that an explicit invariant violation.
    llvm_unreachable("Unexpected offset operand kind for Zilsd pseudo");
  }

  // Copy memory operands if the original instruction had them
  // FIXME: This is overly conservative; the new instruction accesses 4 bytes,
  // not 8.
  MIB1.cloneMemRefs(*MI);
  MIB2.cloneMemRefs(*MI);

  // Remove the original paired instruction and update iterator
  MBBI = MBB.erase(MBBI);
}
546+
547+
/// Replace a Zilsd pseudo (PseudoLD_RV32_OPT / PseudoSD_RV32_OPT) at \p MBBI.
///
/// If its two data registers form a valid Zilsd pair, the pseudo becomes a
/// single LD_RV32 / SD_RV32 on the matching pair register. Otherwise it is
/// split back into two LW / SW instructions.
///
/// \param MBB  Block containing the instruction.
/// \param MBBI Iterator at the instruction to inspect. When the function
///             returns true it holds the iterator returned by erasing the
///             pseudo; when it returns false it is left untouched.
/// \returns true if the instruction was one of the two pseudos and has been
///          replaced, false for any other opcode.
bool RISCVLoadStoreOpt::fixInvalidRegPairOp(MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();

  // Check if this is a Zilsd pseudo that needs fixing
  if (Opcode != RISCV::PseudoLD_RV32_OPT && Opcode != RISCV::PseudoSD_RV32_OPT)
    return false;

  bool IsLoad = Opcode == RISCV::PseudoLD_RV32_OPT;

  const MachineOperand &FirstOp = MI->getOperand(0);
  const MachineOperand &SecondOp = MI->getOperand(1);
  Register FirstReg = FirstOp.getReg();
  Register SecondReg = SecondOp.getReg();

  if (!isValidZilsdRegPair(FirstReg, SecondReg)) {
    // Need to split back into two instructions
    splitLdSdIntoTwo(MBB, MBBI, IsLoad);
    return true;
  }

  // Registers are valid, convert to real LD/SD instruction
  const MachineOperand &BaseOp = MI->getOperand(2);
  Register BaseReg = BaseOp.getReg();
  DebugLoc DL = MI->getDebugLoc();
  // The offset operand (immediate or symbolic) is forwarded unchanged.
  const MachineOperand &OffsetOp = MI->getOperand(3);

  unsigned RealOpc = IsLoad ? RISCV::LD_RV32 : RISCV::SD_RV32;

  // Look up the pair register whose even sub-register is FirstReg.
  Register RegPair = TRI->getMatchingSuperReg(FirstReg, RISCV::sub_gpr_even,
                                              &RISCV::GPRPairRegClass);
  assert(RegPair.isValid() &&
         "Valid Zilsd register pair must have a GPRPair super-register");

  // Create the real LD/SD instruction with register pair
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(RealOpc));

  if (IsLoad) {
    // For LD, the register pair is the destination. It is dead only when
    // both halves were dead on the pseudo.
    MIB.addReg(RegPair, RegState::Define | getDeadRegState(FirstOp.isDead() &&
                                                           SecondOp.isDead()));
  } else {
    // For SD, the register pair is the source. It is killed only when both
    // halves were killed on the pseudo.
    MIB.addReg(RegPair, getKillRegState(FirstOp.isKill() && SecondOp.isKill()));
  }

  MIB.addReg(BaseReg, getKillRegState(BaseOp.isKill()))
      .add(OffsetOp)
      .cloneMemRefs(*MI);

  LLVM_DEBUG(dbgs() << "Converted pseudo to real instruction: " << *MIB
                    << "\n");

  // Remove the pseudo instruction and update iterator
  MBBI = MBB.erase(MBBI);

  return true;
}
605+
398606
// Returns an instance of the Load / Store Optimization pass.
399607
FunctionPass *llvm::createRISCVLoadStoreOptPass() {
400608
return new RISCVLoadStoreOpt();

0 commit comments

Comments
 (0)