1111// paired instruction, leveraging hardware support for paired memory accesses.
1212// Much of the pairing logic is adapted from the AArch64LoadStoreOpt pass.
1313//
14+ // Post-allocation Zilsd decomposition: Fixes invalid LD/SD instructions if
15+ // register allocation didn't provide suitable consecutive registers.
16+ //
1417// NOTE: The AArch64LoadStoreOpt pass performs additional optimizations such as
1518// merging zero store instructions, promoting loads that read directly from a
1619// preceding store, and merging base register updates with load/store
2326
2427#include " RISCV.h"
2528#include " RISCVTargetMachine.h"
29+ #include " llvm/ADT/Statistic.h"
2630#include " llvm/Analysis/AliasAnalysis.h"
2731#include " llvm/CodeGen/Passes.h"
2832#include " llvm/MC/TargetRegistry.h"
@@ -38,6 +42,8 @@ using namespace llvm;
3842// pairs.
3943static cl::opt<unsigned > LdStLimit (" riscv-load-store-scan-limit" , cl::init(128 ),
4044 cl::Hidden);
45+ STATISTIC (NumLD2LW, " Number of LD instructions split back to LW" );
46+ STATISTIC (NumSD2SW, " Number of SD instructions split back to SW" );
4147
4248namespace {
4349
@@ -75,6 +81,13 @@ struct RISCVLoadStoreOpt : public MachineFunctionPass {
7581 mergePairedInsns (MachineBasicBlock::iterator I,
7682 MachineBasicBlock::iterator Paired, bool MergeForward);
7783
84+ // Post reg-alloc zilsd part
85+ bool fixInvalidRegPairOp (MachineBasicBlock &MBB,
86+ MachineBasicBlock::iterator &MBBI);
87+ bool isValidZilsdRegPair (Register First, Register Second);
88+ void splitLdSdIntoTwo (MachineBasicBlock &MBB,
89+ MachineBasicBlock::iterator &MBBI, bool IsLoad);
90+
7891private:
7992 AliasAnalysis *AA;
8093 MachineRegisterInfo *MRI;
@@ -92,8 +105,6 @@ bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
92105 if (skipFunction (Fn.getFunction ()))
93106 return false ;
94107 const RISCVSubtarget &Subtarget = Fn.getSubtarget <RISCVSubtarget>();
95- if (!Subtarget.useMIPSLoadStorePairs ())
96- return false ;
97108
98109 bool MadeChange = false ;
99110 TII = Subtarget.getInstrInfo ();
@@ -103,18 +114,34 @@ bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
103114 ModifiedRegUnits.init (*TRI);
104115 UsedRegUnits.init (*TRI);
105116
106- for (MachineBasicBlock &MBB : Fn) {
107- LLVM_DEBUG (dbgs () << " MBB: " << MBB.getName () << " \n " );
117+ if (Subtarget.useMIPSLoadStorePairs ()) {
118+ for (MachineBasicBlock &MBB : Fn) {
119+ LLVM_DEBUG (dbgs () << " MBB: " << MBB.getName () << " \n " );
120+
121+ for (MachineBasicBlock::iterator MBBI = MBB.begin (), E = MBB.end ();
122+ MBBI != E;) {
123+ if (TII->isPairableLdStInstOpc (MBBI->getOpcode ()) &&
124+ tryToPairLdStInst (MBBI))
125+ MadeChange = true ;
126+ else
127+ ++MBBI;
128+ }
129+ }
130+ }
108131
109- for (MachineBasicBlock::iterator MBBI = MBB.begin (), E = MBB.end ();
110- MBBI != E;) {
111- if (TII->isPairableLdStInstOpc (MBBI->getOpcode ()) &&
112- tryToPairLdStInst (MBBI))
113- MadeChange = true ;
114- else
115- ++MBBI;
132+ if (!Subtarget.is64Bit () && Subtarget.hasStdExtZilsd ()) {
133+ for (auto &MBB : Fn) {
134+ for (auto MBBI = MBB.begin (), E = MBB.end (); MBBI != E;) {
135+ if (fixInvalidRegPairOp (MBB, MBBI)) {
136+ MadeChange = true ;
137+ // Iterator was updated by fixInvalidRegPairOp
138+ } else {
139+ ++MBBI;
140+ }
141+ }
116142 }
117143 }
144+
118145 return MadeChange;
119146}
120147
@@ -395,6 +422,187 @@ RISCVLoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
395422 return NextI;
396423}
397424
425+ // ===----------------------------------------------------------------------===//
426+ // Post reg-alloc zilsd pass implementation
427+ // ===----------------------------------------------------------------------===//
428+
429+ bool RISCVLoadStoreOpt::isValidZilsdRegPair (Register First, Register Second) {
430+ // Special case: First register can not be zero unless both registers are
431+ // zeros.
432+ // Spec says: LD instructions with destination x0 are processed as any other
433+ // load, but the result is discarded entirely and x1 is not written. If using
434+ // x0 as src of SD, the entire 64-bit operand is zero — i.e., register x1 is
435+ // not accessed.
436+ if (First == RISCV::X0)
437+ return Second == RISCV::X0;
438+
439+ // Check if registers form a valid even/odd pair for Zilsd
440+ unsigned FirstNum = TRI->getEncodingValue (First);
441+ unsigned SecondNum = TRI->getEncodingValue (Second);
442+
443+ // Must be consecutive and first must be even
444+ return (FirstNum % 2 == 0 ) && (SecondNum == FirstNum + 1 );
445+ }
446+
447+ void RISCVLoadStoreOpt::splitLdSdIntoTwo (MachineBasicBlock &MBB,
448+ MachineBasicBlock::iterator &MBBI,
449+ bool IsLoad) {
450+ MachineInstr *MI = &*MBBI;
451+ DebugLoc DL = MI->getDebugLoc ();
452+
453+ const MachineOperand &FirstOp = MI->getOperand (0 );
454+ const MachineOperand &SecondOp = MI->getOperand (1 );
455+ const MachineOperand &BaseOp = MI->getOperand (2 );
456+ Register FirstReg = FirstOp.getReg ();
457+ Register SecondReg = SecondOp.getReg ();
458+ Register BaseReg = BaseOp.getReg ();
459+
460+ // Handle both immediate and symbolic operands for offset
461+ const MachineOperand &OffsetOp = MI->getOperand (3 );
462+ int BaseOffset;
463+ if (OffsetOp.isImm ())
464+ BaseOffset = OffsetOp.getImm ();
465+ else
466+ // For symbolic operands, extract the embedded offset
467+ BaseOffset = OffsetOp.getOffset ();
468+
469+ unsigned Opc = IsLoad ? RISCV::LW : RISCV::SW;
470+ MachineInstrBuilder MIB1, MIB2;
471+
472+ // Create two separate instructions
473+ if (IsLoad) {
474+ // It's possible that first register is same as base register, when we split
475+ // it becomes incorrect because base register is overwritten, e.g.
476+ // X10, X13 = PseudoLD_RV32_OPT killed X10, 0
477+ // =>
478+ // X10 = LW X10, 0
479+ // X13 = LW killed X10, 4
480+ // we can just switch the order to resolve that:
481+ // X13 = LW X10, 4
482+ // X10 = LW killed X10, 0
483+ if (FirstReg == BaseReg) {
484+ MIB2 = BuildMI (MBB, MBBI, DL, TII->get (Opc))
485+ .addReg (SecondReg,
486+ RegState::Define | getDeadRegState (SecondOp.isDead ()))
487+ .addReg (BaseReg);
488+ MIB1 = BuildMI (MBB, MBBI, DL, TII->get (Opc))
489+ .addReg (FirstReg,
490+ RegState::Define | getDeadRegState (FirstOp.isDead ()))
491+ .addReg (BaseReg, getKillRegState (BaseOp.isKill ()));
492+
493+ } else {
494+ MIB1 = BuildMI (MBB, MBBI, DL, TII->get (Opc))
495+ .addReg (FirstReg,
496+ RegState::Define | getDeadRegState (FirstOp.isDead ()))
497+ .addReg (BaseReg);
498+
499+ MIB2 = BuildMI (MBB, MBBI, DL, TII->get (Opc))
500+ .addReg (SecondReg,
501+ RegState::Define | getDeadRegState (SecondOp.isDead ()))
502+ .addReg (BaseReg, getKillRegState (BaseOp.isKill ()));
503+ }
504+
505+ ++NumLD2LW;
506+ LLVM_DEBUG (dbgs () << " Split LD back to two LW instructions\n " );
507+ } else {
508+ assert (
509+ FirstReg != SecondReg &&
510+ " First register and second register is impossible to be same register" );
511+ MIB1 = BuildMI (MBB, MBBI, DL, TII->get (Opc))
512+ .addReg (FirstReg, getKillRegState (FirstOp.isKill ()))
513+ .addReg (BaseReg);
514+
515+ MIB2 = BuildMI (MBB, MBBI, DL, TII->get (Opc))
516+ .addReg (SecondReg, getKillRegState (SecondOp.isKill ()))
517+ .addReg (BaseReg, getKillRegState (BaseOp.isKill ()));
518+
519+ ++NumSD2SW;
520+ LLVM_DEBUG (dbgs () << " Split SD back to two SW instructions\n " );
521+ }
522+
523+ // Add offset operands - preserve symbolic references
524+ MIB1.add (OffsetOp);
525+ if (OffsetOp.isImm ())
526+ MIB2.addImm (BaseOffset + 4 );
527+ else if (OffsetOp.isGlobal ())
528+ MIB2.addGlobalAddress (OffsetOp.getGlobal (), BaseOffset + 4 ,
529+ OffsetOp.getTargetFlags ());
530+ else if (OffsetOp.isCPI ())
531+ MIB2.addConstantPoolIndex (OffsetOp.getIndex (), BaseOffset + 4 ,
532+ OffsetOp.getTargetFlags ());
533+ else if (OffsetOp.isBlockAddress ())
534+ MIB2.addBlockAddress (OffsetOp.getBlockAddress (), BaseOffset + 4 ,
535+ OffsetOp.getTargetFlags ());
536+
537+ // Copy memory operands if the original instruction had them
538+ // FIXME: This is overly conservative; the new instruction accesses 4 bytes,
539+ // not 8.
540+ MIB1.cloneMemRefs (*MI);
541+ MIB2.cloneMemRefs (*MI);
542+
543+ // Remove the original paired instruction and update iterator
544+ MBBI = MBB.erase (MBBI);
545+ }
546+
547+ bool RISCVLoadStoreOpt::fixInvalidRegPairOp (MachineBasicBlock &MBB,
548+ MachineBasicBlock::iterator &MBBI) {
549+ MachineInstr *MI = &*MBBI;
550+ unsigned Opcode = MI->getOpcode ();
551+
552+ // Check if this is a Zilsd pseudo that needs fixing
553+ if (Opcode != RISCV::PseudoLD_RV32_OPT && Opcode != RISCV::PseudoSD_RV32_OPT)
554+ return false ;
555+
556+ bool IsLoad = Opcode == RISCV::PseudoLD_RV32_OPT;
557+
558+ const MachineOperand &FirstOp = MI->getOperand (0 );
559+ const MachineOperand &SecondOp = MI->getOperand (1 );
560+ Register FirstReg = FirstOp.getReg ();
561+ Register SecondReg = SecondOp.getReg ();
562+
563+ if (!isValidZilsdRegPair (FirstReg, SecondReg)) {
564+ // Need to split back into two instructions
565+ splitLdSdIntoTwo (MBB, MBBI, IsLoad);
566+ return true ;
567+ }
568+
569+ // Registers are valid, convert to real LD/SD instruction
570+ const MachineOperand &BaseOp = MI->getOperand (2 );
571+ Register BaseReg = BaseOp.getReg ();
572+ DebugLoc DL = MI->getDebugLoc ();
573+ // Handle both immediate and symbolic operands for offset
574+ const MachineOperand &OffsetOp = MI->getOperand (3 );
575+
576+ unsigned RealOpc = IsLoad ? RISCV::LD_RV32 : RISCV::SD_RV32;
577+
578+ // Create register pair from the two individual registers
579+ unsigned RegPair = TRI->getMatchingSuperReg (FirstReg, RISCV::sub_gpr_even,
580+ &RISCV::GPRPairRegClass);
581+ // Create the real LD/SD instruction with register pair
582+ MachineInstrBuilder MIB = BuildMI (MBB, MBBI, DL, TII->get (RealOpc));
583+
584+ if (IsLoad) {
585+ // For LD, the register pair is the destination
586+ MIB.addReg (RegPair, RegState::Define | getDeadRegState (FirstOp.isDead () &&
587+ SecondOp.isDead ()));
588+ } else {
589+ // For SD, the register pair is the source
590+ MIB.addReg (RegPair, getKillRegState (FirstOp.isKill () && SecondOp.isKill ()));
591+ }
592+
593+ MIB.addReg (BaseReg, getKillRegState (BaseOp.isKill ()))
594+ .add (OffsetOp)
595+ .cloneMemRefs (*MI);
596+
597+ LLVM_DEBUG (dbgs () << " Converted pseudo to real instruction: " << *MIB
598+ << " \n " );
599+
600+ // Remove the pseudo instruction and update iterator
601+ MBBI = MBB.erase (MBBI);
602+
603+ return true ;
604+ }
605+
398606// Returns an instance of the Load / Store Optimization pass.
399607FunctionPass *llvm::createRISCVLoadStoreOptPass () {
400608 return new RISCVLoadStoreOpt ();
0 commit comments