Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions llvm/lib/Target/RISCV/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ add_llvm_target(RISCVCodeGen
RISCVVLOptimizer.cpp
RISCVVMV0Elimination.cpp
RISCVZacasABIFix.cpp
RISCVZilsdOptimizer.cpp
GISel/RISCVCallLowering.cpp
GISel/RISCVInstructionSelector.cpp
GISel/RISCVLegalizerInfo.cpp
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/RISCV/RISCV.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,9 @@ void initializeRISCVPushPopOptPass(PassRegistry &);
FunctionPass *createRISCVLoadStoreOptPass();
void initializeRISCVLoadStoreOptPass(PassRegistry &);

FunctionPass *createRISCVPreAllocZilsdOptPass();
void initializeRISCVPreAllocZilsdOptPass(PassRegistry &);

FunctionPass *createRISCVZacasABIFixPass();
void initializeRISCVZacasABIFixPass(PassRegistry &);

Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/RISCV/RISCVFeatures.td
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,10 @@ def HasStdExtZilsd : Predicate<"Subtarget->hasStdExtZilsd()">,
AssemblerPredicate<(all_of FeatureStdExtZilsd),
"'Zilsd' (Load/Store pair instructions)">;

def FeatureZilsd4ByteAlign
: SubtargetFeature<"zilsd-4byte-align", "AllowZilsd4ByteAlign", "true",
"Allow 4-byte alignment for Zilsd LD/SD instructions">;

// Multiply Extensions

def FeatureStdExtZmmul
Expand Down
17 changes: 17 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,23 @@ let Predicates = [HasStdExtZilsd, IsRV32] in {
def PseudoLD_RV32 : PseudoLoad<"ld", GPRPairRV32>;
def PseudoSD_RV32 : PseudoStore<"sd", GPRPairRV32>;

// Pseudo instructions for load/store optimization with 2 separate registers
def PseudoLD_RV32_OPT :
Pseudo<(outs GPR:$rd1, GPR:$rd2),
(ins GPR:$rs1, simm12_lo:$imm12), [], "", ""> {
let hasSideEffects = 0;
let mayLoad = 1;
let mayStore = 0;
}

def PseudoSD_RV32_OPT :
Pseudo<(outs),
(ins GPR:$rs1, GPR:$rs2, GPR:$rs3, simm12_lo:$imm12), [], "", ""> {
let hasSideEffects = 0;
let mayLoad = 0;
let mayStore = 1;
}

def : InstAlias<"ld $rd, (${rs1})", (LD_RV32 GPRPairRV32:$rd, GPR:$rs1, 0), 0>;
def : InstAlias<"sd $rs2, (${rs1})", (SD_RV32 GPRPairRV32:$rs2, GPR:$rs1, 0), 0>;
}
281 changes: 270 additions & 11 deletions llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
// paired instruction, leveraging hardware support for paired memory accesses.
// Much of the pairing logic is adapted from the AArch64LoadStoreOpt pass.
//
// Post-allocation Zilsd decomposition: Fixes invalid LD/SD instructions if
// register allocation didn't provide suitable consecutive registers.
//
// NOTE: The AArch64LoadStoreOpt pass performs additional optimizations such as
// merging zero store instructions, promoting loads that read directly from a
// preceding store, and merging base register updates with load/store
Expand All @@ -23,6 +26,7 @@

#include "RISCV.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/TargetRegistry.h"
Expand All @@ -38,6 +42,8 @@ using namespace llvm;
// pairs.
static cl::opt<unsigned> LdStLimit("riscv-load-store-scan-limit", cl::init(128),
cl::Hidden);
STATISTIC(NumLD2LW, "Number of LD instructions split back to LW");
STATISTIC(NumSD2SW, "Number of SD instructions split back to SW");

namespace {

Expand Down Expand Up @@ -75,6 +81,14 @@ struct RISCVLoadStoreOpt : public MachineFunctionPass {
mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Paired, bool MergeForward);

// Post reg-alloc zilsd part
bool fixInvalidRegPairOp(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI);
bool isConsecutiveRegPair(Register First, Register Second);
void splitLdSdIntoTwo(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI, bool IsLoad);
int64_t getLoadStoreOffset(const MachineInstr &MI);

private:
AliasAnalysis *AA;
MachineRegisterInfo *MRI;
Expand All @@ -91,9 +105,8 @@ INITIALIZE_PASS(RISCVLoadStoreOpt, DEBUG_TYPE, RISCV_LOAD_STORE_OPT_NAME, false,
bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
if (skipFunction(Fn.getFunction()))
return false;

const RISCVSubtarget &Subtarget = Fn.getSubtarget<RISCVSubtarget>();
if (!Subtarget.useMIPSLoadStorePairs())
return false;

bool MadeChange = false;
TII = Subtarget.getInstrInfo();
Expand All @@ -103,18 +116,34 @@ bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
ModifiedRegUnits.init(*TRI);
UsedRegUnits.init(*TRI);

for (MachineBasicBlock &MBB : Fn) {
LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
if (Subtarget.useMIPSLoadStorePairs()) {
for (MachineBasicBlock &MBB : Fn) {
LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");

for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
MBBI != E;) {
if (TII->isPairableLdStInstOpc(MBBI->getOpcode()) &&
tryToPairLdStInst(MBBI))
MadeChange = true;
else
++MBBI;
}
}
}

for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
MBBI != E;) {
if (TII->isPairableLdStInstOpc(MBBI->getOpcode()) &&
tryToPairLdStInst(MBBI))
MadeChange = true;
else
++MBBI;
if (!Subtarget.is64Bit() && Subtarget.hasStdExtZilsd()) {
for (auto &MBB : Fn) {
for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E;) {
if (fixInvalidRegPairOp(MBB, MBBI)) {
MadeChange = true;
// Iterator was updated by fixInvalidRegPairOp
} else {
++MBBI;
}
}
}
}

return MadeChange;
}

Expand Down Expand Up @@ -395,6 +424,236 @@ RISCVLoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
return NextI;
}

//===----------------------------------------------------------------------===//
// Post reg-alloc zilsd pass implementation
//===----------------------------------------------------------------------===//

// Helper function to extract offset from load/store operands
int64_t RISCVLoadStoreOpt::getLoadStoreOffset(const MachineInstr &MI) {
const MachineOperand &OffsetOp = MI.getOperand(2);

// Handle immediate offset
if (OffsetOp.isImm())
return OffsetOp.getImm();

// Handle symbolic operands with MO_LO flag (from MergeBaseOffset)
if (OffsetOp.getTargetFlags() & RISCVII::MO_LO)
if (OffsetOp.isGlobal() || OffsetOp.isCPI() || OffsetOp.isBlockAddress() ||
OffsetOp.isSymbol())
return OffsetOp.getOffset();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am still not sure this is right.

If there is the MI equivalent of lw a0, %lo(sym+8)(a1), I think the getOffset() is 8. This doesn't mean that %lo(sym+4) is necessarily 8 (it depends on sym as well, which only the linker knows). We can't merge lw a0, %lo(sym+8)(a2) with lw a1, 12(a2) (this may be impossible to get in the compiler, because both should have %lo or neither), but maybe we can merge lw a0, %lo(sym+8)(a2) with lw a1, %lo(sym+12)(a2).

Maybe what is confusing the situation is that you're representing "couldn't understand an offset" with returning 0, when 0 is a valid (and not unlikely) offset.

I think this function would be clearer with a boolean return value for whether this understood an offset, and an out-parameter of the offset that was found? You might need another out parameter for the thing the offset is relative to in the %lo case.


return 0;
}

bool RISCVLoadStoreOpt::isConsecutiveRegPair(Register First, Register Second) {
// Special case: First register can not be zero
// zeros
if (First == RISCV::X0)
return true;

// Check if registers form a valid even/odd pair for Zilsd
unsigned FirstNum = TRI->getEncodingValue(First);
unsigned SecondNum = TRI->getEncodingValue(Second);

// Must be consecutive and first must be even
return (FirstNum % 2 == 0) && (SecondNum == FirstNum + 1);
Comment on lines +458 to +459
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If FirstNum == 0 and SecondNum == 1, then this will return true, when that's not a valid even-odd pair.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The case FirstNum == 0 and SecondNum == 1 will still return true.

}

void RISCVLoadStoreOpt::splitLdSdIntoTwo(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
bool IsLoad) {
MachineInstr *MI = &*MBBI;
DebugLoc DL = MI->getDebugLoc();

Register FirstReg = MI->getOperand(0).getReg();
Register SecondReg = MI->getOperand(1).getReg();
Register BaseReg = MI->getOperand(2).getReg();

// Handle both immediate and symbolic operands for offset
const MachineOperand &OffsetOp = MI->getOperand(3);
int BaseOffset;
if (OffsetOp.isImm())
BaseOffset = OffsetOp.getImm();
else
// For symbolic operands, extract the embedded offset
BaseOffset = OffsetOp.getOffset();

unsigned Opc = IsLoad ? RISCV::LW : RISCV::SW;

// Create two separate instructions
if (IsLoad) {
auto MIB1 = BuildMI(MBB, MBBI, DL, TII->get(Opc))
.addReg(FirstReg, RegState::Define)
.addReg(BaseReg);

auto MIB2 = BuildMI(MBB, MBBI, DL, TII->get(Opc))
.addReg(SecondReg, RegState::Define)
.addReg(BaseReg);

// Add offset operands - preserve symbolic references
if (OffsetOp.isImm()) {
MIB1.addImm(BaseOffset);
MIB2.addImm(BaseOffset + 4);
} else if (OffsetOp.isGlobal()) {
MIB1.addGlobalAddress(OffsetOp.getGlobal(), BaseOffset,
OffsetOp.getTargetFlags());
MIB2.addGlobalAddress(OffsetOp.getGlobal(), BaseOffset + 4,
OffsetOp.getTargetFlags());
} else if (OffsetOp.isCPI()) {
MIB1.addConstantPoolIndex(OffsetOp.getIndex(), BaseOffset,
OffsetOp.getTargetFlags());
MIB2.addConstantPoolIndex(OffsetOp.getIndex(), BaseOffset + 4,
OffsetOp.getTargetFlags());
} else if (OffsetOp.isSymbol()) {
MIB1.addExternalSymbol(OffsetOp.getSymbolName(),
OffsetOp.getTargetFlags());
MIB2.addExternalSymbol(OffsetOp.getSymbolName(),
OffsetOp.getTargetFlags());
} else if (OffsetOp.isBlockAddress()) {
MIB1.addBlockAddress(OffsetOp.getBlockAddress(), BaseOffset,
OffsetOp.getTargetFlags());
MIB2.addBlockAddress(OffsetOp.getBlockAddress(), BaseOffset + 4,
OffsetOp.getTargetFlags());
}

// Copy memory operands if the original instruction had them
// FIXME: This is overly conservative; the new instruction accesses 4 bytes,
// not 8.
if (MI->memoperands_begin() != MI->memoperands_end()) {
MIB1.cloneMemRefs(*MI);
MIB2.cloneMemRefs(*MI);
}

++NumLD2LW;
LLVM_DEBUG(dbgs() << "Split LD back to two LW instructions\n");
} else {
auto MIB1 =
BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(FirstReg).addReg(BaseReg);

auto MIB2 =
BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(SecondReg).addReg(BaseReg);

// Add offset operands - preserve symbolic references
if (OffsetOp.isImm()) {
MIB1.addImm(BaseOffset);
MIB2.addImm(BaseOffset + 4);
} else if (OffsetOp.isGlobal()) {
MIB1.addGlobalAddress(OffsetOp.getGlobal(), BaseOffset,
OffsetOp.getTargetFlags());
MIB2.addGlobalAddress(OffsetOp.getGlobal(), BaseOffset + 4,
OffsetOp.getTargetFlags());
} else if (OffsetOp.isCPI()) {
MIB1.addConstantPoolIndex(OffsetOp.getIndex(), BaseOffset,
OffsetOp.getTargetFlags());
MIB2.addConstantPoolIndex(OffsetOp.getIndex(), BaseOffset + 4,
OffsetOp.getTargetFlags());
} else if (OffsetOp.isSymbol()) {
MIB1.addExternalSymbol(OffsetOp.getSymbolName(),
OffsetOp.getTargetFlags());
MIB2.addExternalSymbol(OffsetOp.getSymbolName(),
OffsetOp.getTargetFlags());
} else if (OffsetOp.isBlockAddress()) {
MIB1.addBlockAddress(OffsetOp.getBlockAddress(), BaseOffset,
OffsetOp.getTargetFlags());
MIB2.addBlockAddress(OffsetOp.getBlockAddress(), BaseOffset + 4,
OffsetOp.getTargetFlags());
}

// Copy memory operands if the original instruction had them
// FIXME: This is overly conservative; the new instruction accesses 4 bytes,
// not 8.
if (MI->memoperands_begin() != MI->memoperands_end()) {
MIB1.cloneMemRefs(*MI);
MIB2.cloneMemRefs(*MI);
}

++NumSD2SW;
LLVM_DEBUG(dbgs() << "Split SD back to two SW instructions\n");
}

// Remove the original paired instruction and update iterator
MBBI = MBB.erase(MBBI);
}

bool RISCVLoadStoreOpt::fixInvalidRegPairOp(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI) {
MachineInstr *MI = &*MBBI;
unsigned Opcode = MI->getOpcode();

// Check if this is a Zilsd pseudo that needs fixing
if (Opcode != RISCV::PseudoLD_RV32_OPT && Opcode != RISCV::PseudoSD_RV32_OPT)
return false;

bool IsLoad = (Opcode == RISCV::PseudoLD_RV32_OPT);

Register FirstReg = MI->getOperand(0).getReg();
Register SecondReg = MI->getOperand(1).getReg();

// Check if we have valid consecutive registers
if (!isConsecutiveRegPair(FirstReg, SecondReg)) {
// Need to split back into two instructions
splitLdSdIntoTwo(MBB, MBBI, IsLoad);
return true;
}

// Registers are valid, convert to real LD/SD instruction
Register BaseReg = MI->getOperand(2).getReg();
DebugLoc DL = MI->getDebugLoc();
// Handle both immediate and symbolic operands for offset
const MachineOperand &OffsetOp = MI->getOperand(3);
int BaseOffset;
if (OffsetOp.isImm())
BaseOffset = OffsetOp.getImm();
else
// For symbolic operands, extract the embedded offset
BaseOffset = OffsetOp.getOffset();

unsigned RealOpc = IsLoad ? RISCV::LD_RV32 : RISCV::SD_RV32;

// Create register pair from the two individual registers
unsigned RegPair = TRI->getMatchingSuperReg(FirstReg, RISCV::sub_gpr_even,
&RISCV::GPRPairRegClass);
// Create the real LD/SD instruction with register pair
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(RealOpc));

if (IsLoad) {
// For LD, the register pair is the destination
MIB.addReg(RegPair, RegState::Define);
} else {
// For SD, the register pair is the source
MIB.addReg(RegPair);
}

MIB.addReg(BaseReg);

// Add offset operand - preserve symbolic references
if (OffsetOp.isImm())
MIB.addImm(BaseOffset);
else if (OffsetOp.isGlobal())
MIB.addGlobalAddress(OffsetOp.getGlobal(), BaseOffset,
OffsetOp.getTargetFlags());
else if (OffsetOp.isCPI())
MIB.addConstantPoolIndex(OffsetOp.getIndex(), BaseOffset,
OffsetOp.getTargetFlags());
else if (OffsetOp.isSymbol())
MIB.addExternalSymbol(OffsetOp.getSymbolName(), OffsetOp.getTargetFlags());
else if (OffsetOp.isBlockAddress())
MIB.addBlockAddress(OffsetOp.getBlockAddress(), BaseOffset,
OffsetOp.getTargetFlags());

// Copy memory operands if the original instruction had them
if (MI->memoperands_begin() != MI->memoperands_end())
MIB.cloneMemRefs(*MI);

LLVM_DEBUG(dbgs() << "Converted pseudo to real instruction: " << *MIB
<< "\n");

// Remove the pseudo instruction and update iterator
MBBI = MBB.erase(MBBI);

return true;
}

// Returns an instance of the Load / Store Optimization pass.
FunctionPass *llvm::createRISCVLoadStoreOptPass() {
return new RISCVLoadStoreOpt();
Expand Down
Loading