Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions llvm/lib/Target/RISCV/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ add_llvm_target(RISCVCodeGen
RISCVVLOptimizer.cpp
RISCVVMV0Elimination.cpp
RISCVZacasABIFix.cpp
RISCVZilsdOptimizer.cpp
GISel/RISCVCallLowering.cpp
GISel/RISCVInstructionSelector.cpp
GISel/RISCVLegalizerInfo.cpp
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/RISCV/RISCV.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,9 @@ void initializeRISCVPushPopOptPass(PassRegistry &);
FunctionPass *createRISCVLoadStoreOptPass();
void initializeRISCVLoadStoreOptPass(PassRegistry &);

FunctionPass *createRISCVPreAllocZilsdOptPass();
void initializeRISCVPreAllocZilsdOptPass(PassRegistry &);

FunctionPass *createRISCVZacasABIFixPass();
void initializeRISCVZacasABIFixPass(PassRegistry &);

Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/RISCV/RISCVFeatures.td
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,10 @@ def HasStdExtZilsd : Predicate<"Subtarget->hasStdExtZilsd()">,
AssemblerPredicate<(all_of FeatureStdExtZilsd),
"'Zilsd' (Load/Store pair instructions)">;

// Subtarget feature "zilsd-4byte-align": sets the AllowZilsd4ByteAlign
// subtarget field to "true". Per its description string it permits Zilsd
// LD/SD instructions to be used on accesses that are only 4-byte aligned.
def FeatureZilsd4ByteAlign
: SubtargetFeature<"zilsd-4byte-align", "AllowZilsd4ByteAlign", "true",
"Allow 4-byte alignment for Zilsd LD/SD instructions">;

// Multiply Extensions

def FeatureStdExtZmmul
Expand Down
17 changes: 17 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,23 @@ let Predicates = [HasStdExtZilsd, IsRV32] in {
def PseudoLD_RV32 : PseudoLoad<"ld", GPRPairRV32>;
def PseudoSD_RV32 : PseudoStore<"sd", GPRPairRV32>;

// Pseudo 64-bit load used by the Zilsd load/store optimization. The two
// 32-bit halves are modelled as two independent GPR results so that register
// allocation is free to pick any registers; the pseudo is resolved after
// register allocation.
// Operands: (outs first half $rd1, second half $rd2),
//           (ins base register $rs1, offset $imm12).
let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
def PseudoLD_RV32_OPT
    : Pseudo<(outs GPR:$rd1, GPR:$rd2),
             (ins GPR:$rs1, simm12_lo:$imm12), [], "", "">;

// Pseudo 64-bit store counterpart of PseudoLD_RV32_OPT.
// Operands: (ins first half $rs1, second half $rs2, base register $rs3,
//            offset $imm12). Note that, unlike ordinary stores, the base
// register is the *third* operand here; the post-RA expansion reads operands
// 0 and 1 as the data registers and operand 2 as the base.
let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
def PseudoSD_RV32_OPT
    : Pseudo<(outs),
             (ins GPR:$rs1, GPR:$rs2, GPR:$rs3, simm12_lo:$imm12), [], "", "">;

def : InstAlias<"ld $rd, (${rs1})", (LD_RV32 GPRPairRV32:$rd, GPR:$rs1, 0), 0>;
def : InstAlias<"sd $rs2, (${rs1})", (SD_RV32 GPRPairRV32:$rs2, GPR:$rs1, 0), 0>;
}
281 changes: 270 additions & 11 deletions llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
// paired instruction, leveraging hardware support for paired memory accesses.
// Much of the pairing logic is adapted from the AArch64LoadStoreOpt pass.
//
// Post-allocation Zilsd decomposition: Fixes invalid LD/SD instructions if
// register allocation didn't provide suitable consecutive registers.
//
// NOTE: The AArch64LoadStoreOpt pass performs additional optimizations such as
// merging zero store instructions, promoting loads that read directly from a
// preceding store, and merging base register updates with load/store
Expand All @@ -23,6 +26,7 @@

#include "RISCV.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/TargetRegistry.h"
Expand All @@ -38,6 +42,8 @@ using namespace llvm;
// pairs.
static cl::opt<unsigned> LdStLimit("riscv-load-store-scan-limit", cl::init(128),
cl::Hidden);
STATISTIC(NumLD2LW, "Number of LD instructions split back to LW");
STATISTIC(NumSD2SW, "Number of SD instructions split back to SW");

namespace {

Expand Down Expand Up @@ -75,6 +81,14 @@ struct RISCVLoadStoreOpt : public MachineFunctionPass {
mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Paired, bool MergeForward);

// Post reg-alloc zilsd part
bool fixInvalidRegPairOp(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI);
bool isConsecutiveRegPair(Register First, Register Second);
void splitLdSdIntoTwo(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI, bool IsLoad);
int64_t getLoadStoreOffset(const MachineInstr &MI);

private:
AliasAnalysis *AA;
MachineRegisterInfo *MRI;
Expand All @@ -91,9 +105,8 @@ INITIALIZE_PASS(RISCVLoadStoreOpt, DEBUG_TYPE, RISCV_LOAD_STORE_OPT_NAME, false,
bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
if (skipFunction(Fn.getFunction()))
return false;

const RISCVSubtarget &Subtarget = Fn.getSubtarget<RISCVSubtarget>();
if (!Subtarget.useLoadStorePairs())
return false;

bool MadeChange = false;
TII = Subtarget.getInstrInfo();
Expand All @@ -103,18 +116,34 @@ bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
ModifiedRegUnits.init(*TRI);
UsedRegUnits.init(*TRI);

for (MachineBasicBlock &MBB : Fn) {
LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
if (Subtarget.useLoadStorePairs()) {
for (MachineBasicBlock &MBB : Fn) {
LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");

for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
MBBI != E;) {
if (TII->isPairableLdStInstOpc(MBBI->getOpcode()) &&
tryToPairLdStInst(MBBI))
MadeChange = true;
else
++MBBI;
}
}
}

for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
MBBI != E;) {
if (TII->isPairableLdStInstOpc(MBBI->getOpcode()) &&
tryToPairLdStInst(MBBI))
MadeChange = true;
else
++MBBI;
if (!Subtarget.is64Bit() && Subtarget.hasStdExtZilsd()) {
for (auto &MBB : Fn) {
for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E;) {
if (fixInvalidRegPairOp(MBB, MBBI)) {
MadeChange = true;
// Iterator was updated by fixInvalidRegPairOp
} else {
++MBBI;
}
}
}
}

return MadeChange;
}

Expand Down Expand Up @@ -395,6 +424,236 @@ RISCVLoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
return NextI;
}

//===----------------------------------------------------------------------===//
// Post reg-alloc zilsd pass implementation
//===----------------------------------------------------------------------===//

// Helper function to extract offset from load/store operands
int64_t RISCVLoadStoreOpt::getLoadStoreOffset(const MachineInstr &MI) {
const MachineOperand &OffsetOp = MI.getOperand(2);

// Handle immediate offset
if (OffsetOp.isImm())
return OffsetOp.getImm();

// Handle symbolic operands with MO_LO flag (from MergeBaseOffset)
if (OffsetOp.getTargetFlags() & RISCVII::MO_LO)
if (OffsetOp.isGlobal() || OffsetOp.isCPI() || OffsetOp.isBlockAddress() ||
OffsetOp.isSymbol())
return OffsetOp.getOffset();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am still not sure this is right.

If there is the MI equivalent of lw a0, %lo(sym+8)(a1), I think the getOffset() is 8. This doesn't mean that %lo(sym+4) is necessarily 8 (it depends on sym as well, which only the linker knows). We can't merge lw a0, %lo(sym+8)(a2) with lw a1, 12(a2) (this may be impossible to get in the compiler, because both should have %lo or neither), but maybe we can merge lw a0, %lo(sym+8)(a2) with lw a1, %lo(sym+12)(a2).

Maybe what is confusing the situation is that you're representing "couldn't understand an offset" with returning 0, when 0 is a valid (and not unlikely) offset.

I think this function would be clearer with a boolean return value for whether this understood an offset, and an out-parameter of the offset that was found? You might need another out parameter for the thing the offset is relative to in the %lo case.


return 0;
}

// Returns true if (First, Second) can be encoded as the register pair of a
// Zilsd LD/SD instruction.
//
// A pair is valid when either
//  * both registers are X0 (the zero "pair"), or
//  * First has an even, non-zero encoding and Second is the register with the
//    next encoding (First + 1).
//
// X0 combined with any other register - in particular (X0, X1) - is NOT a
// valid pair, even though the encodings are even/odd and consecutive.
bool RISCVLoadStoreOpt::isConsecutiveRegPair(Register First, Register Second) {
  // Special case: the first register can only be X0 if both registers are X0.
  if (First == RISCV::X0)
    return Second == RISCV::X0;

  // Check if registers form a valid even/odd pair for Zilsd.
  unsigned FirstNum = TRI->getEncodingValue(First);
  unsigned SecondNum = TRI->getEncodingValue(Second);

  // Must be consecutive and first must be even.
  return (FirstNum % 2 == 0) && (SecondNum == FirstNum + 1);
}

// Expands a PseudoLD_RV32_OPT / PseudoSD_RV32_OPT whose data registers cannot
// be encoded as a Zilsd pair back into two 32-bit accesses (LW/LW or SW/SW)
// at offsets +0 and +4 from the original address.
//
// Pseudo operand layout: (first data reg, second data reg, base reg, offset).
//
// \param MBB    Block containing the pseudo.
// \param MBBI   Iterator to the pseudo; on return it refers to the
//               instruction that followed the (now erased) pseudo.
// \param IsLoad True to emit LW (data registers are defs), false to emit SW.
void RISCVLoadStoreOpt::splitLdSdIntoTwo(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator &MBBI,
                                         bool IsLoad) {
  MachineInstr *MI = &*MBBI;
  DebugLoc DL = MI->getDebugLoc();

  Register FirstReg = MI->getOperand(0).getReg();
  Register SecondReg = MI->getOperand(1).getReg();
  Register BaseReg = MI->getOperand(2).getReg();
  const MachineOperand &OffsetOp = MI->getOperand(3);

  unsigned Opc = IsLoad ? RISCV::LW : RISCV::SW;

  // Appends the offset operand of the original instruction, displaced by
  // Delta bytes. Symbolic operands are copied wholesale so that their kind,
  // target flags and addend are all preserved; this also covers external
  // symbols, whose addend cannot be passed through addExternalSymbol().
  auto AddOffset = [&OffsetOp](const MachineInstrBuilder &MIB, int64_t Delta) {
    if (OffsetOp.isImm()) {
      MIB.addImm(OffsetOp.getImm() + Delta);
      return;
    }
    MachineOperand Adjusted = OffsetOp;
    Adjusted.setOffset(OffsetOp.getOffset() + Delta);
    MIB.add(Adjusted);
  };

  // Emits one 32-bit access of DataReg at (original offset + Delta), inserted
  // before the pseudo.
  auto EmitHalf = [&](Register DataReg, int64_t Delta) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Opc));
    if (IsLoad)
      MIB.addReg(DataReg, RegState::Define);
    else
      MIB.addReg(DataReg);
    MIB.addReg(BaseReg);
    AddOffset(MIB, Delta);

    // Copy memory operands if the original instruction had them.
    // FIXME: This is overly conservative; the new instruction accesses 4
    // bytes, not 8.
    if (!MI->memoperands_empty())
      MIB.cloneMemRefs(*MI);
  };

  // If a load's first destination is also the base register, loading it first
  // would clobber the address needed by the second load, e.g.
  //   X10, X13 = PseudoLD_RV32_OPT X10, 0
  // must become
  //   X13 = LW X10, 4
  //   X10 = LW X10, 0
  // so emit the high half first in that case.
  if (IsLoad && FirstReg == BaseReg) {
    EmitHalf(SecondReg, 4);
    EmitHalf(FirstReg, 0);
  } else {
    EmitHalf(FirstReg, 0);
    EmitHalf(SecondReg, 4);
  }

  if (IsLoad) {
    ++NumLD2LW;
    LLVM_DEBUG(dbgs() << "Split LD back to two LW instructions\n");
  } else {
    ++NumSD2SW;
    LLVM_DEBUG(dbgs() << "Split SD back to two SW instructions\n");
  }

  // Remove the original paired instruction and update iterator
  MBBI = MBB.erase(MBBI);
}

// Resolves a post-register-allocation Zilsd pseudo (PseudoLD_RV32_OPT or
// PseudoSD_RV32_OPT) at \p MBBI.
//
// If the two allocated data registers form a valid Zilsd pair, the pseudo is
// replaced by the real LD_RV32 / SD_RV32 on the corresponding GPR pair;
// otherwise it is split into two LW / SW instructions.
//
// \returns false (leaving \p MBBI untouched) if the instruction is not one of
// the two pseudos. Otherwise returns true, and \p MBBI refers to the
// instruction that followed the erased pseudo, so the caller must not
// advance it again.
bool RISCVLoadStoreOpt::fixInvalidRegPairOp(MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();

  // Check if this is a Zilsd pseudo that needs fixing
  if (Opcode != RISCV::PseudoLD_RV32_OPT && Opcode != RISCV::PseudoSD_RV32_OPT)
    return false;

  bool IsLoad = (Opcode == RISCV::PseudoLD_RV32_OPT);

  // Pseudo operand layout: (first data reg, second data reg, base, offset).
  Register FirstReg = MI->getOperand(0).getReg();
  Register SecondReg = MI->getOperand(1).getReg();

  // Check if we have valid consecutive registers
  if (!isConsecutiveRegPair(FirstReg, SecondReg)) {
    // Need to split back into two instructions
    splitLdSdIntoTwo(MBB, MBBI, IsLoad);
    return true;
  }

  // Registers are valid, convert to real LD/SD instruction
  Register BaseReg = MI->getOperand(2).getReg();
  const MachineOperand &OffsetOp = MI->getOperand(3);
  DebugLoc DL = MI->getDebugLoc();

  unsigned RealOpc = IsLoad ? RISCV::LD_RV32 : RISCV::SD_RV32;

  // Find the GPR pair whose even half is the first data register.
  Register RegPair = TRI->getMatchingSuperReg(FirstReg, RISCV::sub_gpr_even,
                                              &RISCV::GPRPairRegClass);
  assert(RegPair.isValid() && "No GPR pair for a validated Zilsd register pair");

  // Create the real LD/SD instruction with register pair
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(RealOpc));

  if (IsLoad) {
    // For LD, the register pair is the destination
    MIB.addReg(RegPair, RegState::Define);
  } else {
    // For SD, the register pair is the source
    MIB.addReg(RegPair);
  }

  MIB.addReg(BaseReg);

  // The address is unchanged, so copy the offset operand verbatim. This keeps
  // the operand kind, target flags and addend for every immediate or symbolic
  // form instead of re-creating (and possibly truncating) it by hand.
  MIB.add(OffsetOp);

  // Copy memory operands if the original instruction had them
  if (!MI->memoperands_empty())
    MIB.cloneMemRefs(*MI);

  LLVM_DEBUG(dbgs() << "Converted pseudo to real instruction: " << *MIB
                    << "\n");

  // Remove the pseudo instruction and update iterator
  MBBI = MBB.erase(MBBI);

  return true;
}

// Returns an instance of the Load / Store Optimization pass.
FunctionPass *llvm::createRISCVLoadStoreOptPass() {
return new RISCVLoadStoreOpt();
Expand Down
Loading