-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[llvm][RISCV] Implement Zilsd load/store pair optimization #158640
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
4bbf382
5722750
569a86c
ce8fc11
2ff058f
a4cca81
294d83e
14c0463
57c10b5
7c9b744
90f3aa4
ed57a2c
fff9d4b
483104e
66d95f1
c4c4925
7c377e3
fe67c22
696bad8
d79e21c
2a87065
44f5809
9453b78
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
|
|
@@ -11,6 +11,9 @@ | |||||||
| // paired instruction, leveraging hardware support for paired memory accesses. | ||||||||
| // Much of the pairing logic is adapted from the AArch64LoadStoreOpt pass. | ||||||||
| // | ||||||||
| // Post-allocation Zilsd decomposition: Fixes invalid LD/SD instructions if | ||||||||
| // register allocation didn't provide suitable consecutive registers. | ||||||||
| // | ||||||||
| // NOTE: The AArch64LoadStoreOpt pass performs additional optimizations such as | ||||||||
| // merging zero store instructions, promoting loads that read directly from a | ||||||||
| // preceding store, and merging base register updates with load/store | ||||||||
|
|
@@ -23,6 +26,7 @@ | |||||||
|
|
||||||||
| #include "RISCV.h" | ||||||||
| #include "RISCVTargetMachine.h" | ||||||||
| #include "llvm/ADT/Statistic.h" | ||||||||
| #include "llvm/Analysis/AliasAnalysis.h" | ||||||||
| #include "llvm/CodeGen/Passes.h" | ||||||||
| #include "llvm/MC/TargetRegistry.h" | ||||||||
|
|
@@ -38,6 +42,8 @@ using namespace llvm; | |||||||
| // pairs. | ||||||||
| static cl::opt<unsigned> LdStLimit("riscv-load-store-scan-limit", cl::init(128), | ||||||||
| cl::Hidden); | ||||||||
| STATISTIC(NumLD2LW, "Number of LD instructions split back to LW"); | ||||||||
| STATISTIC(NumSD2SW, "Number of SD instructions split back to SW"); | ||||||||
|
|
||||||||
| namespace { | ||||||||
|
|
||||||||
|
|
@@ -75,6 +81,13 @@ struct RISCVLoadStoreOpt : public MachineFunctionPass { | |||||||
| mergePairedInsns(MachineBasicBlock::iterator I, | ||||||||
| MachineBasicBlock::iterator Paired, bool MergeForward); | ||||||||
|
|
||||||||
| // Post reg-alloc zilsd part | ||||||||
| bool fixInvalidRegPairOp(MachineBasicBlock &MBB, | ||||||||
| MachineBasicBlock::iterator &MBBI); | ||||||||
| bool isConsecutiveRegPair(Register First, Register Second); | ||||||||
| void splitLdSdIntoTwo(MachineBasicBlock &MBB, | ||||||||
| MachineBasicBlock::iterator &MBBI, bool IsLoad); | ||||||||
|
|
||||||||
| private: | ||||||||
| AliasAnalysis *AA; | ||||||||
| MachineRegisterInfo *MRI; | ||||||||
|
|
@@ -91,9 +104,8 @@ INITIALIZE_PASS(RISCVLoadStoreOpt, DEBUG_TYPE, RISCV_LOAD_STORE_OPT_NAME, false, | |||||||
| bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { | ||||||||
| if (skipFunction(Fn.getFunction())) | ||||||||
| return false; | ||||||||
|
|
||||||||
| const RISCVSubtarget &Subtarget = Fn.getSubtarget<RISCVSubtarget>(); | ||||||||
| if (!Subtarget.useMIPSLoadStorePairs()) | ||||||||
| return false; | ||||||||
|
|
||||||||
| bool MadeChange = false; | ||||||||
| TII = Subtarget.getInstrInfo(); | ||||||||
|
|
@@ -103,18 +115,34 @@ bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { | |||||||
| ModifiedRegUnits.init(*TRI); | ||||||||
| UsedRegUnits.init(*TRI); | ||||||||
|
|
||||||||
| for (MachineBasicBlock &MBB : Fn) { | ||||||||
| LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n"); | ||||||||
| if (Subtarget.useMIPSLoadStorePairs()) { | ||||||||
| for (MachineBasicBlock &MBB : Fn) { | ||||||||
| LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n"); | ||||||||
|
|
||||||||
| for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); | ||||||||
| MBBI != E;) { | ||||||||
| if (TII->isPairableLdStInstOpc(MBBI->getOpcode()) && | ||||||||
| tryToPairLdStInst(MBBI)) | ||||||||
| MadeChange = true; | ||||||||
| else | ||||||||
| ++MBBI; | ||||||||
| } | ||||||||
| } | ||||||||
| } | ||||||||
|
|
||||||||
| for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); | ||||||||
| MBBI != E;) { | ||||||||
| if (TII->isPairableLdStInstOpc(MBBI->getOpcode()) && | ||||||||
| tryToPairLdStInst(MBBI)) | ||||||||
| MadeChange = true; | ||||||||
| else | ||||||||
| ++MBBI; | ||||||||
| if (!Subtarget.is64Bit() && Subtarget.hasStdExtZilsd()) { | ||||||||
| for (auto &MBB : Fn) { | ||||||||
| for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E;) { | ||||||||
| if (fixInvalidRegPairOp(MBB, MBBI)) { | ||||||||
| MadeChange = true; | ||||||||
| // Iterator was updated by fixInvalidRegPairOp | ||||||||
| } else { | ||||||||
| ++MBBI; | ||||||||
| } | ||||||||
| } | ||||||||
topperc marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||
| } | ||||||||
| } | ||||||||
|
|
||||||||
| return MadeChange; | ||||||||
| } | ||||||||
|
|
||||||||
|
|
@@ -395,6 +423,219 @@ RISCVLoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, | |||||||
| return NextI; | ||||||||
| } | ||||||||
|
|
||||||||
| //===----------------------------------------------------------------------===// | ||||||||
| // Post reg-alloc zilsd pass implementation | ||||||||
| //===----------------------------------------------------------------------===// | ||||||||
|
|
||||||||
| bool RISCVLoadStoreOpt::isConsecutiveRegPair(Register First, Register Second) { | ||||||||
| // Special case: First register can not be zero | ||||||||
|
||||||||
| // zeros | ||||||||
|
||||||||
| // Special case: First register can not be zero | |
| // zeros | |
| // Special case: First register can not be zero |
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Don't we need to check that Second is also X0?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes I think we should
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The code here isn't great, but this does now do the correct thing.
if (First == X0)
return (Second == x0);
Would be equivalent to these two initial ifs and simpler.
4vtomat marked this conversation as resolved.
Show resolved
Hide resolved
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it possible for BaseReg to be the same as FirstReg? If that happens the next instruction will get the wrong value for its base.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Test case that miscompiles with -mattr=+zilsd,+unaligned-scalar-mem
declare void @bar(i32, i32, i32)
define void @foo(ptr %x) {
%a = load i32, ptr %x, align 4
%b = getelementptr i32, ptr %x, i32 1
%c = load i32, ptr %b, align 4
call void @bar(i32 %a, i32 1, i32 %c)
ret void
}
It produces
lw a0, 0(a0)
lw a2, 4(a0)
The second load is using the result of the first as a pointer.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You are right, it's possible to be same, I just use early clobber to resolve this but not sure it's too aggressive?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the other choice is to just emit the two loads in the opposite order. i.e. for the case above,
lw a2, 4(a0)
lw a0, 2(a0)
Would be acceptable, and would prevent needing the earlyclobber. This could be a follow-up change though.
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can pass FirstReg as a 5th argument to BuildMI instead of explicitly using .addReg(FirstReg, RegState::Define)
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should preserve the Kill flag for base register.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
that right!
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think we can get here, as the offset of an external symbol is always 0.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@4vtomat this was not addressed. And if it can happen, it would be incorrect. As now we would have 2 4-byte loads/stores accessing the same memory. We would need an offset for the second access to maintain the behavior of the paired instruction.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
sorry about that, I just forgot to push some fixes and I thought I did..
I think it can be removed lol
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we use memoperands_empty()? Though I'm not sure why we need this check at all. Doesn't cloneMemRefs work even if there are no memory operands?
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should preserve the kill flags for the registers. Need to be careful if FirstReg and SecondReg are the same register.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we already ensure FirstReg and SecondReg is not same register in pre-ra pass, I think there's no something like CSE happened between pre-ra pass and post-ra pass?
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we use memoperands_empty()? Though I'm not sure why we need this check at all. Doesn't cloneMemRefs work even if there are no memory operands?
topperc marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
topperc marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why are you bothering? Just use MIB.add(OffsetOp) I think.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@4vtomat was this addressed?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
addressed!
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we use memoperands_empty()? Though I'm not sure why we need this check at all. Doesn't cloneMemRefs work even if there are no memory operands?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we can just remove the check lol
Uh oh!
There was an error while loading. Please reload this page.