Skip to content

Commit a04d3ca

Browse files
[MCA] Use Bare Reference for InstrPostProcess (#160229)
This patch makes it so that InstrPostProcess::postProcessInstruction takes in a reference to a mca::Instruction rather than a reference to a std::unique_ptr. Without this, InstrPostProcess cannot be used with MCA instruction recycling because it needs to be called on both newly created instructions and instructions that have been recycled. We only have access to a raw pointer for instructions that have been recycled rather than a reference to the std::unique_ptr that owns them. This patch adds a call in the existing instruction recycling unit test to ensure the API remains compatible with this use case.
1 parent 05d032a commit a04d3ca

File tree

7 files changed

+28
-27
lines changed

7 files changed

+28
-27
lines changed

llvm/include/llvm/MCA/CustomBehaviour.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,7 @@ class InstrPostProcess {
4949
/// object after it has been lowered from the MCInst.
5050
/// This is generally a less disruptive alternative to modifying the
5151
/// scheduling model.
52-
virtual void postProcessInstruction(std::unique_ptr<Instruction> &Inst,
53-
const MCInst &MCI) {}
52+
virtual void postProcessInstruction(Instruction &Inst, const MCInst &MCI) {}
5453

5554
// The resetState() method gets invoked at the beginning of each code region
5655
// so that targets that override this function can clear any state that they

llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@
2121

2222
namespace llvm::mca {
2323

24-
void AMDGPUInstrPostProcess::postProcessInstruction(
25-
std::unique_ptr<Instruction> &Inst, const MCInst &MCI) {
24+
void AMDGPUInstrPostProcess::postProcessInstruction(Instruction &Inst,
25+
const MCInst &MCI) {
2626
switch (MCI.getOpcode()) {
2727
case AMDGPU::S_WAITCNT:
2828
case AMDGPU::S_WAITCNT_soft:
@@ -44,7 +44,7 @@ void AMDGPUInstrPostProcess::postProcessInstruction(
4444

4545
// s_waitcnt instructions encode important information as immediate operands
4646
// which are lost during the MCInst -> mca::Instruction lowering.
47-
void AMDGPUInstrPostProcess::processWaitCnt(std::unique_ptr<Instruction> &Inst,
47+
void AMDGPUInstrPostProcess::processWaitCnt(Instruction &Inst,
4848
const MCInst &MCI) {
4949
for (int Idx = 0, N = MCI.size(); Idx < N; Idx++) {
5050
MCAOperand Op;
@@ -55,7 +55,7 @@ void AMDGPUInstrPostProcess::processWaitCnt(std::unique_ptr<Instruction> &Inst,
5555
Op = MCAOperand::createImm(MCOp.getImm());
5656
}
5757
Op.setIndex(Idx);
58-
Inst->addOperand(Op);
58+
Inst.addOperand(Op);
5959
}
6060
}
6161

llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,16 +26,15 @@ namespace llvm {
2626
namespace mca {
2727

2828
class AMDGPUInstrPostProcess : public InstrPostProcess {
29-
void processWaitCnt(std::unique_ptr<Instruction> &Inst, const MCInst &MCI);
29+
void processWaitCnt(Instruction &Inst, const MCInst &MCI);
3030

3131
public:
3232
AMDGPUInstrPostProcess(const MCSubtargetInfo &STI, const MCInstrInfo &MCII)
3333
: InstrPostProcess(STI, MCII) {}
3434

3535
~AMDGPUInstrPostProcess() = default;
3636

37-
void postProcessInstruction(std::unique_ptr<Instruction> &Inst,
38-
const MCInst &MCI) override;
37+
void postProcessInstruction(Instruction &Inst, const MCInst &MCI) override;
3938
};
4039

4140
struct WaitCntInfo {

llvm/lib/Target/X86/MCA/X86CustomBehaviour.cpp

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -20,24 +20,22 @@
2020
namespace llvm {
2121
namespace mca {
2222

23-
void X86InstrPostProcess::setMemBarriers(std::unique_ptr<Instruction> &Inst,
24-
const MCInst &MCI) {
23+
void X86InstrPostProcess::setMemBarriers(Instruction &Inst, const MCInst &MCI) {
2524
switch (MCI.getOpcode()) {
2625
case X86::MFENCE:
27-
Inst->setLoadBarrier(true);
28-
Inst->setStoreBarrier(true);
26+
Inst.setLoadBarrier(true);
27+
Inst.setStoreBarrier(true);
2928
break;
3029
case X86::LFENCE:
31-
Inst->setLoadBarrier(true);
30+
Inst.setLoadBarrier(true);
3231
break;
3332
case X86::SFENCE:
34-
Inst->setStoreBarrier(true);
33+
Inst.setStoreBarrier(true);
3534
break;
3635
}
3736
}
3837

39-
void X86InstrPostProcess::useStackEngine(std::unique_ptr<Instruction> &Inst,
40-
const MCInst &MCI) {
38+
void X86InstrPostProcess::useStackEngine(Instruction &Inst, const MCInst &MCI) {
4139
// TODO(boomanaiden154): We currently do not handle PUSHF/POPF because we
4240
// have not done the necessary benchmarking to see if they are also
4341
// optimized by the stack engine.
@@ -46,18 +44,18 @@ void X86InstrPostProcess::useStackEngine(std::unique_ptr<Instruction> &Inst,
4644
// delay subsequent rsp using non-stack instructions.
4745
if (X86::isPOP(MCI.getOpcode()) || X86::isPUSH(MCI.getOpcode())) {
4846
auto *StackRegisterDef =
49-
llvm::find_if(Inst->getDefs(), [](const WriteState &State) {
47+
llvm::find_if(Inst.getDefs(), [](const WriteState &State) {
5048
return State.getRegisterID() == X86::RSP;
5149
});
5250
assert(
53-
StackRegisterDef != Inst->getDefs().end() &&
51+
StackRegisterDef != Inst.getDefs().end() &&
5452
"Expected push instruction to implicitly use stack pointer register.");
55-
Inst->getDefs().erase(StackRegisterDef);
53+
Inst.getDefs().erase(StackRegisterDef);
5654
}
5755
}
5856

59-
void X86InstrPostProcess::postProcessInstruction(
60-
std::unique_ptr<Instruction> &Inst, const MCInst &MCI) {
57+
void X86InstrPostProcess::postProcessInstruction(Instruction &Inst,
58+
const MCInst &MCI) {
6159
// Set IsALoadBarrier and IsAStoreBarrier flags.
6260
setMemBarriers(Inst, MCI);
6361
useStackEngine(Inst, MCI);

llvm/lib/Target/X86/MCA/X86CustomBehaviour.h

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,21 +26,20 @@ namespace mca {
2626
class X86InstrPostProcess : public InstrPostProcess {
2727
/// Called within X86InstrPostProcess to specify certain instructions
2828
/// as load and store barriers.
29-
void setMemBarriers(std::unique_ptr<Instruction> &Inst, const MCInst &MCI);
29+
void setMemBarriers(Instruction &Inst, const MCInst &MCI);
3030

3131
/// Called within X86InstrPostPorcess to remove some rsp read operands
3232
/// on stack instructions to better simulate the stack engine. We currently
3333
/// do not model features of the stack engine like sync uops.
34-
void useStackEngine(std::unique_ptr<Instruction> &Inst, const MCInst &MCI);
34+
void useStackEngine(Instruction &Inst, const MCInst &MCI);
3535

3636
public:
3737
X86InstrPostProcess(const MCSubtargetInfo &STI, const MCInstrInfo &MCII)
3838
: InstrPostProcess(STI, MCII) {}
3939

4040
~X86InstrPostProcess() = default;
4141

42-
void postProcessInstruction(std::unique_ptr<Instruction> &Inst,
43-
const MCInst &MCI) override;
42+
void postProcessInstruction(Instruction &Inst, const MCInst &MCI) override;
4443
};
4544

4645
} // namespace mca

llvm/tools/llvm-mca/llvm-mca.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -668,7 +668,7 @@ int main(int argc, char **argv) {
668668
return 1;
669669
}
670670

671-
IPP->postProcessInstruction(Inst.get(), MCI);
671+
IPP->postProcessInstruction(*Inst.get(), MCI);
672672
InstToInstruments.insert({&MCI, Instruments});
673673
LoweredSequence.emplace_back(std::move(Inst.get()));
674674
}

llvm/unittests/tools/llvm-mca/X86/TestIncrementalMCA.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,10 @@ TEST_F(X86TestBase, TestInstructionRecycling) {
130130
mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM, /*CallLatency=*/100);
131131
IB.setInstRecycleCallback(GetRecycledInst);
132132

133+
// Setup a generic IPP that does not do anything (as it is not target
134+
// specific) for testing purposes.
135+
auto IPP = std::make_unique<InstrPostProcess>(*STI, *MCII);
136+
133137
const SmallVector<mca::Instrument *> Instruments;
134138
// Tile size = 7
135139
for (unsigned i = 0U, E = MCIs.size(); i < E;) {
@@ -147,8 +151,10 @@ TEST_F(X86TestBase, TestInstructionRecycling) {
147151
});
148152
ASSERT_FALSE(bool(RemainingE));
149153
ASSERT_TRUE(RecycledInst);
154+
IPP->postProcessInstruction(*RecycledInst, MCIs[i]);
150155
ISM.addRecycledInst(RecycledInst);
151156
} else {
157+
IPP->postProcessInstruction(*InstOrErr.get(), MCIs[i]);
152158
ISM.addInst(std::move(InstOrErr.get()));
153159
}
154160
}

0 commit comments

Comments
 (0)