Skip to content

Commit 710b929

Browse files
authored
Add naive branch predictor unit (#29)
2 parents d63c493 + 15a4547 commit 710b929

File tree

10 files changed

+174
-14
lines changed

10 files changed

+174
-14
lines changed

CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ set(_MCAVIEWS_SOURCE_FILES
9797
set(_CUSTOMHW_SOURCE_FILES
9898
CustomHWUnits/CustomSourceMgr.cpp
9999
CustomHWUnits/MCADLSUnit.cpp
100+
CustomHWUnits/NaiveBranchPredictorUnit.cpp
100101
)
101102

102103
set(_CUSTOM_STAGES_SOURCE_FILES
@@ -118,6 +119,7 @@ set(_SOURCE_FILES
118119
${_CUSTOM_STAGES_SOURCE_FILES}
119120
${_BROKERS_SOURCE_FILES}
120121
MCAWorker.cpp
122+
MetadataCategories.cpp
121123
PipelinePrinter.cpp
122124
)
123125

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
#ifndef LLVM_MCAD_ABSTRACT_BRANCH_PREDICTOR_UNIT_H
2+
#define LLVM_MCAD_ABSTRACT_BRANCH_PREDICTOR_UNIT_H
3+
4+
#include <optional>
5+
#include "llvm/MCA/Instruction.h"
6+
#include "llvm/MCA/HardwareUnits/HardwareUnit.h"
7+
#include "MetadataRegistry.h"
8+
#include "MetadataCategories.h"
9+
10+
namespace llvm {
11+
namespace mcad {
12+
13+
class AbstractBranchPredictorUnit : public llvm::mca::HardwareUnit {
14+
15+
public:
16+
~AbstractBranchPredictorUnit() {}
17+
virtual void recordTakenBranch(MDInstrAddr IA, MDInstrAddr destAddr) = 0;
18+
virtual MDInstrAddr predictBranch(MDInstrAddr IA) = 0;
19+
virtual unsigned getMispredictionPenalty() = 0;
20+
21+
};
22+
23+
}
24+
}
25+
26+
#endif
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#include <map>
2+
#include "CustomHWUnits/NaiveBranchPredictorUnit.h"
3+
4+
namespace llvm {
5+
namespace mcad {
6+
7+
/// Remembers \p destAddr as the latest observed successor of the instruction
/// at \p IA, replacing any destination stored for that address before.
void NaiveBranchPredictorUnit::recordTakenBranch(MDInstrAddr IA, MDInstrAddr destAddr) {
  branchHistory.insert_or_assign(IA, destAddr);
}
10+
11+
/// Predicts the address executed after the branch at \p IA.
///
/// \returns the destination most recently recorded for \p IA, or, when no
/// history exists for that address, a fall-through guess of IA.addr + 1.
MDInstrAddr NaiveBranchPredictorUnit::predictBranch(MDInstrAddr IA) {
  // Look the address up once and reuse the iterator for the result.
  if (auto known = branchHistory.find(IA); known != branchHistory.end()) {
    return known->second;
  }
  // We have no history on this; predict a fall-through branch
  // FIXME: fix this to use actual branch instruction size, which is likely
  // larger than one byte.
  return MDInstrAddr { IA.addr + 1 };
}
20+
21+
}
22+
}
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#ifndef LLVM_MCAD_NAIVE_BRANCH_PREDICTOR_UNIT_H
2+
#define LLVM_MCAD_NAIVE_BRANCH_PREDICTOR_UNIT_H
3+
4+
#include <map>
5+
#include "CustomHWUnits/AbstractBranchPredictorUnit.h"
6+
7+
namespace llvm {
8+
namespace mcad {
9+
10+
class NaiveBranchPredictorUnit : public AbstractBranchPredictorUnit {
11+
unsigned mispredictionPenalty;
12+
std::map<MDInstrAddr, MDInstrAddr> branchHistory = {};
13+
14+
public:
15+
NaiveBranchPredictorUnit(unsigned mispredictionPenalty = 20) : mispredictionPenalty(mispredictionPenalty) {};
16+
17+
void recordTakenBranch(MDInstrAddr IA, MDInstrAddr destAddr) override;
18+
MDInstrAddr predictBranch(MDInstrAddr IA) override;
19+
unsigned getMispredictionPenalty() override {
20+
return mispredictionPenalty;
21+
}
22+
23+
};
24+
25+
}
26+
}
27+
28+
#endif

CustomStages/MCADFetchDelayStage.cpp

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
1-
#include <iostream>
21
#include "CustomStages/MCADFetchDelayStage.h"
2+
#include "MetadataCategories.h"
3+
#include "llvm/Support/Debug.h"
4+
#define DEBUG_TYPE "llvm-mca"
5+
6+
#include <iostream>
7+
#include <iomanip>
8+
#include <optional>
39

410
namespace llvm {
511
namespace mcad {
@@ -34,11 +40,35 @@ llvm::Error MCADFetchDelayStage::execute(llvm::mca::InstRef &IR) {
3440
const llvm::MCInstrDesc &MCID = MCII.get(I->getOpcode());
3541
bool immediatelyExecute = true;
3642
unsigned delayCyclesLeft = 0;
37-
if(MCID.isBranch()) {
38-
// delayed, will have to wait
39-
delayCyclesLeft = 100;
43+
std::optional<MDInstrAddr> instrAddr = getMDInstrAddrForInstr(MD, IR);
44+
// Check if previous instruction was a branch, and if so if the predicted
45+
// branch target matched what we ended up executing
46+
if(predictedNextInstrAddr.has_value() && instrAddr.has_value()) {
47+
if(previousInstrAddr.has_value()) {
48+
BPU.recordTakenBranch(*previousInstrAddr, *instrAddr);
49+
}
50+
if(*predictedNextInstrAddr != *instrAddr) {
51+
// Previous prediction was wrong; this instruction will have extra
52+
// latency due to misprediction.
53+
delayCyclesLeft += BPU.getMispredictionPenalty();
54+
LLVM_DEBUG(dbgs() << "[MCAD FetchDelayStage] Previous branch at ");
55+
LLVM_DEBUG(dbgs().write_hex(instrAddr->addr));
56+
LLVM_DEBUG(dbgs() << " mispredicted, delaying next instruction by "
57+
<< delayCyclesLeft << " cycle(s).\n");
58+
} else {
59+
LLVM_DEBUG(dbgs() << "[MCAD FetchDelayStage] Previous branch at ");
60+
LLVM_DEBUG(dbgs().write_hex(instrAddr->addr));
61+
LLVM_DEBUG(dbgs() << " predicted correctly.\n" );
62+
}
63+
}
64+
// Update branch prediction state
65+
if(MCID.isBranch() && instrAddr.has_value()) {
66+
predictedNextInstrAddr = BPU.predictBranch(*instrAddr);
67+
} else {
68+
predictedNextInstrAddr = std::nullopt;
4069
}
4170
instrQueue.emplace_back(DelayedInstr { delayCyclesLeft, IR });
71+
previousInstrAddr = instrAddr;
4272
// if the instruction is not delayed, execute it immediately (it will
4373
// have a delayCyclesLeft of 0 and be at the top of the queue)
4474
return forwardDueInstrs();

CustomStages/MCADFetchDelayStage.h

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,12 @@
88
#include "llvm/MC/MCInstrInfo.h"
99
#include "llvm/MCA/SourceMgr.h"
1010
#include "llvm/MCA/Stages/Stage.h"
11+
#include "CustomHWUnits/AbstractBranchPredictorUnit.h"
12+
#include "MetadataRegistry.h"
1113

1214
#include <vector>
1315
#include <queue>
16+
#include <optional>
1417

1518
namespace llvm {
1619
namespace mcad {
@@ -25,23 +28,30 @@ class MCADFetchDelayStage : public llvm::mca::Stage {
2528
const llvm::MCInstrInfo &MCII;
2629
std::deque<DelayedInstr> instrQueue = {};
2730

31+
AbstractBranchPredictorUnit &BPU;
32+
MetadataRegistry &MD;
33+
34+
// Whenever a branch instruction is executed, we run the branch predictor
35+
// and store the predicted instruction address here.
36+
// At the next instruction, we compare the predicted address to the actual
37+
// address and add a penalty if there is a mismatch.
38+
// Non-branch instructions set this member to nullopt.
39+
std::optional<MDInstrAddr> predictedNextInstrAddr = std::nullopt;
40+
41+
// Stores the address of the last executed instruction.
42+
std::optional<MDInstrAddr> previousInstrAddr = std::nullopt;
43+
2844
public:
29-
MCADFetchDelayStage(const llvm::MCInstrInfo &MCII) : MCII(MCII) {}
45+
MCADFetchDelayStage(const llvm::MCInstrInfo &MCII, MetadataRegistry &MD, AbstractBranchPredictorUnit &BPU) : MCII(MCII), MD(MD), BPU(BPU) {}
3046

3147
bool hasWorkToComplete() const override;
3248
bool isAvailable(const llvm::mca::InstRef &IR) const override;
3349
llvm::Error execute(llvm::mca::InstRef &IR) override;
3450

35-
//llvm::Error cycleStart() override;
3651
llvm::Error cycleStart() override;
3752

3853
llvm::Error forwardDueInstrs();
3954

40-
///// Called after the pipeline is resumed from pausing state.
41-
//virtual Error cycleResume() { return ErrorSuccess(); }
42-
43-
///// Called once at the end of each cycle.
44-
4555
};
4656

4757
}

MCAWorker.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
#include <unistd.h>
3838

3939
#include "CustomHWUnits/MCADLSUnit.h"
40+
#include "CustomHWUnits/NaiveBranchPredictorUnit.h"
4041
#include "CustomStages/MCADFetchDelayStage.h"
4142
#include "MCAViews/SummaryView.h"
4243
#include "MCAViews/TimelineView.h"
@@ -181,10 +182,11 @@ std::unique_ptr<mca::Pipeline> MCAWorker::createDefaultPipeline() {
181182
MCAPO.StoreQueueSize,
182183
MCAPO.AssumeNoAlias, &MDRegistry);
183184
auto HWS = std::make_unique<Scheduler>(SM, *LSU);
185+
auto BPU = std::make_unique<NaiveBranchPredictorUnit>(20);
184186

185187
// Create the pipeline stages.
186188
auto Fetch = std::make_unique<EntryStage>(SrcMgr);
187-
auto FetchDelay = std::make_unique<MCADFetchDelayStage>(MCII);
189+
auto FetchDelay = std::make_unique<MCADFetchDelayStage>(MCII, MDRegistry, *BPU);
188190
auto Dispatch = std::make_unique<DispatchStage>(STI, MRI, MCAPO.DispatchWidth,
189191
*RCU, *PRF);
190192
auto Execute =
@@ -196,6 +198,7 @@ std::unique_ptr<mca::Pipeline> MCAWorker::createDefaultPipeline() {
196198
TheMCA.addHardwareUnit(std::move(PRF));
197199
TheMCA.addHardwareUnit(std::move(LSU));
198200
TheMCA.addHardwareUnit(std::move(HWS));
201+
TheMCA.addHardwareUnit(std::move(BPU));
199202

200203
// Build the pipeline.
201204
auto StagePipeline = std::make_unique<Pipeline>();
@@ -224,16 +227,18 @@ std::unique_ptr<mca::Pipeline> MCAWorker::createInOrderPipeline() {
224227
auto LSU = std::make_unique<MCADLSUnit>(SM, MCAPO.LoadQueueSize,
225228
MCAPO.StoreQueueSize,
226229
MCAPO.AssumeNoAlias, &MDRegistry);
230+
auto BPU = std::make_unique<NaiveBranchPredictorUnit>(20);
227231

228232
// Create the pipeline stages.
229233
auto Entry = std::make_unique<EntryStage>(SrcMgr);
230-
auto FetchDelay = std::make_unique<MCADFetchDelayStage>(MCII);
234+
auto FetchDelay = std::make_unique<MCADFetchDelayStage>(MCII, MDRegistry, *BPU);
231235
auto InOrderIssue = std::make_unique<InOrderIssueStage>(STI, *PRF, *CB, *LSU);
232236
auto StagePipeline = std::make_unique<Pipeline>();
233237

234238
// Pass the ownership of all the hardware units to this Context.
235239
TheMCA.addHardwareUnit(std::move(PRF));
236240
TheMCA.addHardwareUnit(std::move(LSU));
241+
TheMCA.addHardwareUnit(std::move(BPU));
237242

238243
// Build the pipeline.
239244
StagePipeline->appendStage(std::move(Entry));

MetadataCategories.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#include "llvm/MCA/Instruction.h"
2+
#include <optional>
3+
#include "MetadataRegistry.h"
4+
#include "MetadataCategories.h"
5+
6+
namespace llvm {
7+
namespace mcad {
8+
9+
/// Looks up the MD_InstrAddr metadata entry for the instruction behind \p IR.
///
/// \returns std::nullopt when the instruction carries no identifier;
/// otherwise whatever the MD_InstrAddr category yields for that identifier.
std::optional<MDInstrAddr> getMDInstrAddrForInstr(MetadataRegistry &MD, const llvm::mca::InstRef &IR) {
  const llvm::mca::Instruction *Inst = IR.getInstruction();
  auto Identifier = Inst->getIdentifier();

  // Without an identifier there is no key to query the registry with.
  if (!Identifier.has_value())
    return std::nullopt;

  auto &AddrCategory = MD[llvm::mcad::MD_InstrAddr];
  auto Found = AddrCategory.get<MDInstrAddr>(*Identifier);
  return Found;
}
19+
20+
}
21+
}

MetadataCategories.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,21 @@ MD_BinaryRegionMarkers
2222

2323
};
2424

25+
/// Instruction address metadata value.
///
/// Kept as a plain aggregate so it can be brace-initialized from a raw
/// address. Ordering and equality compare the address only, which lets the
/// type be used as a std::map key.
struct MDInstrAddr {
  unsigned long long addr;

  /// Strict ordering by address.
  bool operator<(const MDInstrAddr &b) const {
    return addr < b.addr;
  }

  /// Two values are equal when their addresses are equal.
  bool operator==(const MDInstrAddr &b) const {
    return addr == b.addr;
  }

  /// Negation of operator==.
  bool operator!=(const MDInstrAddr &b) const {
    return !(*this == b);
  }
};
37+
38+
std::optional<MDInstrAddr> getMDInstrAddrForInstr(MetadataRegistry &MD, const llvm::mca::InstRef &IR);
39+
2540
} // end namespace mcad
2641
} // end namespace llvm
2742
#endif

plugins/vivisect-broker/Broker.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ using namespace mcad;
4040
// Needed so the TypeID of the shared library and main executable refer to the
4141
// same type.
4242
extern template class Any::TypeId<MDMemoryAccess>;
43+
extern template class Any::TypeId<MDInstrAddr>;
4344

4445
class EmulatorService final : public Emulator::Service {
4546
grpc::Status RecordEmulatorActions(grpc::ServerContext *ctxt,
@@ -143,7 +144,7 @@ class VivisectBroker : public Broker {
143144
IndexMap[i] = TotalNumTraces;
144145

145146
auto &InstrAddrCat = Registry[MD_InstrAddr];
146-
InstrAddrCat[TotalNumTraces] = insn.addr();
147+
InstrAddrCat[TotalNumTraces] = MDInstrAddr { insn.addr() };
147148

148149
if (insn.has_memory_access()) {
149150
auto MemAccess = insn.memory_access();

0 commit comments

Comments
 (0)