From 4550c9664a96d2cae8705d85992ea21b6a2424ce Mon Sep 17 00:00:00 2001 From: Phillip Assmann Date: Thu, 10 Apr 2025 11:15:27 +0000 Subject: [PATCH 1/3] cpu: fix bac memory leak (by @dhschall) --- src/mem/cache/prefetch/fdp.cc | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/mem/cache/prefetch/fdp.cc b/src/mem/cache/prefetch/fdp.cc index 6eff00a450d..95176a65457 100644 --- a/src/mem/cache/prefetch/fdp.cc +++ b/src/mem/cache/prefetch/fdp.cc @@ -171,23 +171,25 @@ FetchDirectedPrefetcher::translationComplete(PrefetchRequest *pfr, bool failed) assert(cache != nullptr); if (failed) { - DPRINTF(HWPrefetch, "Translation of %#x failed\n", pfr->addr); + DPRINTF(HWPrefetch, "Translation of %#x failed\n", it->addr); stats.translationFail++; } else { - DPRINTF(HWPrefetch, "Translation of %#x succeeded\n", pfr->addr); + DPRINTF(HWPrefetch, "Translation of %#x succeeded\n", it->addr); stats.translationSuccess++; - it->createPkt(curTick() + latency); - stats.pfPacketsCreated++; - - if (cacheSnoop && (cache->inCache(pfr->pkt->getAddr(), pfr->pkt->isSecure()) - || (cache->inMissQueue(pfr->pkt->getAddr(), pfr->pkt->isSecure())))) { + if (cacheSnoop && (cache->inCache( + it->req->getPaddr(), it->req->isSecure() + ) || (cache->inMissQueue( + it->req->getPaddr(), it->req->isSecure() + )))) { stats.pfInCache++; DPRINTF(HWPrefetch, "Drop Packet. In Cache / MSHR\n"); } else { - + it->createPkt(curTick() + latency); + stats.pfPacketsCreated++; + DPRINTF(HWPrefetch, "Addr: %#x Add packet to PFQ. 
pkt PA:%#x, " - "PFQ sz:%i\n", pfr->addr, pfr->pkt->getAddr(), pfq.size()); - + "PFQ sz:%i\n", it->addr, it->pkt->getAddr(), pfq.size()); + stats.pfCandidatesAdded++; pfq.push_back(*it); } From 39a8609f14749e129a8549462ad6f99433441174 Mon Sep 17 00:00:00 2001 From: Phillip Assmann Date: Thu, 10 Apr 2025 11:24:42 +0000 Subject: [PATCH 2/3] cpu: add bac branch predictor delay --- src/cpu/o3/BaseO3CPU.py | 1 + src/cpu/o3/bac.cc | 15 +++++++++++++++ src/cpu/o3/bac.hh | 6 ++++++ 3 files changed, 22 insertions(+) diff --git a/src/cpu/o3/BaseO3CPU.py b/src/cpu/o3/BaseO3CPU.py index a6906c16437..e1cc8358854 100644 --- a/src/cpu/o3/BaseO3CPU.py +++ b/src/cpu/o3/BaseO3CPU.py @@ -106,6 +106,7 @@ def support_take_over(cls): # Forward pipeline delays bacToFetchDelay = Param.Cycles(1, "Branch address calc. to fetch delay") + bacBranchPredictDelay = Param.Cycles(0, "BAC Branch Predictor delay") fetchToDecodeDelay = Param.Cycles(1, "Fetch to decode delay") decodeWidth = Param.Unsigned(8, "Decode width") diff --git a/src/cpu/o3/bac.cc b/src/cpu/o3/bac.cc index cbb11762693..b7dce266ae1 100644 --- a/src/cpu/o3/bac.cc +++ b/src/cpu/o3/bac.cc @@ -72,6 +72,7 @@ BAC::BAC(CPU *_cpu, const BaseO3CPUParams ¶ms) decodeToFetchDelay(params.decodeToFetchDelay), commitToFetchDelay(params.commitToFetchDelay), bacToFetchDelay(params.bacToFetchDelay), + bacBranchPredictDelay(params.bacBranchPredictDelay), fetchTargetWidth(params.fetchTargetWidth), minInstSize(params.minInstSize), numThreads(params.numThreads), @@ -83,6 +84,7 @@ BAC::BAC(CPU *_cpu, const BaseO3CPUParams ¶ms) for (int i = 0; i < MaxThreads; i++) { bacPC[i].reset(params.isa[0]->newPCState()); stalls[i] = {false, false, false}; + branchPredictRemaining[i] = Cycles(0); } assert(bpu!=nullptr); @@ -401,12 +403,24 @@ BAC::checkSignalsAndUpdate(ThreadID tid) return true; } + if (branchPredictRemaining[tid] > Cycles(0)) { + --branchPredictRemaining[tid]; + DPRINTF(BAC, + "[global] Stalling for Branch Predictor for %i more cycles.\n", + 
branchPredictRemaining[tid] + ); + stalls[tid].bpu = true; + } else { + stalls[tid].bpu = false; + } + if (checkStall(tid)) { // return block(tid); bacStatus[tid] = Blocked; return false; } + // If at this point the FTQ is still invalid we need to wait for // A resteer/squash signal. if (!ftq->isValid(tid) && bacStatus[tid] != Idle) { @@ -679,6 +693,7 @@ BAC::generateFetchTargets(ThreadID tid, bool &status_change) // Now make the actual prediction. Note the BPU will advance // the PC to the next instruction. predict_taken = predict(tid, staticInst, curFT, *next_pc); + branchPredictRemaining[tid] = Cycles(bacBranchPredictDelay); DPRINTF(BAC, "[tid:%i, ftn:%llu] Branch found at PC %#x " "taken?:%i, target:%#x\n", diff --git a/src/cpu/o3/bac.hh b/src/cpu/o3/bac.hh index 8d6e59072b5..7e1f899bdf9 100644 --- a/src/cpu/o3/bac.hh +++ b/src/cpu/o3/bac.hh @@ -373,6 +373,9 @@ class BAC */ bool wroteToTimeBuffer; + /** Tracks remaining cycles that the branch predictor stalls BAC */ + Cycles branchPredictRemaining[MaxThreads]; + /** Source of possible stalls. */ struct Stalls { @@ -399,6 +402,9 @@ class BAC /** BAC to fetch delay. */ const Cycles bacToFetchDelay; + /** BAC branch predict delay. */ + const Cycles bacBranchPredictDelay; + /** The maximum width of a fetch target. This also determines the * maximum addresses searched in one cycle.
(FT width / minInstSize) */ const unsigned fetchTargetWidth; From b2c1442fd7e648ad32a092855b26ee43041445da Mon Sep 17 00:00:00 2001 From: Phillip Assmann Date: Thu, 10 Apr 2025 15:23:50 +0000 Subject: [PATCH 3/3] cpu: move cond pred out of bpred unit --- src/cpu/minor/BaseMinorCPU.py | 5 +- src/cpu/o3/BaseO3CPU.py | 5 +- src/cpu/pred/2bit_local.cc | 8 +- src/cpu/pred/2bit_local.hh | 7 +- src/cpu/pred/BranchPredictor.py | 28 +++- src/cpu/pred/SConscript | 2 + src/cpu/pred/bi_mode.cc | 2 +- src/cpu/pred/bi_mode.hh | 4 +- src/cpu/pred/bpred_unit.cc | 24 ++-- src/cpu/pred/bpred_unit.hh | 97 +++---------- src/cpu/pred/conditional.cc | 52 +++++++ src/cpu/pred/conditional.hh | 148 ++++++++++++++++++++ src/cpu/pred/multiperspective_perceptron.cc | 2 +- src/cpu/pred/multiperspective_perceptron.hh | 4 +- src/cpu/pred/tage.cc | 4 +- src/cpu/pred/tage.hh | 4 +- src/cpu/pred/tagescl_ref.cc | 2 +- src/cpu/pred/tagescl_ref.hh | 4 +- src/cpu/pred/tournament.cc | 2 +- src/cpu/pred/tournament.hh | 4 +- 20 files changed, 295 insertions(+), 113 deletions(-) create mode 100644 src/cpu/pred/conditional.cc create mode 100644 src/cpu/pred/conditional.hh diff --git a/src/cpu/minor/BaseMinorCPU.py b/src/cpu/minor/BaseMinorCPU.py index 545dfeaee56..5484a11a882 100644 --- a/src/cpu/minor/BaseMinorCPU.py +++ b/src/cpu/minor/BaseMinorCPU.py @@ -426,7 +426,10 @@ def support_take_over(cls): ) branchPred = Param.BranchPredictor( - TournamentBP(numThreads=Parent.numThreads), "Branch Predictor" + BranchPredictor( + conditionalBranchPred=TournamentBP(numThreads=Parent.numThreads) + ), + "Branch Predictor", ) def addCheckerCpu(self): diff --git a/src/cpu/o3/BaseO3CPU.py b/src/cpu/o3/BaseO3CPU.py index e1cc8358854..d97c1f2c7dd 100644 --- a/src/cpu/o3/BaseO3CPU.py +++ b/src/cpu/o3/BaseO3CPU.py @@ -210,7 +210,10 @@ def support_take_over(cls): smtCommitPolicy = Param.CommitPolicy("RoundRobin", "SMT Commit Policy") branchPred = Param.BranchPredictor( - TournamentBP(numThreads=Parent.numThreads), "Branch 
Predictor" + BranchPredictor( + conditionalBranchPred=TournamentBP(numThreads=Parent.numThreads) + ), + "Branch Predictor", ) needsTSO = Param.Bool(False, "Enable TSO Memory model") diff --git a/src/cpu/pred/2bit_local.cc b/src/cpu/pred/2bit_local.cc index 7c27355b20f..34b495bc218 100644 --- a/src/cpu/pred/2bit_local.cc +++ b/src/cpu/pred/2bit_local.cc @@ -52,7 +52,7 @@ namespace branch_prediction { LocalBP::LocalBP(const LocalBPParams ¶ms) - : BPredUnit(params), + : ConditionalPredictor(params), localPredictorSize(params.localPredictorSize), localCtrBits(params.localCtrBits), localPredictorSets(localPredictorSize / localCtrBits), @@ -78,6 +78,12 @@ LocalBP::LocalBP(const LocalBPParams ¶ms) instShiftAmt); } +void LocalBP::branchPlaceholder(ThreadID tid, Addr pc, + bool uncond, void * &bpHistory) +{ +// Placeholder for a function that only returns history items +} + void LocalBP::updateHistories(ThreadID tid, Addr pc, bool uncond, bool taken, Addr target, const StaticInstPtr &inst, diff --git a/src/cpu/pred/2bit_local.hh b/src/cpu/pred/2bit_local.hh index 8d77289f966..bb539d37a9c 100644 --- a/src/cpu/pred/2bit_local.hh +++ b/src/cpu/pred/2bit_local.hh @@ -46,7 +46,7 @@ #include "base/sat_counter.hh" #include "base/types.hh" -#include "cpu/pred/bpred_unit.hh" +#include "cpu/pred/conditional.hh" #include "params/LocalBP.hh" namespace gem5 @@ -62,7 +62,7 @@ namespace branch_prediction * predictor state that needs to be recorded or updated; the update can be * determined solely by the branch being taken or not taken. 
*/ -class LocalBP : public BPredUnit +class LocalBP : public ConditionalPredictor { public: /** @@ -73,6 +73,9 @@ class LocalBP : public BPredUnit // Overriding interface functions bool lookup(ThreadID tid, Addr pc, void * &bp_history) override; + void branchPlaceholder(ThreadID tid, Addr pc, bool uncond, + void * &bpHistory) override; + void updateHistories(ThreadID tid, Addr pc, bool uncond, bool taken, Addr target, const StaticInstPtr &inst, void * &bp_history) override; diff --git a/src/cpu/pred/BranchPredictor.py b/src/cpu/pred/BranchPredictor.py index 496b92a02d7..d23249899a5 100644 --- a/src/cpu/pred/BranchPredictor.py +++ b/src/cpu/pred/BranchPredictor.py @@ -142,6 +142,18 @@ class SimpleBTB(BranchTargetBuffer): ) +class ConditionalPredictor(SimObject): + type = "ConditionalPredictor" + cxx_class = "gem5::branch_prediction::ConditionalPredictor" + cxx_header = "cpu/pred/conditional.hh" + abstract = True + + numThreads = Param.Unsigned(Parent.numThreads, "Number of threads") + instShiftAmt = Param.Unsigned( + Parent.instShiftAmt, "Number of bits to shift instructions by" + ) + + class IndirectPredictor(SimObject): type = "IndirectPredictor" cxx_class = "gem5::branch_prediction::IndirectPredictor" @@ -179,7 +191,6 @@ class BranchPredictor(SimObject): type = "BranchPredictor" cxx_class = "gem5::branch_prediction::BPredUnit" cxx_header = "cpu/pred/bpred_unit.hh" - abstract = True numThreads = Param.Unsigned(Parent.numThreads, "Number of threads") instShiftAmt = Param.Unsigned(2, "Number of bits to shift instructions by") @@ -197,6 +208,9 @@ class BranchPredictor(SimObject): ras = Param.ReturnAddrStack( ReturnAddrStack(), "Return address stack, set to NULL to disable RAS." 
) + conditionalBranchPred = Param.ConditionalPredictor( + "Conditional branch predictor" + ) indirectBranchPred = Param.IndirectPredictor( SimpleIndirectPredictor(), "Indirect branch predictor, set to NULL to disable " @@ -212,7 +226,7 @@ class BranchPredictor(SimObject): ) -class LocalBP(BranchPredictor): +class LocalBP(ConditionalPredictor): type = "LocalBP" cxx_class = "gem5::branch_prediction::LocalBP" cxx_header = "cpu/pred/2bit_local.hh" @@ -221,7 +235,7 @@ class LocalBP(BranchPredictor): localCtrBits = Param.Unsigned(2, "Bits per counter") -class TournamentBP(BranchPredictor): +class TournamentBP(ConditionalPredictor): type = "TournamentBP" cxx_class = "gem5::branch_prediction::TournamentBP" cxx_header = "cpu/pred/tournament.hh" @@ -235,7 +249,7 @@ class TournamentBP(BranchPredictor): choiceCtrBits = Param.Unsigned(2, "Bits of choice counters") -class BiModeBP(BranchPredictor): +class BiModeBP(ConditionalPredictor): type = "BiModeBP" cxx_class = "gem5::branch_prediction::BiModeBP" cxx_header = "cpu/pred/bi_mode.hh" @@ -310,7 +324,7 @@ class TAGEBase(SimObject): # TAGE branch predictor as described in https://www.jilp.org/vol8/v8paper1.pdf # The default sizes below are for the 8C-TAGE configuration (63.5 Kbits) -class TAGE(BranchPredictor): +class TAGE(ConditionalPredictor): type = "TAGE" cxx_class = "gem5::branch_prediction::TAGE" cxx_header = "cpu/pred/tage.hh" @@ -776,7 +790,7 @@ class TAGE_SC_L_8KB(TAGE_SC_L): statistical_corrector = TAGE_SC_L_8KB_StatisticalCorrector() -class MultiperspectivePerceptron(BranchPredictor): +class MultiperspectivePerceptron(ConditionalPredictor): type = "MultiperspectivePerceptron" cxx_class = "gem5::branch_prediction::MultiperspectivePerceptron" cxx_header = "cpu/pred/multiperspective_perceptron.hh" @@ -1117,7 +1131,7 @@ class MultiperspectivePerceptronTAGE8KB(MultiperspectivePerceptronTAGE): statistical_corrector = MPP_StatisticalCorrector_8KB() -class TageSCLRef(BranchPredictor): +class TageSCLRef(ConditionalPredictor): 
type = "TageSCLRef" cxx_class = "gem5::branch_prediction::TageSCLRef" cxx_header = "cpu/pred/tagescl_ref.hh" diff --git a/src/cpu/pred/SConscript b/src/cpu/pred/SConscript index f52299fecc0..e791f318d26 100644 --- a/src/cpu/pred/SConscript +++ b/src/cpu/pred/SConscript @@ -44,6 +44,7 @@ Import('*') SimObject('BranchPredictor.py', sim_objects=[ 'BranchPredictor', + 'ConditionalPredictor', 'IndirectPredictor', 'SimpleIndirectPredictor', 'BranchTargetBuffer', 'SimpleBTB', 'BTBIndexingPolicy', 'BTBSetAssociative', 'ReturnAddrStack', @@ -68,6 +69,7 @@ Source('bpred_unit.cc') Source('2bit_local.cc') Source('simple_indirect.cc') Source('it_tage.cc') +Source('conditional.cc') Source('indirect.cc') Source('ras.cc') Source('tournament.cc') diff --git a/src/cpu/pred/bi_mode.cc b/src/cpu/pred/bi_mode.cc index f9f9330b883..ed39731ab28 100644 --- a/src/cpu/pred/bi_mode.cc +++ b/src/cpu/pred/bi_mode.cc @@ -54,7 +54,7 @@ namespace branch_prediction { BiModeBP::BiModeBP(const BiModeBPParams ¶ms) - : BPredUnit(params), + : ConditionalPredictor(params), globalHistoryReg(params.numThreads, 0), globalHistoryBits(ceilLog2(params.globalPredictorSize)), choicePredictorSize(params.choicePredictorSize), diff --git a/src/cpu/pred/bi_mode.hh b/src/cpu/pred/bi_mode.hh index c0513826730..2131677ae90 100644 --- a/src/cpu/pred/bi_mode.hh +++ b/src/cpu/pred/bi_mode.hh @@ -46,7 +46,7 @@ #define __CPU_PRED_BI_MODE_PRED_HH__ #include "base/sat_counter.hh" -#include "cpu/pred/bpred_unit.hh" +#include "cpu/pred/conditional.hh" #include "params/BiModeBP.hh" namespace gem5 @@ -69,7 +69,7 @@ namespace branch_prediction * the branch's PC to choose between the two, destructive aliasing is reduced. 
*/ -class BiModeBP : public BPredUnit +class BiModeBP : public ConditionalPredictor { public: BiModeBP(const BiModeBPParams ¶ms); diff --git a/src/cpu/pred/bpred_unit.cc b/src/cpu/pred/bpred_unit.cc index 2e0daec6760..95aa71026fc 100644 --- a/src/cpu/pred/bpred_unit.cc +++ b/src/cpu/pred/bpred_unit.cc @@ -63,6 +63,7 @@ BPredUnit::BPredUnit(const Params ¶ms) predHist(numThreads), btb(params.btb), ras(params.ras), + cPred(params.conditionalBranchPred), iPred(params.indirectBranchPred), stats(this) { @@ -94,12 +95,6 @@ BPredUnit::drainSanityCheck() const assert(ph.empty()); } -void -BPredUnit::branchPlaceholder(ThreadID tid, Addr pc, - bool uncond, void * &bp_history) -{ - panic("BPredUnit::branchPlaceholder() not implemented for this BP.\n"); -} bool BPredUnit::predict(const StaticInstPtr &inst, const InstSeqNum &seqNum, @@ -156,7 +151,7 @@ BPredUnit::predict(const StaticInstPtr &inst, const InstSeqNum &seqNum, } else { // Conditional branches ------- ++stats.condPredicted; - hist->condPred = lookup(tid, pc.instAddr(), hist->bpHistory); + hist->condPred = cPred->lookup(tid, pc.instAddr(), hist->bpHistory); if (hist->condPred) { ++stats.condPredictedTaken; @@ -326,7 +321,7 @@ BPredUnit::predict(const StaticInstPtr &inst, const InstSeqNum &seqNum, * The actual prediction tables will updated once * we know the correct direction. **/ - updateHistories(tid, hist->pc, hist->uncond, hist->predTaken, + cPred->updateHistories(tid, hist->pc, hist->uncond, hist->predTaken, hist->target->instAddr(), hist->inst, hist->bpHistory); @@ -383,7 +378,7 @@ BPredUnit::commitBranch(ThreadID tid, PredictorHistory* &hist) hist->target->instAddr()); // Update the branch predictor with the correct results. - update(tid, hist->pc, + cPred->update(tid, hist->pc, hist->actuallyTaken, hist->bpHistory, false, hist->inst, @@ -469,7 +464,7 @@ BPredUnit::squashHistory(ThreadID tid, PredictorHistory* &history) } // This call will delete the bpHistory. 
- squash(tid, history->bpHistory); + cPred->squash(tid, history->bpHistory); delete history; history = nullptr; @@ -548,7 +543,7 @@ BPredUnit::squash(const InstSeqNum &squashed_sn, set(hist->target, corr_target); // Correct Direction predictor ------------------ - update(tid, hist->pc, actually_taken, hist->bpHistory, + cPred->update(tid, hist->pc, actually_taken, hist->bpHistory, true, hist->inst, corr_target.instAddr()); @@ -633,6 +628,13 @@ BPredUnit::squash(const InstSeqNum &squashed_sn, } } +void +BPredUnit::branchPlaceholder(ThreadID tid, Addr pc, + bool uncond, void * &bp_history) +{ + // Delegate to conditional predictor + cPred->branchPlaceholder(tid, pc, uncond, bp_history); +} void BPredUnit::dump() diff --git a/src/cpu/pred/bpred_unit.hh b/src/cpu/pred/bpred_unit.hh index c94e5231818..f3f4ceded38 100644 --- a/src/cpu/pred/bpred_unit.hh +++ b/src/cpu/pred/bpred_unit.hh @@ -49,6 +49,7 @@ #include "cpu/inst_seq.hh" #include "cpu/pred/branch_type.hh" #include "cpu/pred/btb.hh" +#include "cpu/pred/conditional.hh" #include "cpu/pred/indirect.hh" #include "cpu/pred/ras.hh" #include "cpu/static_inst.hh" @@ -141,81 +142,6 @@ class BPredUnit : public SimObject * Interface functions to the conditional branch predictor * */ - - /** - * Looks up a given conditional branch PC of in the BP to see if it - * is taken or not taken. - * @param tid The thread id. - * @param pc The PC to look up. - * @param bp_history Pointer that will be set to an object that - * has the branch predictor state associated with the lookup. - * @return Whether the branch is taken or not taken. - */ - virtual bool lookup(ThreadID tid, Addr pc, void * &bp_history) = 0; - - /** - * Ones done with the prediction this function updates the - * path and global history. All branches call this function - * including unconditional once. - * @param tid The thread id. - * @param pc The branch's pc that will be updated. - * @param uncond Wheather or not this branch is an unconditional branch. 
- * @param taken Whether or not the branch was taken - * @param target The final target of branch. Some modern - * predictors use the target in their history. - * @param inst Static instruction information - * @param bp_history Pointer that will be set to an object that - * has the branch predictor state associated with the lookup. - * - */ - virtual void updateHistories(ThreadID tid, Addr pc, bool uncond, - bool taken, Addr target, - const StaticInstPtr &inst, void * &bp_history) = 0; - - /** - * @param tid The thread id. - * @param bp_history Pointer to the history object. The predictor - * will need to update any state and delete the object. - */ - virtual void squash(ThreadID tid, void * &bp_history) = 0; - - - /** - * Updates the BP with taken/not taken information. - * @param tid The thread id. - * @param pc The branch's PC that will be updated. - * @param taken Whether the branch was taken or not taken. - * @param bp_history Pointer to the branch predictor state that is - * associated with the branch lookup that is being updated. - * @param squashed Set to true when this function is called during a - * squash operation. - * @param inst Static instruction information - * @param target The resolved target of the branch (only needed - * for squashed branches) - * @todo Make this update flexible enough to handle a global predictor. - */ - virtual void update(ThreadID tid, Addr pc, bool taken, - void * &bp_history, bool squashed, - const StaticInstPtr &inst, Addr target) = 0; - - /** - * Special function for the decoupled front-end. In it there can be - * branches which are not detected by the BPU in the first place as it - * requires a BTB hit. This function will generate a placeholder for - * such a branch once it is pre-decoded in the fetch stage. It will - * only create the branch history object but not update any internal state - * of the BPU. 
- * If the branch turns to be wrong then decode or commit will - * be able to use the normal squash functionality to correct the branch. - * Note that not all branch predictors implement this functionality. - * @param tid The thread id. - * @param pc The branch's PC. - * @param uncond Whether or not this branch is an unconditional branch. - * @param bp_history Pointer that will be set to an branch history object. - */ - virtual void branchPlaceholder(ThreadID tid, Addr pc, - bool uncond, void * &bp_history); - /** * Looks up a given PC in the BTB to see if a matching entry exists. * @param tid The thread id. @@ -270,6 +196,24 @@ class BPredUnit : public SimObject return btb->update(tid, pc, target); } + /** + * Special function for the decoupled front-end. In it there can be + * branches which are not detected by the BPU in the first place as it + * requires a BTB hit. This function will generate a placeholder for + * such a branch once it is pre-decoded in the fetch stage. It will + * only create the branch history object but not update any internal state + * of the BPU. + * If the branch turns to be wrong then decode or commit will + * be able to use the normal squash functionality to correct the branch. + * Note that not all branch predictors implement this functionality. + * @param tid The thread id. + * @param pc The branch's PC. + * @param uncond Whether or not this branch is an unconditional branch. + * @param bp_history Pointer that will be set to an branch history object. + */ + void branchPlaceholder(ThreadID tid, Addr pc, + bool uncond, void * &bp_history); + void dump(); @@ -492,6 +436,9 @@ class BPredUnit : public SimObject /** The return address stack. */ ReturnAddrStack * ras; + /** The conditional branch predictor. */ + ConditionalPredictor * cPred; + /** The indirect target predictor. 
*/ IndirectPredictor * iPred; diff --git a/src/cpu/pred/conditional.cc b/src/cpu/pred/conditional.cc new file mode 100644 index 00000000000..a59ecd99120 --- /dev/null +++ b/src/cpu/pred/conditional.cc @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2025 Technical University of Munich + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "cpu/pred/conditional.hh" + +namespace gem5 +{ + +namespace branch_prediction +{ + +ConditionalPredictor::ConditionalPredictor(const Params &params) + : SimObject(params), + instShiftAmt(params.instShiftAmt) +{ +} + + +void +ConditionalPredictor::branchPlaceholder(ThreadID tid, Addr pc, + bool uncond, void * &bp_history) +{ + panic("ConditionalPredictor::branchPlaceholder() not implemented.\n"); +} + +} // namespace branch_prediction +} // namespace gem5 diff --git a/src/cpu/pred/conditional.hh b/src/cpu/pred/conditional.hh new file mode 100644 index 00000000000..46dbf2b3a34 --- /dev/null +++ b/src/cpu/pred/conditional.hh @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2025 Technical University of Munich + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* @file + * Conditional branch predictor interface + */ + +#ifndef __CPU_PRED_CONDITIONAL_BASE_HH__ +#define __CPU_PRED_CONDITIONAL_BASE_HH__ + +#include "arch/generic/pcstate.hh" +#include "cpu/inst_seq.hh" +#include "cpu/pred/branch_type.hh" +#include "params/ConditionalPredictor.hh" +#include "sim/sim_object.hh" + +namespace gem5 +{ + +namespace branch_prediction +{ + +class ConditionalPredictor : public SimObject +{ + public: + + typedef ConditionalPredictorParams Params; + + ConditionalPredictor(const Params ¶ms); + + + /** + * Looks up a given conditional branch PC in the BP to see if it + * is taken or not taken. + * @param tid The thread id. + * @param pc The PC to look up. + * @param bp_history Pointer that will be set to an object that + * has the branch predictor state associated with the lookup. + * @return Whether the branch is taken or not taken. + */ + virtual bool lookup(ThreadID tid, Addr pc, void * &bp_history) = 0; + + /** + * Once done with the prediction this function updates the + * path and global history. All branches call this function + * including unconditional ones. + * @param tid The thread id. + * @param pc The branch's pc that will be updated. + * @param uncond Whether or not this branch is an unconditional branch. + * @param taken Whether or not the branch was taken + * @param target The final target of branch. Some modern + * predictors use the target in their history. + * @param inst Static instruction information + * @param bp_history Pointer that will be set to an object that + * has the branch predictor state associated with the lookup. + * + */ + virtual void updateHistories(ThreadID tid, Addr pc, bool uncond, + bool taken, Addr target, + const StaticInstPtr &inst, void * &bp_history) = 0; + + /** + * @param tid The thread id. + * @param bp_history Pointer to the history object. The predictor + * will need to update any state and delete the object.
+ */ + virtual void squash(ThreadID tid, void * &bp_history) = 0; + + + /** + * Updates the BP with taken/not taken information. + * @param tid The thread id. + * @param pc The branch's PC that will be updated. + * @param taken Whether the branch was taken or not taken. + * @param bp_history Pointer to the branch predictor state that is + * associated with the branch lookup that is being updated. + * @param squashed Set to true when this function is called during a + * squash operation. + * @param inst Static instruction information + * @param target The resolved target of the branch (only needed + * for squashed branches) + * @todo Make this update flexible enough to handle a global predictor. + */ + virtual void update(ThreadID tid, Addr pc, bool taken, + void * &bp_history, bool squashed, + const StaticInstPtr &inst, Addr target) = 0; + + /** + * Special function for the decoupled front-end. In it there can be + * branches which are not detected by the BPU in the first place as it + * requires a BTB hit. This function will generate a placeholder for + * such a branch once it is pre-decoded in the fetch stage. It will + * only create the branch history object but not update any internal state + * of the BPU. + * If the branch turns out to be wrong then decode or commit will + * be able to use the normal squash functionality to correct the branch. + * Note that not all branch predictors implement this functionality. + * @param tid The thread id. + * @param pc The branch's PC. + * @param uncond Whether or not this branch is an unconditional branch. + * @param bp_history Pointer that will be set to a branch history object. + */ + virtual void branchPlaceholder(ThreadID tid, Addr pc, + bool uncond, void * &bp_history); + protected: + + /** Number of bits to shift instructions by for predictor addresses.
*/ + const unsigned instShiftAmt; +}; + +} // namespace branch_prediction +} // namespace gem5 + +#endif // __CPU_PRED_CONDITIONAL_BASE_HH__ diff --git a/src/cpu/pred/multiperspective_perceptron.cc b/src/cpu/pred/multiperspective_perceptron.cc index c8284e49a1d..3f1bdec504b 100644 --- a/src/cpu/pred/multiperspective_perceptron.cc +++ b/src/cpu/pred/multiperspective_perceptron.cc @@ -128,7 +128,7 @@ MultiperspectivePerceptron::ThreadData::ThreadData(int num_filters, } MultiperspectivePerceptron::MultiperspectivePerceptron( - const MultiperspectivePerceptronParams &p) : BPredUnit(p), + const MultiperspectivePerceptronParams &p) : ConditionalPredictor(p), blockSize(p.block_size), pcshift(p.pcshift), threshold(p.threshold), bias0(p.bias0), bias1(p.bias1), biasmostly0(p.biasmostly0), biasmostly1(p.biasmostly1), nbest(p.nbest), tunebits(p.tunebits), diff --git a/src/cpu/pred/multiperspective_perceptron.hh b/src/cpu/pred/multiperspective_perceptron.hh index f1055d5fae3..af00715fdf1 100644 --- a/src/cpu/pred/multiperspective_perceptron.hh +++ b/src/cpu/pred/multiperspective_perceptron.hh @@ -55,7 +55,7 @@ #include #include "base/random.hh" -#include "cpu/pred/bpred_unit.hh" +#include "cpu/pred/conditional.hh" #include "params/MultiperspectivePerceptron.hh" namespace gem5 @@ -64,7 +64,7 @@ namespace gem5 namespace branch_prediction { -class MultiperspectivePerceptron : public BPredUnit +class MultiperspectivePerceptron : public ConditionalPredictor { protected: /** diff --git a/src/cpu/pred/tage.cc b/src/cpu/pred/tage.cc index dd6ef5ddbe0..831d1834aaa 100644 --- a/src/cpu/pred/tage.cc +++ b/src/cpu/pred/tage.cc @@ -62,7 +62,9 @@ namespace gem5 namespace branch_prediction { -TAGE::TAGE(const TAGEParams ¶ms) : BPredUnit(params), tage(params.tage) +TAGE::TAGE(const TAGEParams ¶ms) : + ConditionalPredictor(params), + tage(params.tage) { } diff --git a/src/cpu/pred/tage.hh b/src/cpu/pred/tage.hh index 329ba922ad4..482b546c391 100644 --- a/src/cpu/pred/tage.hh +++ 
b/src/cpu/pred/tage.hh @@ -64,7 +64,7 @@ #include "base/random.hh" #include "base/types.hh" -#include "cpu/pred/bpred_unit.hh" +#include "cpu/pred/conditional.hh" #include "cpu/pred/tage_base.hh" #include "params/TAGE.hh" @@ -74,7 +74,7 @@ namespace gem5 namespace branch_prediction { -class TAGE: public BPredUnit +class TAGE: public ConditionalPredictor { protected: TAGEBase *tage; diff --git a/src/cpu/pred/tagescl_ref.cc b/src/cpu/pred/tagescl_ref.cc index 032bded5285..8a13365b62a 100644 --- a/src/cpu/pred/tagescl_ref.cc +++ b/src/cpu/pred/tagescl_ref.cc @@ -54,7 +54,7 @@ namespace branch_prediction { TageSCLRef::TageSCLRef(const TageSCLRefParams ¶ms) - : BPredUnit(params) + : ConditionalPredictor(params) { predictor = new PREDICTOR(); } diff --git a/src/cpu/pred/tagescl_ref.hh b/src/cpu/pred/tagescl_ref.hh index 7ae2db85c0d..f1cb86e94b3 100644 --- a/src/cpu/pred/tagescl_ref.hh +++ b/src/cpu/pred/tagescl_ref.hh @@ -46,7 +46,7 @@ #include "base/sat_counter.hh" #include "base/types.hh" -#include "cpu/pred/bpred_unit.hh" +#include "cpu/pred/conditional.hh" #include "params/TageSCLRef.hh" namespace gem5 @@ -63,7 +63,7 @@ namespace branch_prediction * predictor state that needs to be recorded or updated; the update can be * determined solely by the branch being taken or not taken. 
*/ -class TageSCLRef : public BPredUnit +class TageSCLRef : public ConditionalPredictor { public: /** diff --git a/src/cpu/pred/tournament.cc b/src/cpu/pred/tournament.cc index a6428575f28..bbedaf15504 100644 --- a/src/cpu/pred/tournament.cc +++ b/src/cpu/pred/tournament.cc @@ -51,7 +51,7 @@ namespace branch_prediction { TournamentBP::TournamentBP(const TournamentBPParams ¶ms) - : BPredUnit(params), + : ConditionalPredictor(params), localPredictorSize(params.localPredictorSize), localCtrBits(params.localCtrBits), localCtrs(localPredictorSize, SatCounter8(localCtrBits)), diff --git a/src/cpu/pred/tournament.hh b/src/cpu/pred/tournament.hh index 36b50c706a4..8de9faa695e 100644 --- a/src/cpu/pred/tournament.hh +++ b/src/cpu/pred/tournament.hh @@ -46,7 +46,7 @@ #include "base/sat_counter.hh" #include "base/types.hh" -#include "cpu/pred/bpred_unit.hh" +#include "cpu/pred/conditional.hh" #include "params/TournamentBP.hh" namespace gem5 @@ -63,7 +63,7 @@ namespace branch_prediction * predictor chooses between the two. Both the global history register * and the selected local history are speculatively updated. */ -class TournamentBP : public BPredUnit +class TournamentBP : public ConditionalPredictor { public: /**