Skip to content
144 changes: 95 additions & 49 deletions src/cpu/pred/btb/btb_tage.cc
Original file line number Diff line number Diff line change
Expand Up @@ -233,8 +233,9 @@ BTBTAGE::generateSinglePrediction(const BTBEntry &btb_entry,

// Do not use LRU; keep logic simple and align with CBP-style replacement

DPRINTF(TAGE, "hit table %d[%lu][%u]: valid %d, tag %lu, ctr %d, useful %d, btb_pc %#lx, pos %u\n",
i, index, way, entry.valid, entry.tag, entry.counter, entry.useful, btb_entry.pc, position);
DPRINTF(TAGE, "hit table %d[%lu][%u]: valid %d, tag %lu, ctr %d, useful %u, btb_pc %#lx, pos %u\n",
i, index, way, entry.valid, entry.tag, entry.counter,
static_cast<unsigned>(entry.useful), btb_entry.pc, position);
break; // only one way can match; avoid multiple hits. TODO: how does RTL handle this?
}
}
Expand Down Expand Up @@ -470,27 +471,12 @@ BTBTAGE::updatePredictorStateAndCheckAllocation(const BTBEntry &entry,

// Update useful bit based on several conditions
bool main_is_correct = main_info.taken() == actual_taken;
bool alt_is_correct_and_strong = alt_info.found &&
(alt_info.taken() == actual_taken) &&
(abs(2 * alt_info.entry.counter + 1) == 7);

// a. Special reset (humility mechanism)
if (alt_is_correct_and_strong && main_is_correct) {
way.useful = 0;
DPRINTF(TAGEUseful, "useful bit reset to 0 due to humility rule\n");
} else if (main_info.taken() != alt_taken) {
// b. Original logic to set useful bit high
if (main_is_correct) {
way.useful = 1;
}
}

// c. Reset u on counter sign flip (becomes weak)
if (way.counter == 0 || way.counter == -1) {
way.useful = 0;
DPRINTF(TAGEUseful, "useful bit reset to 0 due to weak counter\n");
// Match current RTL behavior: useful is only set when the provider
// proves itself against an alternative prediction.
if (main_info.taken() != alt_taken && main_is_correct) {
usefulCtrIncrease(way);
}
DPRINTF(TAGE, "useful bit is now %d\n", way.useful);
DPRINTF(TAGE, "useful counter is now %u\n", static_cast<unsigned>(way.useful));

// No LRU maintenance
}
Expand Down Expand Up @@ -564,10 +550,10 @@ BTBTAGE::handleNewEntryAllocation(const Addr &startPC,
uint64_t &allocated_table,
uint64_t &allocated_index,
uint64_t &allocated_way) {
// Simple set-associative allocation (no LFSR, no per-way table gating):
// - For each table from start_table upward, check the set at computed index.
// - Prefer invalid ways; else choose any way with useful==0 and weak counter.
// - If none, apply a one-step age penalty to a strong, not-useful way (no allocation).
// Match RTL allocation priority:
// 1) invalid way
// 2) weak and not-useful way
// 3) any not-useful way

// Calculate branch position within the block (like RTL's cfiPosition)
unsigned position = getBranchIndexInBlock(entry.pc, startPC);
Expand All @@ -579,48 +565,69 @@ BTBTAGE::handleNewEntryAllocation(const Addr &startPC,

auto &set = tageTable[ti][newIndex];

// Allocate into invalid way or not-useful and weak way
int invalid_way = -1;
for (unsigned way = 0; way < numWays; ++way) {
auto &cand = set[way];
const bool weakish = std::abs(cand.counter * 2 + 1) <= 3; // -3,-2,-1,0,1,2
if (!cand.valid || (!cand.useful && weakish)) {
short newCounter = actual_taken ? 0 : -1;
DPRINTF(TAGE, "allocating entry in table %d[%lu][%u], tag %lu (with pos %u), counter %d, pc %#lx\n",
ti, newIndex, way, newTag, position, newCounter, entry.pc);
cand = TageEntry(newTag, newCounter, entry.pc); // u = 0 default
tageStats.updateAllocSuccess++;
allocated_table = ti;
allocated_index = newIndex;
allocated_way = way;
usefulResetCnt = usefulResetCnt <= 0 ? 0 : usefulResetCnt - 1;
return true;
if (!set[way].valid) {
invalid_way = way;
break;
}
}

// 3) Apply age penalty to one strong, not-useful way to make it replaceable later
int weak_not_useful_way = -1;
int strong_not_useful_way = -1;
for (unsigned way = 0; way < numWays; ++way) {
auto &cand = set[way];
if (!usefulCtrIsSaturateNegative(cand)) {
continue;
}

const bool weakish = std::abs(cand.counter * 2 + 1) <= 3;
if (!cand.useful && !weakish) {
if (cand.counter > 0) cand.counter--; else cand.counter++;
DPRINTF(TAGE, "age penalty applied on table %d[%lu][%u], new ctr %d\n",
ti, newIndex, way, cand.counter);
break; // one penalty per table per update
if (weakish) {
if (weak_not_useful_way == -1) {
weak_not_useful_way = way;
}
} else if (strong_not_useful_way == -1) {
strong_not_useful_way = way;
}
}

int selected_way = -1;
if (invalid_way != -1) {
tageStats.allocBucketHasInvalid++;
selected_way = invalid_way;
} else if (weak_not_useful_way != -1) {
tageStats.allocBucketHasWeakNotUseful++;
selected_way = weak_not_useful_way;
} else if (strong_not_useful_way != -1) {
tageStats.allocBucketHasStrongNotUsefulButNoWeakNotUseful++;
selected_way = strong_not_useful_way;
} else {
tageStats.allocBucketAllUsefulOrNoCandidate++;
}

if (selected_way != -1) {
short newCounter = actual_taken ? 0 : -1;
DPRINTF(TAGE, "allocating entry in table %d[%lu][%u], tag %lu (with pos %u), counter %d, pc %#lx\n",
ti, newIndex, selected_way, newTag, position, newCounter, entry.pc);
set[selected_way] = TageEntry(newTag, newCounter, entry.pc); // u = 0 default
tageStats.updateAllocSuccess++;
allocated_table = ti;
allocated_index = newIndex;
allocated_way = selected_way;
return true;
}

tageStats.updateAllocFailure++;
usefulResetCnt++;
}

usefulResetCnt++;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2: Gate the useful-reset counter on actual table probes

Incrementing usefulResetCnt unconditionally here means mispredictions whose provider is already in the longest-history table (start_table == numPredictors) still advance the global reset counter even though no allocation set is probed. After enough such events, the code resets usefulness for all entries spuriously, which can destabilize trained entries in unrelated tables and skew long-running performance results. The counter should only advance when at least one candidate table was examined and no victim was found.

Useful? React with 👍 / 👎.

if (usefulResetCnt >= 256) {
usefulResetCnt = 0;
tageStats.updateResetU++;
DPRINTF(TAGE, "reset useful bit of all entries\n");
for (auto &table : tageTable) {
for (auto &set : table) {
for (auto &way : set) {
way.useful = false;
usefulCtrReset(way);
}
}
}
Expand Down Expand Up @@ -885,6 +892,32 @@ BTBTAGE::satDecrement(int min, short &counter)
return counter == min;
}

bool
BTBTAGE::usefulCtrIsSaturateNegative(const TageEntry &entry) const
{
    // The useful counter is at its negative saturation point when it
    // has fallen back to the initial value (usefulCtrInit).
    const auto ctr = entry.useful;
    return ctr == usefulCtrInit;
}

bool
BTBTAGE::usefulCtrIsSaturatePositive(const TageEntry &entry) const
{
    // The useful counter is at its positive saturation point when it
    // has reached the maximum value (usefulCtrMax).
    const auto ctr = entry.useful;
    return ctr == usefulCtrMax;
}

void
BTBTAGE::usefulCtrIncrease(TageEntry &entry)
{
    // Saturating increment: never advance past usefulCtrMax.
    if (usefulCtrIsSaturatePositive(entry)) {
        return;
    }
    ++entry.useful;
}

// Reset the entry's useful counter to its initial (saturate-negative)
// value, marking the entry as a replacement candidate again.
void
BTBTAGE::usefulCtrReset(TageEntry &entry)
{
entry.useful = usefulCtrInit;
}

Addr
BTBTAGE::getUseAltIdx(Addr pc) const {
Addr shiftedPc = pc >> instShiftAmt;
Expand Down Expand Up @@ -1026,6 +1059,19 @@ BTBTAGE::TageStats::TageStats(statistics::Group* parent, int numPredictors, int
ADD_STAT(updateAllocFailure, statistics::units::Count::get(), "alloc failure when update"),
ADD_STAT(updateAllocFailureNoValidTable, statistics::units::Count::get(), "alloc failure no valid table when update"),
ADD_STAT(updateAllocSuccess, statistics::units::Count::get(), "alloc success when update"),
ADD_STAT(
allocBucketHasInvalid, statistics::units::Count::get(),
"allocation table probes with an invalid victim"),
ADD_STAT(
allocBucketHasWeakNotUseful, statistics::units::Count::get(),
"allocation table probes with weak and saturate-negative useful victim"),
ADD_STAT(
allocBucketHasStrongNotUsefulButNoWeakNotUseful,
statistics::units::Count::get(),
"allocation table probes with only strong saturate-negative useful victims"),
ADD_STAT(
allocBucketAllUsefulOrNoCandidate, statistics::units::Count::get(),
"allocation table probes with no eligible victim"),
ADD_STAT(updateMispred, statistics::units::Count::get(), "mispred when update"),
ADD_STAT(updateResetU, statistics::units::Count::get(), "reset u when update"),
ADD_STAT(recomputedVsActualDiff, statistics::units::Count::get(), "fetchBlocks where recomputed.taken != actual_taken"),
Expand Down
21 changes: 18 additions & 3 deletions src/cpu/pred/btb/btb_tage.hh
Original file line number Diff line number Diff line change
Expand Up @@ -59,14 +59,14 @@ class BTBTAGE : public TimedBaseBTBPredictor
bool valid; // Whether this entry is valid
Addr tag; // Tag for matching
short counter; // Prediction counter (-4 to 3), 3bits, 0 and -1 are weak
bool useful; // 1-bit usefulness counter; true means useful
uint8_t useful; // 2-bit usefulness counter; 0 means saturate-negative
Addr pc; // branch pc, like branch position, for btb entry pc check
unsigned lruCounter; // Counter for LRU replacement policy

TageEntry() : valid(false), tag(0), counter(0), useful(false), pc(0), lruCounter(0) {}
TageEntry() : valid(false), tag(0), counter(0), useful(0), pc(0), lruCounter(0) {}

TageEntry(Addr tag, short counter, Addr pc) :
valid(true), tag(tag), counter(counter), useful(false), pc(pc), lruCounter(0) {}
valid(true), tag(tag), counter(counter), useful(0), pc(pc), lruCounter(0) {}
bool taken() const {
return counter >= 0;
}
Expand Down Expand Up @@ -257,6 +257,9 @@ class BTBTAGE : public TimedBaseBTBPredictor
// useful bit reset counter, when cnt >= 256, reset useful bit of all entries
int usefulResetCnt{0};

static constexpr uint8_t usefulCtrInit = 0;
static constexpr uint8_t usefulCtrMax = 3;

// Check if a tag matches
bool matchTag(Addr expected, Addr found);

Expand All @@ -281,6 +284,14 @@ class BTBTAGE : public TimedBaseBTBPredictor
// Decrement counter with saturation
bool satDecrement(int min, short &counter);

bool usefulCtrIsSaturateNegative(const TageEntry &entry) const;

bool usefulCtrIsSaturatePositive(const TageEntry &entry) const;

void usefulCtrIncrease(TageEntry &entry);

void usefulCtrReset(TageEntry &entry);

// Get index for useAlt table
Addr getUseAltIdx(Addr pc) const;

Expand Down Expand Up @@ -340,6 +351,10 @@ class BTBTAGE : public TimedBaseBTBPredictor
Scalar updateAllocFailure;
Scalar updateAllocFailureNoValidTable;
Scalar updateAllocSuccess;
Scalar allocBucketHasInvalid;
Scalar allocBucketHasWeakNotUseful;
Scalar allocBucketHasStrongNotUsefulButNoWeakNotUseful;
Scalar allocBucketAllUsefulOrNoCandidate;
Scalar updateMispred;
Scalar updateResetU;

Expand Down
64 changes: 36 additions & 28 deletions src/cpu/pred/btb/microtage.cc
Original file line number Diff line number Diff line change
Expand Up @@ -498,10 +498,10 @@ MicroTAGE::handleNewEntryAllocation(const Addr &startPC,
uint64_t &allocated_table,
uint64_t &allocated_index,
uint64_t &allocated_way) {
// Simple set-associative allocation (no LFSR, no per-way table gating):
// - For each table from start_table upward, check the set at computed index.
// - Prefer invalid ways; else choose any way with useful==0 and weak counter.
// - If none, apply a one-step age penalty to a strong, not-useful way (no allocation).
// Match RTL allocation priority:
// 1) invalid way
// 2) weak and not-useful way
// 3) any not-useful way

// Calculate branch position within the block (like RTL's cfiPosition)
unsigned position = getBranchIndexInBlock(entry.pc, startPC);
Expand All @@ -514,35 +514,43 @@ MicroTAGE::handleNewEntryAllocation(const Addr &startPC,

auto &set = tageTable[ti][newIndex];

// Allocate into invalid way or not-useful and weak way
int selected_way = -1;
for (unsigned way = 0; way < numWays; ++way) {
auto &cand = set[way];
const bool weakish = std::abs(cand.counter * 2 + 1) <= 3; // -3,-2,-1,0,1,2
if (!cand.valid || (!cand.useful && weakish)) {
short newCounter = actual_taken ? 0 : -1;
DPRINTF(UTAGE, "allocating entry in table %d[%lu][%u], tag %lu (with pos %u), counter %d, pc %#lx\n",
ti, newIndex, way, newTag, position, newCounter, entry.pc);
cand = TageEntry(newTag, newCounter, entry.pc); // u = 0 default
tageStats.updateAllocSuccess++;
allocated_table = ti;
allocated_index = newIndex;
allocated_way = way;
usefulResetCnt = usefulResetCnt <= 0 ? 0 : usefulResetCnt - 1;
return true;
if (!set[way].valid) {
selected_way = way;
break;
}
}

// 3) Apply age penalty to one strong, not-useful way to make it replaceable later
for (unsigned way = 0; way < numWays; ++way) {
auto &cand = set[way];
const bool weakish = std::abs(cand.counter * 2 + 1) <= 3;
if (!cand.useful && !weakish) {
if (cand.counter > 0) cand.counter--; else cand.counter++;
DPRINTF(UTAGE, "age penalty applied on table %d[%lu][%u], new ctr %d\n",
ti, newIndex, way, cand.counter);
break; // one penalty per table per update
if (selected_way == -1) {
for (unsigned way = 0; way < numWays; ++way) {
auto &cand = set[way];
const bool weakish = std::abs(cand.counter * 2 + 1) <= 3;
if (!cand.useful && weakish) {
selected_way = way;
break;
}
}
}
if (selected_way == -1) {
for (unsigned way = 0; way < numWays; ++way) {
if (!set[way].useful) {
selected_way = way;
break;
}
}
}
if (selected_way != -1) {
short newCounter = actual_taken ? 0 : -1;
DPRINTF(UTAGE, "allocating entry in table %d[%lu][%u], tag %lu (with pos %u), counter %d, pc %#lx\n",
ti, newIndex, selected_way, newTag, position, newCounter, entry.pc);
set[selected_way] = TageEntry(newTag, newCounter, entry.pc); // u = 0 default
tageStats.updateAllocSuccess++;
allocated_table = ti;
allocated_index = newIndex;
allocated_way = selected_way;
usefulResetCnt = usefulResetCnt <= 0 ? 0 : usefulResetCnt - 1;
return true;
}

tageStats.updateAllocFailure++;
usefulResetCnt++;
Expand Down
Loading
Loading