Skip to content

Commit f582165

Browse files
committed
L1 hit prediction_table[hashed_pc]--
1 parent 853bbae commit f582165

File tree

3 files changed

+200
-4
lines changed

3 files changed

+200
-4
lines changed

src/gpgpu-sim/gpu-cache.cc

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,51 @@ enum cache_request_status tag_array::probe(new_addr_type addr, unsigned &idx,
332332
return MISS;
333333
}
334334

335+
/// cwpeng
336+
// Stamps `hashed_pc` onto every way of addr's set whose stored tag equals
// addr's tag. The scan visits all ways (it does not stop at the first match)
// and `mf` is not read.
void tag_array::set_hashed_pc_from_tag(new_addr_type addr, mem_fetch *mf, uint8_t hashed_pc){
  const unsigned set = m_config.set_index(addr);
  const new_addr_type wanted_tag = m_config.tag(addr);
  const unsigned ways = m_config.m_assoc;
  const unsigned first_slot = set * ways;

  unsigned way = 0;
  while (way < ways) {
    cache_block_t *blk = m_lines[first_slot + way];
    if (blk->m_tag == wanted_tag) blk->m_hashed_pc = hashed_pc;
    ++way;
  }
}
349+
350+
// Writes `bypassBit` into m_bypassBit of every way in addr's set whose stored
// tag equals addr's tag. All ways are examined; `mf` is not read.
void tag_array::set_bypass_bit_from_tag(new_addr_type addr, mem_fetch *mf, bool bypassBit){
  const unsigned set = m_config.set_index(addr);
  const new_addr_type wanted_tag = m_config.tag(addr);
  const unsigned ways = m_config.m_assoc;

  // Ways of one set occupy consecutive slots starting at set * ways.
  const unsigned slot_end = set * ways + ways;
  for (unsigned slot = set * ways; slot != slot_end; ++slot) {
    cache_block_t *blk = m_lines[slot];
    if (blk->m_tag != wanted_tag) continue;
    blk->m_bypassBit = bypassBit;
  }
}
363+
364+
// Returns the bypass bit stored on the first way of addr's set whose tag
// equals addr's tag, or false when no way in the set holds that tag.
//
// addr : address whose set index and tag are derived through m_config.
// mf   : not read by this function; kept for signature parity with the
//        sibling *_from_tag accessors.
bool tag_array::get_bypass_bit_from_tag(new_addr_type addr, mem_fetch *mf){
  unsigned set_index = m_config.set_index(addr);
  new_addr_type tag = m_config.tag(addr);

  for (unsigned way = 0; way < m_config.m_assoc; way++) {
    unsigned index = set_index * m_config.m_assoc + way;
    cache_block_t *line = m_lines[index];
    if (line->m_tag == tag) {
      return line->m_bypassBit;
    }
  }

  // No matching line. The function previously fell off the end here, which is
  // undefined behaviour for a non-void function; report "do not bypass".
  return false;
}
377+
378+
//cwpeng
379+
335380
enum cache_request_status tag_array::access(new_addr_type addr, unsigned time,
336381
unsigned &idx, mem_fetch *mf) {
337382
bool wb = false;
@@ -1836,6 +1881,38 @@ enum cache_request_status data_cache::rd_hit_base(
18361881
return HIT;
18371882
}
18381883

1884+
//cwpeng
1885+
// Read-hit handler that also maintains the per-PC prediction table.
//
// On a read hit:
//   1. the hashed PC currently stored on the hit line is used to decrement its
//      prediction-table counter, saturating at 0;
//   2. the line is re-stamped with the hashed PC of the current request;
//   3. the tag array is accessed and atomics are marked MODIFIED, following
//      the same sequence as the atomic handling at the end of this function's
//      original form.
//
// addr                 : request address.
// cache_index          : passed to tag_array::access, then used to fetch the
//                        block for atomics.
// mf                   : the memory request; supplies the PC, atomic flag and
//                        sector/byte masks.
// time                 : forwarded to tag_array::access.
// events, status       : not read here; part of the hit-handler signature.
// l1d_prediction_table : table of counters indexed by hashed PC. The visible
//                        caller passes a 128-entry array, so indices are
//                        limited to 7 bits. May be null, in which case the
//                        counter update is skipped.
// Returns HIT.
enum cache_request_status data_cache::rd_hit_base_l1d(
    new_addr_type addr, unsigned cache_index, mem_fetch *mf, unsigned time,
    std::list<cache_event> &events, enum cache_request_status status,
    uint8_t* l1d_prediction_table //cwpeng
    ) {
  // The hashed PC is a 7-bit value. An unmasked uint8_t can reach 255 and
  // would index past the end of a 128-entry table.
  const unsigned kHashedPcMask = 0x7F;

  new_addr_type block_addr = m_config.block_addr(addr);

  // Mask on lookup as well as on store: the stored value may have been
  // written elsewhere without the mask.
  const uint8_t storedhashedPC = static_cast<uint8_t>(
      m_tag_array->get_hashed_pc_from_tag(addr, mf) & kHashedPcMask);

  // Saturating counter: stays at 0 once it reaches 0.
  if (l1d_prediction_table && l1d_prediction_table[storedhashedPC] > 0) {
    l1d_prediction_table[storedhashedPC]--;
  }

  // Remember the PC of the most recent access to this line.
  m_tag_array->set_hashed_pc_from_tag(
      addr, mf, static_cast<uint8_t>(mf->get_pc() & kHashedPcMask)); //cwpeng

  m_tag_array->access(block_addr, time, cache_index, mf);
  // Atomics treated as global read/write requests - Perform read, mark line as
  // MODIFIED
  if (mf->isatomic()) {
    assert(mf->get_access_type() == GLOBAL_ACC_R);
    cache_block_t *block = m_tag_array->get_block(cache_index);
    if (!block->is_modified_line()) {
      m_tag_array->inc_dirty();
    }
    block->set_status(MODIFIED, mf->get_access_sector_mask());
    block->set_byte_mask(mf);
  }
  return HIT;
}
1915+
18391916
/****** Read miss functions (Set by config file) ******/
18401917

18411918
/// Baseline read miss: Send read request to lower level memory,
@@ -1969,6 +2046,52 @@ enum cache_request_status data_cache::process_tag_probe(
19692046
return access_status;
19702047
}
19712048

2049+
enum cache_request_status data_cache::process_tag_probe(
2050+
bool wr, enum cache_request_status probe_status, new_addr_type addr,
2051+
unsigned cache_index, mem_fetch *mf, unsigned time,
2052+
std::list<cache_event> &events,
2053+
uint8_t* l1d_prediction_table //cwpeng
2054+
) {
2055+
// Each function pointer ( m_[rd/wr]_[hit/miss] ) is set in the
2056+
// data_cache constructor to reflect the corresponding cache configuration
2057+
// options. Function pointers were used to avoid many long conditional
2058+
// branches resulting from many cache configuration options.
2059+
cache_request_status access_status = probe_status;
2060+
if (wr) { // Write
2061+
if (probe_status == HIT) {
2062+
access_status =
2063+
(this->*m_wr_hit)(addr, cache_index, mf, time, events, probe_status);
2064+
} else if ((probe_status != RESERVATION_FAIL) ||
2065+
(probe_status == RESERVATION_FAIL &&
2066+
m_config.m_write_alloc_policy == NO_WRITE_ALLOCATE)) {
2067+
access_status =
2068+
(this->*m_wr_miss)(addr, cache_index, mf, time, events, probe_status);
2069+
} else {
2070+
// the only reason for reservation fail here is LINE_ALLOC_FAIL (i.e all
2071+
// lines are reserved)
2072+
m_stats.inc_fail_stats(mf->get_access_type(), LINE_ALLOC_FAIL,
2073+
mf->get_streamID());
2074+
}
2075+
} else { // Read
2076+
if (probe_status == HIT) {
2077+
access_status =
2078+
(this->*m_rd_hit_l1d)(addr, cache_index, mf, time, events, probe_status, l1d_prediction_table);
2079+
} else if (probe_status != RESERVATION_FAIL) {
2080+
access_status =
2081+
(this->*m_rd_miss)(addr, cache_index, mf, time, events, probe_status);
2082+
} else {
2083+
// the only reason for reservation fail here is LINE_ALLOC_FAIL (i.e all
2084+
// lines are reserved)
2085+
m_stats.inc_fail_stats(mf->get_access_type(), LINE_ALLOC_FAIL,
2086+
mf->get_streamID());
2087+
}
2088+
}
2089+
2090+
m_bandwidth_management.use_data_port(mf, access_status, events);
2091+
return access_status;
2092+
}
2093+
2094+
19722095
// Both the L1 and L2 currently use the same access function.
19732096
// Differentiation between the two caches is done through configuration
19742097
// of caching policies.
@@ -1994,6 +2117,28 @@ enum cache_request_status data_cache::access(new_addr_type addr, mem_fetch *mf,
19942117
return access_status;
19952118
}
19962119

2120+
// Prediction-table overload of the data-cache access path: probes the tag
// array for the request's block, hands the probe result to the
// prediction-table overload of process_tag_probe, then records the outcome in
// both the cumulative and the per-window statistics.
//
// Returns the status produced by process_tag_probe.
enum cache_request_status data_cache::access(new_addr_type addr, mem_fetch *mf,
                                             unsigned time,
                                             std::list<cache_event> &events,
                                             uint8_t* l1d_prediction_table // cwpeng
                                             ) {
  assert(mf->get_data_size() <= m_config.get_atom_sz());

  const bool is_write_req = mf->get_is_write();
  const new_addr_type line_addr = m_config.block_addr(addr);

  // (unsigned)-1 is the initial index handed to probe(), which takes it by
  // reference.
  unsigned slot = (unsigned)-1;
  const enum cache_request_status probed =
      m_tag_array->probe(line_addr, slot, mf, mf->is_write(), true);

  const enum cache_request_status result = process_tag_probe(
      is_write_req, probed, addr, slot, mf, time, events,
      l1d_prediction_table); //cwpeng

  m_stats.inc_stats(mf->get_access_type(),
                    m_stats.select_stats_status(probed, result),
                    mf->get_streamID());
  m_stats.inc_stats_pw(mf->get_access_type(),
                       m_stats.select_stats_status(probed, result),
                       mf->get_streamID());
  return result;
}
2141+
19972142
/// This is meant to model the first level data cache in Fermi.
19982143
/// It is write-evict (global) or write-back (local) at the
19992144
/// granularity of individual blocks (Set by GPGPU-Sim configuration file)
@@ -2004,6 +2149,13 @@ enum cache_request_status l1_cache::access(new_addr_type addr, mem_fetch *mf,
20042149
return data_cache::access(addr, mf, time, events);
20052150
}
20062151

2152+
// L1 entry point for the prediction-table access path. Adds no L1-specific
// behaviour: it forwards every argument to the data_cache overload and
// returns that call's status.
enum cache_request_status l1_cache::access(new_addr_type addr, mem_fetch *mf,
                                           unsigned time,
                                           std::list<cache_event> &events,
                                           uint8_t* l1_prediction_table) { // cwpeng
  const enum cache_request_status result =
      data_cache::access(addr, mf, time, events, l1_prediction_table);
  return result;
}
2158+
20072159
// The l2 cache access function calls the base data_cache access
20082160
// implementation. When the L2 needs to diverge from L1, L2 specific
20092161
// changes should be made here.

src/gpgpu-sim/gpu-cache.h

Lines changed: 47 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ struct cache_block_t {
126126
cache_block_t() {
127127
m_tag = 0;
128128
m_block_addr = 0;
129-
hashPC = 0 ; // cwpeng initialize hashed PC
129+
m_hashed_pc = 0 ; // cwpeng initialize hashed PC
130130
}
131131

132132
virtual void allocate(new_addr_type tag, new_addr_type block_addr,
@@ -169,7 +169,9 @@ struct cache_block_t {
169169
new_addr_type m_tag;
170170
new_addr_type m_block_addr;
171171

172-
uint8_t hashPC ; // cwpeng hashed PC in memory block (7 bits)
172+
// uint8_t hashPC ; // cwpeng hashed PC in memory block (7 bits)
173+
uint8_t m_hashed_pc; // cwpeng hashed PC in memory block (7 bits)
174+
bool m_bypassBit; // rajesh cs752 L2 Bypass Bit
173175
};
174176

175177
struct line_cache_block : public cache_block_t {
@@ -182,6 +184,9 @@ struct line_cache_block : public cache_block_t {
182184
m_set_modified_on_fill = false;
183185
m_set_readable_on_fill = false;
184186
m_readable = true;
187+
188+
m_hashed_pc = 0 ; // cwpeng initialize hashed PC
189+
// record the last PC that access this block
185190
}
186191
void allocate(new_addr_type tag, new_addr_type block_addr, unsigned time,
187192
mem_access_sector_mask_t sector_mask) {
@@ -988,6 +993,11 @@ class tag_array {
988993
void remove_pending_line(mem_fetch *mf);
989994
void inc_dirty() { m_dirty++; }
990995

996+
uint8_t get_hashed_pc_from_tag(new_addr_type addr, mem_fetch *mf);
997+
void set_hashed_pc_from_tag(new_addr_type addr, mem_fetch *mf, uint8_t hashed_pc);
998+
void set_bypass_bit_from_tag(new_addr_type addr, mem_fetch *mf, bool bypassBit);
999+
bool get_bypass_bit_from_tag(new_addr_type addr, mem_fetch *mf);
1000+
9911001
protected:
9921002
// This constructor is intended for use only from derived classes that wish to
9931003
// avoid unnecessary memory allocation that takes place in the
@@ -1531,6 +1541,7 @@ class data_cache : public baseline_cache {
15311541

15321542
// Set read hit function
15331543
m_rd_hit = &data_cache::rd_hit_base;
1544+
m_rd_hit_l1d = &data_cache::rd_hit_base_l1d;
15341545

15351546
// Set read miss function
15361547
m_rd_miss = &data_cache::rd_miss_base;
@@ -1582,6 +1593,12 @@ class data_cache : public baseline_cache {
15821593
unsigned time,
15831594
std::list<cache_event> &events);
15841595

1596+
virtual enum cache_request_status access(new_addr_type addr, mem_fetch *mf,
1597+
unsigned time,
1598+
std::list<cache_event> &events,
1599+
uint8_t* l1_prediction_table // cwpeng
1600+
);
1601+
15851602
protected:
15861603
data_cache(const char *name, cache_config &config, int core_id, int type_id,
15871604
mem_fetch_interface *memport, mem_fetch_allocator *mfcreator,
@@ -1612,6 +1629,15 @@ class data_cache : public baseline_cache {
16121629
mem_fetch *mf, unsigned time,
16131630
std::list<cache_event> &events);
16141631

1632+
enum cache_request_status process_tag_probe(bool wr,
1633+
enum cache_request_status status,
1634+
new_addr_type addr,
1635+
unsigned cache_index,
1636+
mem_fetch *mf, unsigned time,
1637+
std::list<cache_event> &events,
1638+
uint8_t* l1_prediction_table // cwpeng
1639+
);
1640+
16151641
protected:
16161642
mem_fetch_allocator *m_memfetch_creator;
16171643

@@ -1681,12 +1707,23 @@ class data_cache : public baseline_cache {
16811707
enum cache_request_status (data_cache::*m_rd_hit)(
16821708
new_addr_type addr, unsigned cache_index, mem_fetch *mf, unsigned time,
16831709
std::list<cache_event> &events, enum cache_request_status status);
1710+
enum cache_request_status (data_cache::*m_rd_hit_l1d)( //cwpeng
1711+
new_addr_type addr, unsigned cache_index, mem_fetch *mf, unsigned time,
1712+
std::list<cache_event> &events, enum cache_request_status status, uint8_t *l1d_prediction_table);
16841713
enum cache_request_status rd_hit_base(new_addr_type addr,
16851714
unsigned cache_index, mem_fetch *mf,
16861715
unsigned time,
16871716
std::list<cache_event> &events,
16881717
enum cache_request_status status);
16891718

1719+
enum cache_request_status rd_hit_base_l1d(new_addr_type addr,
1720+
unsigned cache_index, mem_fetch *mf,
1721+
unsigned time,
1722+
std::list<cache_event> &events,
1723+
enum cache_request_status status,
1724+
uint8_t* l1_prediction_table // cwpeng
1725+
);
1726+
16901727
/******* Read-miss configs *******/
16911728
enum cache_request_status (data_cache::*m_rd_miss)(
16921729
new_addr_type addr, unsigned cache_index, mem_fetch *mf, unsigned time,
@@ -1719,7 +1756,14 @@ class l1_cache : public data_cache {
17191756

17201757
virtual enum cache_request_status access(new_addr_type addr, mem_fetch *mf,
17211758
unsigned time,
1722-
std::list<cache_event> &events);
1759+
std::list<cache_event> &events
1760+
);
1761+
1762+
virtual enum cache_request_status access(new_addr_type addr, mem_fetch *mf,
1763+
unsigned time,
1764+
std::list<cache_event> &events,
1765+
uint8_t* l1_prediction_table // cwpeng
1766+
);
17231767

17241768
uint8_t prediction_table[128] ; // cwpeng prediction table in L1 cache (4 bits each entry)
17251769

src/gpgpu-sim/shader.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2127,7 +2127,7 @@ void ldst_unit::L1_latency_queue_cycle() {
21272127
m_L1D->access(mf_next->get_addr(), mf_next,
21282128
m_core->get_gpu()->gpu_sim_cycle +
21292129
m_core->get_gpu()->gpu_tot_sim_cycle,
2130-
events);
2130+
events, m_L1D->prediction_table);
21312131

21322132
bool write_sent = was_write_sent(events);
21332133
bool read_sent = was_read_sent(events);

0 commit comments

Comments
 (0)