Skip to content

Commit e9fa09a

Browse files
committed
Upgrade MMU cache to 8×2 set-associative
This implements an 8-set × 2-way set-associative cache for both load and store operations, replacing the previous 2-entry direct-mapped load cache and single-entry store cache. It provides better hit rates while keeping the code simple.
- Load cache: 65% → 98% hit rate (2-entry → 8×2 set-associative)
- Store cache: 83% → 99% hit rate (1-entry → 8×2 set-associative)
- 3-bit parity hash for even distribution across 8 sets
- Simple 1-bit LRU for replacement policy
- 94% reduction in store cache misses

Memory cost: +512 bytes per hart (256B for load + 256B for store)
1 parent bb10925 commit e9fa09a

File tree

3 files changed

+126
-44
lines changed

3 files changed

+126
-44
lines changed

main.c

Lines changed: 40 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -910,6 +910,18 @@ static int semu_step(emu_state_t *emu)
910910
}
911911

912912
#ifdef MMU_CACHE_STATS
913+
static vm_t *global_vm_for_signal = NULL;
914+
915+
/* Forward declaration */
916+
static void print_mmu_cache_stats(vm_t *vm);
917+
918+
static void signal_handler_stats(int sig UNUSED)
919+
{
920+
if (global_vm_for_signal)
921+
print_mmu_cache_stats(global_vm_for_signal);
922+
exit(0);
923+
}
924+
913925
static void print_mmu_cache_stats(vm_t *vm)
914926
{
915927
fprintf(stderr, "\n=== MMU Cache Statistics ===\n");
@@ -918,15 +930,25 @@ static void print_mmu_cache_stats(vm_t *vm)
918930
uint64_t fetch_total =
919931
hart->cache_fetch.hits + hart->cache_fetch.misses;
920932

921-
/* Combine 2-way load cache statistics */
922-
uint64_t load_hits =
923-
hart->cache_load[0].hits + hart->cache_load[1].hits;
924-
uint64_t load_misses =
925-
hart->cache_load[0].misses + hart->cache_load[1].misses;
933+
/* Combine 8-set × 2-way load cache statistics */
934+
uint64_t load_hits = 0, load_misses = 0;
935+
for (int set = 0; set < 8; set++) {
936+
for (int way = 0; way < 2; way++) {
937+
load_hits += hart->cache_load[set].ways[way].hits;
938+
load_misses += hart->cache_load[set].ways[way].misses;
939+
}
940+
}
926941
uint64_t load_total = load_hits + load_misses;
927942

928-
uint64_t store_total =
929-
hart->cache_store.hits + hart->cache_store.misses;
943+
/* Combine 8-set × 2-way store cache statistics */
944+
uint64_t store_hits = 0, store_misses = 0;
945+
for (int set = 0; set < 8; set++) {
946+
for (int way = 0; way < 2; way++) {
947+
store_hits += hart->cache_store[set].ways[way].hits;
948+
store_misses += hart->cache_store[set].ways[way].misses;
949+
}
950+
}
951+
uint64_t store_total = store_hits + store_misses;
930952

931953
fprintf(stderr, "\nHart %u:\n", i);
932954
fprintf(stderr, " Fetch: %12llu hits, %12llu misses",
@@ -936,18 +958,18 @@ static void print_mmu_cache_stats(vm_t *vm)
936958
100.0 * hart->cache_fetch.hits / fetch_total);
937959
fprintf(stderr, "\n");
938960

939-
fprintf(stderr, " Load: %12llu hits, %12llu misses (2-way)",
940-
load_hits, load_misses);
961+
fprintf(stderr, " Load: %12llu hits, %12llu misses (8x2)", load_hits,
962+
load_misses);
941963
if (load_total > 0)
942964
fprintf(stderr, " (%.2f%% hit rate)",
943965
100.0 * load_hits / load_total);
944966
fprintf(stderr, "\n");
945967

946-
fprintf(stderr, " Store: %12llu hits, %12llu misses",
947-
hart->cache_store.hits, hart->cache_store.misses);
968+
fprintf(stderr, " Store: %12llu hits, %12llu misses (8x2)", store_hits,
969+
store_misses);
948970
if (store_total > 0)
949971
fprintf(stderr, " (%.2f%% hit rate)",
950-
100.0 * hart->cache_store.hits / store_total);
972+
100.0 * store_hits / store_total);
951973
fprintf(stderr, "\n");
952974
}
953975
}
@@ -1246,6 +1268,12 @@ int main(int argc, char **argv)
12461268
if (ret)
12471269
return ret;
12481270

1271+
#ifdef MMU_CACHE_STATS
1272+
global_vm_for_signal = &emu.vm;
1273+
signal(SIGINT, signal_handler_stats);
1274+
signal(SIGTERM, signal_handler_stats);
1275+
#endif
1276+
12491277
if (emu.debug)
12501278
ret = semu_run_debug(&emu);
12511279
else

riscv.c

Lines changed: 76 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -185,9 +185,18 @@ static inline uint32_t read_rs2(const hart_t *vm, uint32_t insn)
185185
void mmu_invalidate(hart_t *vm)
186186
{
187187
vm->cache_fetch.n_pages = 0xFFFFFFFF;
188-
vm->cache_load[0].n_pages = 0xFFFFFFFF;
189-
vm->cache_load[1].n_pages = 0xFFFFFFFF;
190-
vm->cache_store.n_pages = 0xFFFFFFFF;
188+
/* Invalidate all 8 sets × 2 ways for load cache */
189+
for (int set = 0; set < 8; set++) {
190+
for (int way = 0; way < 2; way++)
191+
vm->cache_load[set].ways[way].n_pages = 0xFFFFFFFF;
192+
vm->cache_load[set].lru = 0; /* Reset LRU to way 0 */
193+
}
194+
/* Invalidate all 8 sets × 2 ways for store cache */
195+
for (int set = 0; set < 8; set++) {
196+
for (int way = 0; way < 2; way++)
197+
vm->cache_store[set].ways[way].n_pages = 0xFFFFFFFF;
198+
vm->cache_store[set].lru = 0; /* Reset LRU to way 0 */
199+
}
191200
}
192201

193202
/* Pre-verify the root page table to minimize page table access during
@@ -333,13 +342,36 @@ static void mmu_load(hart_t *vm,
333342
{
334343
uint32_t vpn = addr >> RV_PAGE_SHIFT;
335344
uint32_t phys_addr;
336-
/* 2-entry direct-mapped cache: use parity hash to select entry */
337-
uint32_t index = __builtin_parity(vpn) & 0x1;
345+
/* 8-set × 2-way set-associative cache: use 3-bit parity hash */
346+
uint32_t set_idx = (__builtin_parity(vpn & 0xAAAAAAAA) << 2) |
347+
(__builtin_parity(vpn & 0x55555555) << 1) |
348+
__builtin_parity(vpn & 0xCCCCCCCC);
349+
350+
mmu_cache_set_t *set = &vm->cache_load[set_idx];
351+
352+
/* Check both ways in the set */
353+
int hit_way = -1;
354+
for (int way = 0; way < 2; way++) {
355+
if (likely(set->ways[way].n_pages == vpn)) {
356+
hit_way = way;
357+
break;
358+
}
359+
}
338360

339-
if (unlikely(vpn != vm->cache_load[index].n_pages)) {
361+
if (likely(hit_way >= 0)) {
362+
/* Cache hit: reconstruct physical address from cached PPN */
363+
#ifdef MMU_CACHE_STATS
364+
set->ways[hit_way].hits++;
365+
#endif
366+
phys_addr = (set->ways[hit_way].phys_ppn << RV_PAGE_SHIFT) |
367+
(addr & MASK(RV_PAGE_SHIFT));
368+
/* Update LRU: mark the other way as replacement candidate */
369+
set->lru = 1 - hit_way;
370+
} else {
340371
/* Cache miss: do full translation */
372+
int victim_way = set->lru; /* Use LRU bit to select victim */
341373
#ifdef MMU_CACHE_STATS
342-
vm->cache_load[index].misses++;
374+
set->ways[victim_way].misses++;
343375
#endif
344376
phys_addr = addr;
345377
mmu_translate(vm, &phys_addr,
@@ -348,16 +380,11 @@ static void mmu_load(hart_t *vm,
348380
RV_EXC_LOAD_PFAULT);
349381
if (vm->error)
350382
return;
351-
/* Cache physical page number (not a pointer) */
352-
vm->cache_load[index].n_pages = vpn;
353-
vm->cache_load[index].phys_ppn = phys_addr >> RV_PAGE_SHIFT;
354-
} else {
355-
/* Cache hit: reconstruct physical address from cached PPN */
356-
#ifdef MMU_CACHE_STATS
357-
vm->cache_load[index].hits++;
358-
#endif
359-
phys_addr = (vm->cache_load[index].phys_ppn << RV_PAGE_SHIFT) |
360-
(addr & MASK(RV_PAGE_SHIFT));
383+
/* Replace victim way with new translation */
384+
set->ways[victim_way].n_pages = vpn;
385+
set->ways[victim_way].phys_ppn = phys_addr >> RV_PAGE_SHIFT;
386+
/* Update LRU: mark the other way for next eviction */
387+
set->lru = 1 - victim_way;
361388
}
362389

363390
vm->mem_load(vm, phys_addr, width, value);
@@ -376,28 +403,48 @@ static bool mmu_store(hart_t *vm,
376403
{
377404
uint32_t vpn = addr >> RV_PAGE_SHIFT;
378405
uint32_t phys_addr;
406+
/* 8-set × 2-way set-associative cache: use 3-bit parity hash */
407+
uint32_t set_idx = (__builtin_parity(vpn & 0xAAAAAAAA) << 2) |
408+
(__builtin_parity(vpn & 0x55555555) << 1) |
409+
__builtin_parity(vpn & 0xCCCCCCCC);
410+
411+
mmu_cache_set_t *set = &vm->cache_store[set_idx];
412+
413+
/* Check both ways in the set */
414+
int hit_way = -1;
415+
for (int way = 0; way < 2; way++) {
416+
if (likely(set->ways[way].n_pages == vpn)) {
417+
hit_way = way;
418+
break;
419+
}
420+
}
379421

380-
if (unlikely(vpn != vm->cache_store.n_pages)) {
422+
if (likely(hit_way >= 0)) {
423+
/* Cache hit: reconstruct physical address from cached PPN */
424+
#ifdef MMU_CACHE_STATS
425+
set->ways[hit_way].hits++;
426+
#endif
427+
phys_addr = (set->ways[hit_way].phys_ppn << RV_PAGE_SHIFT) |
428+
(addr & MASK(RV_PAGE_SHIFT));
429+
/* Update LRU: mark the other way as replacement candidate */
430+
set->lru = 1 - hit_way;
431+
} else {
381432
/* Cache miss: do full translation */
433+
int victim_way = set->lru; /* Use LRU bit to select victim */
382434
#ifdef MMU_CACHE_STATS
383-
vm->cache_store.misses++;
435+
set->ways[victim_way].misses++;
384436
#endif
385437
phys_addr = addr;
386438
mmu_translate(vm, &phys_addr, (1 << 2), (1 << 6) | (1 << 7),
387439
vm->sstatus_sum && vm->s_mode, RV_EXC_STORE_FAULT,
388440
RV_EXC_STORE_PFAULT);
389441
if (vm->error)
390442
return false;
391-
/* Cache physical page number (not a pointer) */
392-
vm->cache_store.n_pages = vpn;
393-
vm->cache_store.phys_ppn = phys_addr >> RV_PAGE_SHIFT;
394-
} else {
395-
/* Cache hit: reconstruct physical address from cached PPN */
396-
#ifdef MMU_CACHE_STATS
397-
vm->cache_store.hits++;
398-
#endif
399-
phys_addr = (vm->cache_store.phys_ppn << RV_PAGE_SHIFT) |
400-
(addr & MASK(RV_PAGE_SHIFT));
443+
/* Replace victim way with new translation */
444+
set->ways[victim_way].n_pages = vpn;
445+
set->ways[victim_way].phys_ppn = phys_addr >> RV_PAGE_SHIFT;
446+
/* Update LRU: mark the other way for next eviction */
447+
set->lru = 1 - victim_way;
401448
}
402449

403450
if (unlikely(cond)) {

riscv.h

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,12 @@ typedef struct {
5151
#endif
5252
} mmu_addr_cache_t;
5353

54+
/* One set of a 2-way set-associative cache; used by both the load and store caches */
55+
typedef struct {
56+
mmu_addr_cache_t ways[2]; /* 2-way associative */
57+
uint8_t lru; /* LRU bit: 0 or 1 (which way to replace) */
58+
} mmu_cache_set_t;
59+
5460
/* To use the emulator, start by initializing a hart_t object with zero values,
5561
* invoke vm_init(), and set the required environment-supplied callbacks. You
5662
* may also set other necessary fields such as argument registers and s_mode,
@@ -101,9 +107,10 @@ struct __hart_internal {
101107
uint32_t exc_cause, exc_val;
102108

103109
mmu_fetch_cache_t cache_fetch;
104-
/* 2-entry direct-mapped with hash-based indexing */
105-
mmu_addr_cache_t cache_load[2];
106-
mmu_addr_cache_t cache_store;
110+
/* 8-set × 2-way set-associative cache with 3-bit parity hash indexing */
111+
mmu_cache_set_t cache_load[8];
112+
/* 8-set × 2-way set-associative cache for store operations */
113+
mmu_cache_set_t cache_store[8];
107114

108115
/* Supervisor state */
109116
bool s_mode;

0 commit comments

Comments
 (0)