-
Notifications
You must be signed in to change notification settings - Fork 64
Optimize the instruction fetch path #103
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,5 @@ | ||
| #include <stdio.h> | ||
| #include <string.h> | ||
|
|
||
| #include "common.h" | ||
| #include "device.h" | ||
|
|
@@ -180,11 +181,17 @@ static inline uint32_t read_rs2(const hart_t *vm, uint32_t insn) | |
| return vm->x_regs[decode_rs2(insn)]; | ||
| } | ||
|
|
||
| static inline void icache_invalidate_all(hart_t *vm) | ||
| { | ||
| memset(&vm->icache, 0, sizeof(vm->icache)); | ||
| } | ||
|
|
||
| /* virtual addressing */ | ||
|
|
||
| void mmu_invalidate(hart_t *vm) | ||
| { | ||
| vm->cache_fetch.n_pages = 0xFFFFFFFF; | ||
| vm->cache_fetch[0].n_pages = 0xFFFFFFFF; | ||
| vm->cache_fetch[1].n_pages = 0xFFFFFFFF; | ||
| /* Invalidate all 8 sets × 2 ways for load cache */ | ||
| for (int set = 0; set < 8; set++) { | ||
| for (int way = 0; way < 2; way++) | ||
|
|
@@ -197,6 +204,7 @@ void mmu_invalidate(hart_t *vm) | |
| vm->cache_store[set].ways[way].n_pages = 0xFFFFFFFF; | ||
| vm->cache_store[set].lru = 0; /* Reset LRU to way 0 */ | ||
| } | ||
| icache_invalidate_all(vm); | ||
| } | ||
|
|
||
| /* Invalidate MMU caches for a specific virtual address range. | ||
|
|
@@ -226,10 +234,24 @@ void mmu_invalidate_range(hart_t *vm, uint32_t start_addr, uint32_t size) | |
| end_addr = UINT32_MAX; | ||
| uint32_t end_vpn = (uint32_t) end_addr >> RV_PAGE_SHIFT; | ||
|
|
||
| /* Cache invalidation for fetch cache */ | ||
| if (vm->cache_fetch.n_pages >= start_vpn && | ||
| vm->cache_fetch.n_pages <= end_vpn) | ||
| vm->cache_fetch.n_pages = 0xFFFFFFFF; | ||
| /* Invalidate fetch cache: 2 entry */ | ||
| for (int i = 0; i < 2; i++) { | ||
| if (vm->cache_fetch[i].n_pages >= start_vpn && | ||
| vm->cache_fetch[i].n_pages <= end_vpn) | ||
| vm->cache_fetch[i].n_pages = 0xFFFFFFFF; | ||
| } | ||
|
|
||
| /* Invalidate I-cache: 256 blocks */ | ||
| for (int i = 0; i < ICACHE_BLOCKS; i++) { | ||
| icache_block_t *blk = &vm->icache.block[i]; | ||
| if (!blk->valid) | ||
| continue; | ||
|
|
||
| uint32_t icache_vpn = (blk->tag << ICACHE_INDEX_BITS) | i; | ||
| icache_vpn >>= (RV_PAGE_SHIFT - ICACHE_OFFSET_BITS); | ||
| if (icache_vpn >= start_vpn && icache_vpn <= end_vpn) | ||
| blk->valid = false; | ||
| } | ||
|
|
||
| /* Invalidate load cache: 8 sets × 2 ways */ | ||
| for (int set = 0; set < 8; set++) { | ||
|
|
@@ -361,10 +383,38 @@ static void mmu_fence(hart_t *vm, uint32_t insn UNUSED) | |
|
|
||
| static void mmu_fetch(hart_t *vm, uint32_t addr, uint32_t *value) | ||
| { | ||
| uint32_t idx = (addr >> ICACHE_OFFSET_BITS) & ICACHE_INDEX_MASK; | ||
| uint32_t tag = addr >> (ICACHE_OFFSET_BITS + ICACHE_INDEX_BITS); | ||
| icache_block_t *blk = &vm->icache.block[idx]; | ||
| uint32_t vpn = addr >> RV_PAGE_SHIFT; | ||
| if (unlikely(vpn != vm->cache_fetch.n_pages)) { | ||
| uint32_t index = __builtin_parity(vpn) & 0x1; | ||
|
|
||
| #ifdef MMU_CACHE_STATS | ||
| vm->cache_fetch[index].total_fetch++; | ||
| #endif | ||
|
|
||
| /* I-cache lookup */ | ||
| if (likely(blk->valid && blk->tag == tag)) { | ||
| /* I-cache hit */ | ||
| #ifdef MMU_CACHE_STATS | ||
| vm->cache_fetch[index].icache_hits++; | ||
| #endif | ||
| uint32_t ofs = addr & ICACHE_BLOCK_MASK; | ||
| *value = *(const uint32_t *) (blk->base + ofs); | ||
| return; | ||
| } | ||
| /* I-cache miss */ | ||
| else { | ||
| #ifdef MMU_CACHE_STATS | ||
| vm->cache_fetch[index].icache_misses++; | ||
| #endif | ||
| } | ||
|
|
||
| /* I-cache miss, 2-entry TLB lookup */ | ||
| if (unlikely(vpn != vm->cache_fetch[index].n_pages)) { | ||
| /* TLB miss */ | ||
| #ifdef MMU_CACHE_STATS | ||
| vm->cache_fetch.misses++; | ||
| vm->cache_fetch[index].tlb_misses++; | ||
| #endif | ||
| mmu_translate(vm, &addr, (1 << 3), (1 << 6), false, RV_EXC_FETCH_FAULT, | ||
| RV_EXC_FETCH_PFAULT); | ||
|
|
@@ -374,15 +424,24 @@ static void mmu_fetch(hart_t *vm, uint32_t addr, uint32_t *value) | |
| vm->mem_fetch(vm, addr >> RV_PAGE_SHIFT, &page_addr); | ||
| if (vm->error) | ||
| return; | ||
| vm->cache_fetch.n_pages = vpn; | ||
| vm->cache_fetch.page_addr = page_addr; | ||
| vm->cache_fetch[index].n_pages = vpn; | ||
| vm->cache_fetch[index].page_addr = page_addr; | ||
| } | ||
| #ifdef MMU_CACHE_STATS | ||
| /* TLB hit */ | ||
| else { | ||
| vm->cache_fetch.hits++; | ||
| } | ||
| #ifdef MMU_CACHE_STATS | ||
| vm->cache_fetch[index].tlb_hits++; | ||
| #endif | ||
| *value = vm->cache_fetch.page_addr[(addr >> 2) & MASK(RV_PAGE_SHIFT - 2)]; | ||
| } | ||
|
|
||
| *value = | ||
| vm->cache_fetch[index].page_addr[(addr >> 2) & MASK(RV_PAGE_SHIFT - 2)]; | ||
|
|
||
| /* fill into the I-cache */ | ||
| uint32_t block_off = (addr & RV_PAGE_MASK) & ~ICACHE_BLOCK_MASK; | ||
| blk->base = (const uint8_t *) vm->cache_fetch[index].page_addr + block_off; | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Pointer aliasing time bomb:
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does this mean that I also need to add I-cache and victim-cache invalidation inside the
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have added the relevant code in for (int i = 0; i < ICACHE_BLOCKS; i++) {
icache_block_t *blk = &vm->icache.block[i];
if (!blk->valid)
continue;
uint32_t icache_vpn = (blk->tag << ICACHE_INDEX_BITS) | i;
icache_vpn >>= (RV_PAGE_SHIFT - ICACHE_OFFSET_BITS);
if (icache_vpn >= start_vpn && icache_vpn <= end_vpn)
blk->valid = false;
} |
||
| blk->tag = tag; | ||
| blk->valid = true; | ||
| } | ||
|
|
||
| static void mmu_load(hart_t *vm, | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
mmu_invalidate_range now needs to flush the new instruction cache as well; otherwise pages whose mappings change keep returning stale instructions from vm->icache.
Prompt for AI agents
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I’m a bit confused about this.
The current implementation is as follows:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@yy214123 You're right to be confused—the code snippet you showed only handles invalidating the page translation caches (
cache_fetch), but the comment was pointing out a bigger gap: the actual instruction cache blocks (vm->icache.i_block and vm->icache.v_block) aren't being touched at all. Right now, when you call
mmu_invalidate_range, it only invalidates the entries of the 2-entry page translation cache that fall in your VPN range. But if the instruction cache already holds translated/cached instruction blocks from those pages (which it does after fetches), they'll keep serving stale data even after the page mappings change—until those specific cache lines get evicted naturally by future misses. To fix it properly, you need to add logic in
mmu_invalidate_range to scan the I-cache and victim cache for blocks whose tags correspond to the invalidated address range, and mark those valid = false. Something like: this is a bit more involved than the full
icache_invalidate_all (which you already call in mmu_invalidate), but it's necessary for range-based invalidation to work correctly without flushing the entire cache unnecessarily. The tag math might need tweaking based on how you're storing virtual vs. physical addresses in the blocks—double-check that the reconstructed address comparison works as expected.