Commit c1ec84e

Reduce interpreter and JIT overhead
This introduces three optimizations:

1. Block-level cycle counting
   - Remove per-instruction cycle++ from RVOP macro
   - Pre-compute block->cycle_cost at translation time
   - Add cycle cost at block entry (interpreter) or exit (JIT)

2. Timer derivation from cycle counter (SYSTEM mode)
   - Remove per-instruction rv->timer++
   - Derive timer on-demand: timer = csr_cycle + timer_offset
   - Extend CSR sync to TIME/TIMEH registers

3. Page-boundary block termination with fallthrough chaining
   - Terminate blocks at 4KB page boundaries
   - Implement fallthrough chaining via branch_taken pointer
   - Add page_index_insert() for O(1) cache invalidation
1 parent 1c5112e commit c1ec84e
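The first two optimizations touch the RVOP dispatch and CSR code elsewhere in this commit; the hunks shown below cover only the cache side of the third. As orientation, here is a minimal C sketch of block-level cycle accounting and the derived timer. Only block->cycle_cost, csr_cycle, and timer_offset are names taken from the commit message; riscv_t, the block_t layout, block_enter(), and rv_timer() are illustrative placeholders, not the emulator's actual definitions.

    #include <stdint.h>

    /* Illustrative state; the real riscv_t/block_t carry far more fields. */
    typedef struct {
        uint64_t csr_cycle;    /* CYCLE/CYCLEH counter */
        uint64_t timer_offset; /* adjusted when the guest writes TIME/TIMEH */
    } riscv_t;

    typedef struct {
        uint32_t pc_start, pc_end; /* block covers [pc_start, pc_end) */
        uint64_t cycle_cost;       /* pre-computed at translation time */
    } block_t;

    /* 1. Block-level cycle counting: charge the whole block once at entry
     *    (interpreter) or exit (JIT) instead of cycle++ in every RVOP handler.
     */
    static void block_enter(riscv_t *rv, const block_t *block)
    {
        rv->csr_cycle += block->cycle_cost;
        /* ... execute the block's instructions ... */
    }

    /* 2. SYSTEM-mode timer derived on demand from the cycle counter,
     *    replacing the per-instruction rv->timer++.
     */
    static inline uint64_t rv_timer(const riscv_t *rv)
    {
        return rv->csr_cycle + rv->timer_offset;
    }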

8 files changed: +475 additions, −114 deletions


mk/common.mk

Lines changed: 2 additions & 1 deletion
@@ -35,8 +35,9 @@ endef

 # Get specified feature (supports both ENABLE_* and CONFIG_*)
 POSITIVE_WORDS = 1 true yes y
+NEGATIVE_WORDS = 0 false no n
 define has
-$(if $(filter $(firstword $(ENABLE_$(strip $1))), $(POSITIVE_WORDS)),1,$(call config-to-feature,$1))
+$(if $(filter $(firstword $(ENABLE_$(strip $1))),$(NEGATIVE_WORDS)),0,$(if $(filter $(firstword $(ENABLE_$(strip $1))),$(POSITIVE_WORDS)),1,$(call config-to-feature,$1)))
 endef

 # Set compiler feature flag from config

src/cache.c

Lines changed: 102 additions & 27 deletions
@@ -54,15 +54,36 @@ typedef struct {
  * detached and freed, and the stored information will be inherited by the new
  * entry.
  */
+
+#if RV32_HAS(JIT) && RV32_HAS(SYSTEM)
+/* Page index entry: links blocks in the same page bucket */
+typedef struct page_block_entry {
+    void *block;                   /* pointer to block_t */
+    struct page_block_entry *next; /* next entry in bucket chain */
+} page_block_entry_t;
+#endif
+
 typedef struct cache {
     struct list_head list;       /* list of live cache */
     struct list_head ghost_list; /* list of evicted cache */
     hashtable_t map; /* hash map which contains both live and evicted cache */
     uint32_t size;
     uint32_t ghost_list_size;
     uint32_t capacity;
+#if RV32_HAS(JIT) && RV32_HAS(SYSTEM)
+    /* Page index for O(1) invalidation by virtual address.
+     * Each bucket contains a linked list of blocks starting in that page.
+     */
+    page_block_entry_t *page_index[PAGE_INDEX_SIZE];
+#endif
 } cache_t;

+#if RV32_HAS(JIT) && RV32_HAS(SYSTEM)
+/* Forward declarations for page index functions */
+static void page_index_insert(cache_t *cache, block_t *block);
+static void page_index_remove(cache_t *cache, block_t *block);
+#endif
+
 #define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL)

 static inline void INIT_HLIST_NODE(struct hlist_node *h)
@@ -178,6 +199,11 @@ cache_t *cache_create(uint32_t size_bits)
     for (uint32_t i = 0; i < cache_size; i++)
         INIT_HLIST_HEAD(&cache->map.ht_list_head[i]);

+#if RV32_HAS(JIT) && RV32_HAS(SYSTEM)
+    /* Initialize page index for O(1) invalidation lookup */
+    memset(cache->page_index, 0, sizeof(cache->page_index));
+#endif
+
     return cache;

 fail_cache:
@@ -290,6 +316,11 @@ void *cache_put(cache_t *cache, uint32_t key, void *value)
         assert(replaced->alive);

         replaced_value = replaced->value;
+#if RV32_HAS(JIT) && RV32_HAS(SYSTEM)
+        /* Remove replaced block from page index before eviction */
+        if (replaced_value)
+            page_index_remove(cache, (block_t *) replaced_value);
+#endif
         replaced->alive = false;
         list_del_init(&replaced->list);
         cache->size--;
@@ -333,6 +364,13 @@ void *cache_put(cache_t *cache, uint32_t key, void *value)

     cache->size++;

+#if RV32_HAS(JIT) && RV32_HAS(SYSTEM)
+    /* Page index for O(1) invalidation - blocks are page-terminated
+     * and use fallthrough chaining for non-branch block boundaries.
+     */
+    page_index_insert(cache, (block_t *) value);
+#endif
+
     cache_ghost_list_update(cache);

     assert(cache->size <= cache->capacity);
@@ -342,6 +380,17 @@ void *cache_put(cache_t *cache, uint32_t key, void *value)

 void cache_free(cache_t *cache)
 {
+#if RV32_HAS(JIT) && RV32_HAS(SYSTEM)
+    /* Free all page index entries */
+    for (uint32_t i = 0; i < PAGE_INDEX_SIZE; i++) {
+        page_block_entry_t *entry = cache->page_index[i];
+        while (entry) {
+            page_block_entry_t *next = entry->next;
+            free(entry);
+            entry = next;
+        }
+    }
+#endif
     free(cache->map.ht_list_head);
     free(cache);
 }
@@ -436,6 +485,43 @@ void clear_cache_hot(const struct cache *cache, clear_func_t func)
  * changes, appropriate locking must be added around cache->list traversal.
  */

+/* Hash function for page index using golden ratio multiplicative hash */
+HASH_FUNC_IMPL(page_index_hash, PAGE_INDEX_BITS, PAGE_INDEX_SIZE)
+
+/* Insert a block into the page index */
+static void page_index_insert(cache_t *cache, block_t *block)
+{
+    uint32_t page = block->pc_start & ~(RV_PG_SIZE - 1);
+    uint32_t bucket = page_index_hash(page >> RV_PG_SHIFT);
+
+    page_block_entry_t *entry = malloc(sizeof(page_block_entry_t));
+    if (!entry)
+        return; /* Graceful degradation: O(1) lookup unavailable for this block
+                 */
+
+    entry->block = block;
+    entry->next = cache->page_index[bucket];
+    cache->page_index[bucket] = entry;
+}
+
+/* Remove a block from the page index */
+static void page_index_remove(cache_t *cache, block_t *block)
+{
+    uint32_t page = block->pc_start & ~(RV_PG_SIZE - 1);
+    uint32_t bucket = page_index_hash(page >> RV_PG_SHIFT);
+
+    page_block_entry_t **pp = &cache->page_index[bucket];
+    while (*pp) {
+        if ((*pp)->block == block) {
+            page_block_entry_t *tmp = *pp;
+            *pp = (*pp)->next;
+            free(tmp);
+            return;
+        }
+        pp = &(*pp)->next;
+    }
+}
+
 uint32_t cache_invalidate_satp(cache_t *cache, uint32_t satp)
 {
     if (unlikely(!cache->capacity))
@@ -465,36 +551,25 @@ uint32_t cache_invalidate_va(cache_t *cache, uint32_t va, uint32_t satp)

     /* Extract page-aligned VA for the target address */
     uint32_t va_page = va & ~(RV_PG_SIZE - 1);
+    uint32_t bucket = page_index_hash(va_page >> RV_PG_SHIFT);
     uint32_t count = 0;

-    cache_entry_t *entry = NULL;
-#ifdef __HAVE_TYPEOF
-    list_for_each_entry (entry, &cache->list, list)
-#else
-    list_for_each_entry (entry, &cache->list, list, cache_entry_t)
-#endif
-    {
-        block_t *block = (block_t *) entry->value;
-        if (!block || block->satp != satp || block->invalidated)
-            continue;
-
-        /* Check if target VA page overlaps with block's address range.
-         * A block may span multiple pages, so we check if va_page falls
-         * within [block_start_page, block_end_page].
-         *
-         * Note: pc_end is exclusive (address after last instruction), so we
-         * use (pc_end - 1) to get the page containing the last byte. This
-         * avoids false invalidation when pc_end falls exactly on a page
-         * boundary.
-         */
-        uint32_t block_start_page = block->pc_start & ~(RV_PG_SIZE - 1);
-        uint32_t last_byte = block->pc_end > block->pc_start ? block->pc_end - 1
-                                                             : block->pc_start;
-        uint32_t block_end_page = last_byte & ~(RV_PG_SIZE - 1);
-        if (va_page >= block_start_page && va_page <= block_end_page) {
-            block->invalidated = true;
-            count++;
+    /* O(1) lookup via page index.
+     * With page-bounded blocks, each block fits entirely within one 4KB page.
+     * We only need to check the bucket for this specific page.
+     */
+    page_block_entry_t *entry = cache->page_index[bucket];
+    while (entry) {
+        block_t *block = (block_t *) entry->block;
+        if (block && block->satp == satp && !block->invalidated) {
+            /* Verify block belongs to this page (hash collision check) */
+            uint32_t block_page = block->pc_start & ~(RV_PG_SIZE - 1);
+            if (block_page == va_page) {
+                block->invalidated = true;
+                count++;
+            }
         }
+        entry = entry->next;
     }
     return count;
 }
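The cache-side changes above assume the translator now terminates blocks at 4KB page boundaries and chains a block to its successor through the branch_taken pointer; those translator changes live in other files of this commit. Below is a hedged C sketch of the two checks, with block_t trimmed down and both function names (block_must_end, chain_fallthrough) being illustrative rather than the project's actual API.

    #include <stdbool.h>
    #include <stdint.h>

    #define RV_PG_SIZE 4096 /* 4 KiB pages, matching the invalidation code above */

    /* Trimmed-down block type for illustration only. */
    typedef struct block {
        uint32_t pc_start, pc_end;  /* pc_end is exclusive */
        struct block *branch_taken; /* fallthrough chain to the next block */
        bool invalidated;
    } block_t;

    /* Terminate translation when the next instruction would cross into another
     * page: every block then fits in a single 4 KiB page, which is what lets
     * cache_invalidate_va() consult one page-index bucket instead of scanning
     * every live block.
     */
    static inline bool block_must_end(const block_t *block, uint32_t next_pc)
    {
        uint32_t page = block->pc_start & ~(RV_PG_SIZE - 1);
        return (next_pc & ~(RV_PG_SIZE - 1)) != page;
    }

    /* Fallthrough chaining: a block cut short only by the page boundary (no
     * branch) is linked to its successor so execution continues without
     * returning to the dispatch loop.  The link must be dropped when either
     * block is invalidated.
     */
    static void chain_fallthrough(block_t *prev, block_t *next)
    {
        if (!prev->branch_taken && !next->invalidated &&
            prev->pc_end == next->pc_start)
            prev->branch_taken = next;
    }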

src/cache.h

Lines changed: 9 additions & 0 deletions
@@ -68,6 +68,14 @@ void clear_cache_hot(const struct cache *cache, clear_func_t func);
 uint32_t cache_freq(const struct cache *cache, uint32_t key);

 #if RV32_HAS(JIT) && RV32_HAS(SYSTEM)
+
+/* Page index for O(1) cache invalidation by virtual address.
+ * With page-bounded blocks, each block fits entirely within one 4KB page,
+ * allowing direct lookup by page address instead of O(n) scan.
+ */
+#define PAGE_INDEX_BITS 10
+#define PAGE_INDEX_SIZE (1 << PAGE_INDEX_BITS)
+
 /**
  * cache_invalidate_satp - invalidate all blocks matching the given SATP
  * @cache: a pointer to target cache
@@ -88,6 +96,7 @@ uint32_t cache_invalidate_satp(struct cache *cache, uint32_t satp);
  *
  * This is used by SFENCE.VMA with rs1!=0 (address-specific flush) to
  * invalidate JIT-compiled blocks in a specific virtual page.
+ * Uses O(1) page-indexed lookup instead of O(n) scan.
  */
 uint32_t cache_invalidate_va(struct cache *cache, uint32_t va, uint32_t satp);
 #endif
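For context, a sketch of how the two invalidation entry points declared in this header might be driven from an SFENCE.VMA handler. Only the cache_invalidate_satp()/cache_invalidate_va() prototypes come from the header; the wrapper name and its arguments are assumptions made for illustration.

    #include <stdint.h>
    #include "cache.h"

    /* SFENCE.VMA dispatch: rs1 == 0 flushes every block translated under the
     * current SATP, while rs1 != 0 flushes only the page containing vaddr,
     * which is now an O(1) page-index lookup rather than an O(n) scan.
     * Returns the number of blocks marked invalidated.
     */
    static uint32_t flush_on_sfence_vma(struct cache *block_cache,
                                        uint32_t rs1,
                                        uint32_t vaddr,
                                        uint32_t satp)
    {
        if (rs1 == 0)
            return cache_invalidate_satp(block_cache, satp);
        return cache_invalidate_va(block_cache, vaddr, satp);
    }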
