Skip to content

Commit 0286257

Browse files
committed
Attempt to fix JIT icache coherency on Arm64
The JIT compiler failed intermittently on Arm64/Apple Silicon because the instruction cache was not invalidated after branch instructions were patched. When update_branch_imm() rewrote a branch target in JIT-compiled code, the CPU could keep executing the stale instruction held in its icache instead of the newly patched one. This manifested as non-deterministic test failures, particularly in compute-intensive benchmarks such as the pi calculation test, with failure rates of around 40%. The fix calls sys_icache_invalidate() after the memcpy() in update_branch_imm(), after the memcpy() in resolve_jumps(), and once more over the entire newly translated range at the end of jit_translate(), so that the icache is synchronized with the data cache after code modification. This is critical on Arm64, whose separate L1 instruction and data caches are not guaranteed to be kept coherent by hardware.
1 parent 4b61b26 commit 0286257

File tree

1 file changed

+6
-0
lines changed

1 file changed

+6
-0
lines changed

src/jit.c

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -611,6 +611,7 @@ static void update_branch_imm(struct jit_state *state,
611611
pthread_jit_write_protect_np(false);
612612
#endif
613613
memcpy(state->buf + offset, &insn, sizeof(uint32_t));
614+
sys_icache_invalidate(state->buf + offset, sizeof(uint32_t));
614615
#if defined(__APPLE__) && defined(__aarch64__)
615616
pthread_jit_write_protect_np(true);
616617
#endif
@@ -2229,6 +2230,7 @@ static void resolve_jumps(struct jit_state *state)
22292230

22302231
uint8_t *offset_ptr = &state->buf[jump.offset_loc];
22312232
memcpy(offset_ptr, &rel, sizeof(uint32_t));
2233+
sys_icache_invalidate(offset_ptr, sizeof(uint32_t));
22322234
#elif defined(__aarch64__)
22332235
int32_t rel = target_loc - jump.offset_loc;
22342236
update_branch_imm(state, jump.offset_loc, rel);
@@ -2318,12 +2320,16 @@ void jit_translate(riscv_t *rv, block_t *block)
23182320
memset(state->jumps, 0, MAX_JUMPS * sizeof(struct jump));
23192321
state->n_jumps = 0;
23202322
block->offset = state->offset;
2323+
uint32_t translation_start = state->offset;
23212324
translate_chained_block(state, rv, block);
23222325
if (unlikely(should_flush)) {
23232326
code_cache_flush(state, rv);
23242327
goto restart;
23252328
}
23262329
resolve_jumps(state);
2330+
/* Ensure all instruction cache is synchronized after translation */
2331+
sys_icache_invalidate(state->buf + translation_start,
2332+
state->offset - translation_start);
23272333
block->hot = true;
23282334
}
23292335

0 commit comments

Comments
 (0)