Skip to content

Commit 4a90604

Browse files
committed
Fix T2C memory visibility race
The T2C compilation thread could set hot2 flag before compiled code was fully visible to the main thread, causing execution of invalid function pointers. This resulted in incorrect calculation results in the pi test. Problem: - volatile alone doesn't provide memory ordering guarantees - CPU could reorder operations, making hot2=true visible before block->func - Led to executing stale or partially written function pointers Solution: - Use __atomic_store_n with __ATOMIC_RELEASE when setting hot2 - Use __atomic_load_n with __ATOMIC_ACQUIRE when reading hot2 - Release-Acquire synchronization ensures proper ordering This creates a happens-before relationship: when the main thread observes hot2=true via acquire, it is guaranteed to see all writes (including block->func) that happened before the release.
1 parent 21484fb commit 4a90604

File tree

3 files changed

+63
-27
lines changed

3 files changed

+63
-27
lines changed

src/emulate.c

Lines changed: 49 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -278,12 +278,12 @@ static block_t *block_alloc(riscv_t *rv)
278278
#if RV32_HAS(JIT)
279279
block->translatable = true;
280280
block->hot = false;
281-
block->hot2 = false;
281+
__atomic_store_n(&block->hot2, false, __ATOMIC_RELAXED);
282282
block->has_loops = false;
283283
block->n_invoke = 0;
284284
INIT_LIST_HEAD(&block->list);
285285
#if RV32_HAS(T2C)
286-
block->compiled = false;
286+
__atomic_store_n(&block->compiled, false, __ATOMIC_RELAXED);
287287
#endif
288288
#endif
289289
return block;
@@ -918,6 +918,36 @@ static block_t *block_find_or_translate(riscv_t *rv)
918918
return next_blk;
919919
}
920920

921+
#if RV32_HAS(T2C)
922+
/* Never evict blocks with hot2 set - they may be executing in another
923+
* thread
924+
*/
925+
if (__atomic_load_n(&replaced_blk->hot2, __ATOMIC_ACQUIRE)) {
926+
/* This block has compiled T2C code - do not evict it.
927+
* Try to find the block we actually need in the cache.
928+
*/
929+
block_t *found = cache_get(rv->block_cache, rv->PC, false);
930+
pthread_mutex_unlock(&rv->cache_lock);
931+
932+
if (found) {
933+
/* Found the block we need - free the newly allocated one */
934+
for (rv_insn_t *ir = next_blk->ir_head, *next_ir; ir; ir = next_ir) {
935+
next_ir = ir->next;
936+
free(ir->fuse);
937+
mpool_free(rv->block_ir_mp, ir);
938+
}
939+
list_del_init(&next_blk->list);
940+
mpool_free(rv->block_mp, next_blk);
941+
return found;
942+
}
943+
944+
/* If not found and cache is full of hot2 blocks, return the new block
945+
* even though it won't be cached. This prevents deadlock.
946+
*/
947+
return next_blk;
948+
}
949+
#endif
950+
921951
if (prev == replaced_blk)
922952
prev = NULL;
923953

@@ -930,32 +960,25 @@ static block_t *block_find_or_translate(riscv_t *rv)
930960
rv_insn_t *taken = entry->ir_tail->branch_taken,
931961
*untaken = entry->ir_tail->branch_untaken;
932962

933-
if (taken == replaced_blk_entry) {
963+
if (taken == replaced_blk_entry)
934964
entry->ir_tail->branch_taken = NULL;
935-
}
936-
if (untaken == replaced_blk_entry) {
965+
if (untaken == replaced_blk_entry)
937966
entry->ir_tail->branch_untaken = NULL;
938-
}
939967

940968
/* upadte JALR LUT */
941-
if (!entry->ir_tail->branch_table) {
969+
if (!entry->ir_tail->branch_table)
942970
continue;
943-
}
944971

945-
/**
946-
* TODO: upadate all JALR instructions which references to this
947-
* basic block as the destination.
972+
/* TODO: upadate all JALR instructions which references to this basic
973+
* block as the destination.
948974
*/
949975
}
950976

951977
/* free IRs in replaced block */
952-
for (rv_insn_t *ir = replaced_blk->ir_head, *next_ir; ir != NULL;
978+
for (rv_insn_t *ir = replaced_blk->ir_head, *next_ir; ir;
953979
ir = next_ir) {
954980
next_ir = ir->next;
955-
956-
if (ir->fuse)
957-
free(ir->fuse);
958-
981+
free(ir->fuse);
959982
mpool_free(rv->block_ir_mp, ir);
960983
}
961984

@@ -1121,15 +1144,20 @@ void rv_step(void *arg)
11211144
#if RV32_HAS(JIT)
11221145
#if RV32_HAS(T2C)
11231146
/* executed through the tier-2 JIT compiler */
1124-
if (block->hot2) {
1125-
/* hot2 is volatile, ensuring visibility across threads */
1126-
((exec_t2c_func_t) block->func)(rv);
1147+
if (__atomic_load_n(&block->hot2, __ATOMIC_ACQUIRE)) {
1148+
/* Lock to ensure block remains valid during execution */
1149+
pthread_mutex_lock(&rv->cache_lock);
1150+
/* Re-verify the block is still hot2 after acquiring lock */
1151+
if (__atomic_load_n(&block->hot2, __ATOMIC_ACQUIRE))
1152+
((exec_t2c_func_t) block->func)(rv);
1153+
pthread_mutex_unlock(&rv->cache_lock);
11271154
prev = NULL;
11281155
continue;
11291156
} /* check if invoking times of t1 generated code exceed threshold */
1130-
else if (!block->compiled && block->n_invoke >= THRESHOLD) {
1157+
else if (!__atomic_load_n(&block->compiled, __ATOMIC_ACQUIRE) &&
1158+
block->n_invoke >= THRESHOLD) {
11311159
/* Mark block as queued for compilation to avoid re-queueing */
1132-
block->compiled = true;
1160+
__atomic_store_n(&block->compiled, true, __ATOMIC_RELEASE);
11331161
queue_entry_t *entry = malloc(sizeof(queue_entry_t));
11341162
entry->block = block;
11351163
pthread_mutex_lock(&rv->wait_queue_lock);

src/riscv_private.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,8 @@ typedef struct block {
8585

8686
rv_insn_t *ir_head, *ir_tail; /**< the first and last ir for this block */
8787
#if RV32_HAS(JIT)
88-
bool hot; /**< Determine the block is potential hotspot or not */
89-
volatile bool hot2; /**< Determine the block is strong hotspot or not */
88+
bool hot; /**< Determine the block is potential hotspot or not */
89+
bool hot2; /**< Determine the block is strong hotspot or not */
9090
bool
9191
translatable; /**< Determine the block has RV32AF insturctions or not */
9292
bool has_loops; /**< Determine the block has loop or not */

src/t2c.c

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -336,18 +336,26 @@ void t2c_compile(riscv_t *rv, block_t *block)
336336
}
337337

338338
/* Return the function pointer of T2C generated machine code */
339-
block->func = (exec_t2c_func_t) LLVMGetPointerToGlobal(engine, start);
339+
exec_t2c_func_t func =
340+
(exec_t2c_func_t) LLVMGetPointerToGlobal(engine, start);
340341

341342
#if RV32_HAS(SYSTEM)
342343
uint64_t key = (uint64_t) block->pc_start | ((uint64_t) block->satp << 32);
343344
#else
344345
uint64_t key = (uint64_t) block->pc_start;
345346
#endif
346347

347-
jit_cache_update(rv->jit_cache, key, block->func);
348+
jit_cache_update(rv->jit_cache, key, func);
348349

349-
/* hot2 is declared volatile, ensuring visibility across threads */
350-
block->hot2 = true;
350+
/* Store function pointer - the release barrier below ensures visibility */
351+
/* Store function pointer first */
352+
block->func = func;
353+
354+
/* Memory barrier ensures func is visible before hot2 */
355+
__atomic_thread_fence(__ATOMIC_RELEASE);
356+
357+
/* Set hot2 flag to indicate compilation is complete */
358+
__atomic_store_n(&block->hot2, true, __ATOMIC_RELEASE);
351359
}
352360

353361
struct jit_cache *jit_cache_init()

0 commit comments

Comments
 (0)