30 changes: 29 additions & 1 deletion Makefile
@@ -73,6 +73,28 @@ endif
ENABLE_ARCH_TEST ?= 0
$(call set-feature, ARCH_TEST)

# ThreadSanitizer support
# TSAN memory layout on x86-64:
# Shadow: 0x02a000000000 - 0x7cefffffffff (reserved by TSAN)
# App: 0x7cf000000000 - 0x7ffffffff000 (usable by application)
#
# We use MAP_FIXED to allocate FULL4G's 4GB memory at a fixed address
# (0x7d0000000000) within TSAN's app range, ensuring compatibility.
#
# IMPORTANT: TSAN requires ASLR (Address Space Layout Randomization) to be
# disabled to prevent system allocations from landing in TSAN's shadow memory.
# Tests are run with 'setarch $(uname -m) -R' to disable ASLR.
ENABLE_TSAN ?= 0
ifeq ("$(ENABLE_TSAN)", "1")
override ENABLE_SDL := 0 # SDL (uninstrumented system lib) creates threads TSAN cannot track
override ENABLE_LTO := 0 # LTO interferes with TSAN instrumentation
CFLAGS += -DTSAN_ENABLED # Signal code to use TSAN-compatible allocations
# Disable ASLR for TSAN tests to prevent allocations in TSAN shadow memory
BIN_WRAPPER = setarch $(shell uname -m) -R
else
BIN_WRAPPER =
endif
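
# As a quick cross-check of the layout quoted above, the fixed FULL4G mapping
# fits inside the x86-64 application range. The sketch below is illustrative
# only - the range constants are the ones stated in this comment block, not
# values read from TSAN itself:
#
#     /* sketch only: verifies the address arithmetic from the comment above */
#     #include <assert.h>
#     #include <stdint.h>
#
#     #define TSAN_APP_LO 0x7cf000000000ULL /* start of TSAN app range */
#     #define TSAN_APP_HI 0x7ffffffff000ULL /* end of TSAN app range   */
#     #define FULL4G_BASE 0x7d0000000000ULL /* fixed base used in io.c */
#     #define FULL4G_SIZE 0x100000000ULL    /* 4 GiB                   */
#
#     int main(void)
#     {
#         assert(FULL4G_BASE >= TSAN_APP_LO);
#         assert(FULL4G_BASE + FULL4G_SIZE <= TSAN_APP_HI); /* ends at 0x7d0100000000 */
#         return 0;
#     }
#
# A TSAN run is then driven the usual way (e.g. 'make ENABLE_TSAN=1' together
# with the test target defined further down), and BIN_WRAPPER prefixes every
# test binary with 'setarch $(uname -m) -R' so ASLR stays disabled for the run.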

# Enable link-time optimization (LTO)
ENABLE_LTO ?= 1
ifeq ($(call has, LTO), 1)
@@ -281,6 +303,12 @@ CFLAGS += -fsanitize=undefined -fno-sanitize=alignment -fno-sanitize-recover=all
LDFLAGS += -fsanitize=undefined -fno-sanitize=alignment -fno-sanitize-recover=all
endif

# ThreadSanitizer flags (ENABLE_TSAN is set earlier to override SDL/FULL4G)
ifeq ("$(ENABLE_TSAN)", "1")
CFLAGS += -fsanitize=thread -g
LDFLAGS += -fsanitize=thread
endif

$(OUT)/emulate.o: CFLAGS += -foptimize-sibling-calls -fomit-frame-pointer -fno-stack-check -fno-stack-protector

# .DEFAULT_GOAL should be set to all since the very first target is not all
@@ -375,7 +403,7 @@ define check-test
$(Q)true; \
$(PRINTF) "Running $(3) ... "; \
OUTPUT_FILE="$$(mktemp)"; \
if (LC_ALL=C $(BIN) $(1) $(2) > "$$OUTPUT_FILE") && \
if (LC_ALL=C $(BIN_WRAPPER) $(BIN) $(1) $(2) > "$$OUTPUT_FILE") && \
[ "$$(cat "$$OUTPUT_FILE" | $(LOG_FILTER) | $(4))" = "$(5)" ]; then \
$(call notice, [OK]); \
else \
44 changes: 35 additions & 9 deletions src/emulate.c
@@ -24,6 +24,7 @@ extern struct target_ops gdbstub_ops;
#endif

#include "decode.h"
#include "log.h"
#include "mpool.h"
#include "riscv.h"
#include "riscv_private.h"
@@ -283,6 +284,7 @@ static block_t *block_alloc(riscv_t *rv)
block->hot2 = false;
block->has_loops = false;
block->n_invoke = 0;
block->func = NULL;
INIT_LIST_HEAD(&block->list);
#if RV32_HAS(T2C)
block->compiled = false;
@@ -1151,22 +1153,32 @@ void rv_step(void *arg)
#if RV32_HAS(JIT)
#if RV32_HAS(T2C)
/* executed through the tier-2 JIT compiler */
if (block->hot2) {
/* Use acquire semantics to ensure the write to block->func is visible
 * before we call it */
if (__atomic_load_n(&block->hot2, __ATOMIC_ACQUIRE)) {
((exec_t2c_func_t) block->func)(rv);
prev = NULL;
continue;
} /* check if invoking times of t1 generated code exceed threshold */
else if (!block->compiled && block->n_invoke >= THRESHOLD) {
block->compiled = true;
else if (!__atomic_load_n(&block->compiled, __ATOMIC_RELAXED) &&
__atomic_load_n(&block->n_invoke, __ATOMIC_RELAXED) >=
THRESHOLD) {
__atomic_store_n(&block->compiled, true, __ATOMIC_RELAXED);
queue_entry_t *entry = malloc(sizeof(queue_entry_t));
if (unlikely(!entry)) {
/* Malloc failed - reset compiled flag to allow retry later */
block->compiled = false;
__atomic_store_n(&block->compiled, false, __ATOMIC_RELAXED);
continue;
}
entry->block = block;
/* Store cache key instead of pointer to prevent use-after-free */
#if RV32_HAS(SYSTEM)
entry->key =
(uint64_t) block->pc_start | ((uint64_t) block->satp << 32);
#else
entry->key = (uint64_t) block->pc_start;
#endif
pthread_mutex_lock(&rv->wait_queue_lock);
list_add(&entry->list, &rv->wait_queue);
pthread_cond_signal(&rv->wait_queue_cond);
pthread_mutex_unlock(&rv->wait_queue_lock);
}
#endif
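
For context, the acquire load of hot2 above pairs with a release store on the T2C worker thread. The sketch below shows the intended shape of that consumer side; it is illustrative only - cache_lookup_by_key() and t2c_compile_block() are hypothetical names, and the queue/list helpers are written in the usual Linux-list style rather than copied from this tree:

    static void t2c_worker_iteration(riscv_t *rv)
    {
        /* Pop one compilation request off the wait queue. */
        pthread_mutex_lock(&rv->wait_queue_lock);
        while (list_empty(&rv->wait_queue))
            pthread_cond_wait(&rv->wait_queue_cond, &rv->wait_queue_lock);
        queue_entry_t *entry =
            list_first_entry(&rv->wait_queue, queue_entry_t, list);
        list_del(&entry->list);
        pthread_mutex_unlock(&rv->wait_queue_lock);

        /* Re-resolve the block from its cache key: the pointer captured at
         * enqueue time may have been evicted since (use-after-free risk). */
        block_t *block = cache_lookup_by_key(rv->block_cache, entry->key);
        if (block) {
            block->func = t2c_compile_block(rv, block);
            /* Release store pairs with the __ATOMIC_ACQUIRE load of hot2 in
             * rv_step(): once the interpreter observes hot2 == true, the
             * write to block->func is guaranteed to be visible as well. */
            __atomic_store_n(&block->hot2, true, __ATOMIC_RELEASE);
        }
        free(entry);
    }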
@@ -1178,7 +1190,11 @@ void rv_step(void *arg)
* entry in compiled binary buffer.
*/
if (block->hot) {
#if RV32_HAS(T2C)
__atomic_fetch_add(&block->n_invoke, 1, __ATOMIC_RELAXED);
#else
block->n_invoke++;
#endif
((exec_block_func_t) state->buf)(
rv, (uintptr_t) (state->buf + block->offset));
prev = NULL;
@@ -1190,10 +1206,20 @@
#endif
) {
jit_translate(rv, block);
((exec_block_func_t) state->buf)(
rv, (uintptr_t) (state->buf + block->offset));
prev = NULL;
continue;
/* Only execute if translation succeeded (block is hot) */
if (block->hot) {
rv_log_debug("JIT: Executing block pc=0x%08x, offset=%u",
block->pc_start, block->offset);
((exec_block_func_t) state->buf)(
rv, (uintptr_t) (state->buf + block->offset));
prev = NULL;
continue;
}
/* Fall through to interpreter if translation failed */
rv_log_debug(
"JIT: Translation failed for block pc=0x%08x, using "
"interpreter",
block->pc_start);
}
set_reset(&pc_set);
has_loops = false;
35 changes: 35 additions & 0 deletions src/io.c
@@ -27,12 +27,47 @@ memory_t *memory_new(uint32_t size)
return NULL;
assert(mem);
#if HAVE_MMAP
#if defined(TSAN_ENABLED)
/* ThreadSanitizer compatibility: Use MAP_FIXED to allocate at a specific
* address to avoid conflicts with TSAN's shadow memory.
*/
#if defined(__x86_64__)
/* x86_64: Allocate within TSAN's range (0x7cf000000000 - 0x7ffffffff000).
*
* Fixed address: 0x7d0000000000
* Size: up to 4GB (0x100000000)
* End: 0x7d0100000000 (well within app range)
*/
void *fixed_addr = (void *) 0x7d0000000000UL;
#elif defined(__aarch64__)
/* ARM64 (macOS/Apple Silicon): Use higher address range.
*
* Fixed address: 0x150000000000 (21TB)
* Size: up to 4GB (0x100000000)
* End: 0x150100000000
*
* This avoids TSAN's shadow memory and typical process allocations.
* Requires ASLR disabled via: setarch $(uname -m) -R
*/
void *fixed_addr = (void *) 0x150000000000UL;
#else
#error "TSAN is only supported on x86_64 and aarch64"
#endif
data_memory_base = mmap(fixed_addr, size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
if (data_memory_base == MAP_FAILED) {
free(mem);
return NULL;
}
#else
/* Standard allocation without TSAN */
data_memory_base = mmap(NULL, size, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
if (data_memory_base == MAP_FAILED) {
free(mem);
return NULL;
}
#endif
#else
data_memory_base = malloc(size);
if (!data_memory_base) {
77 changes: 75 additions & 2 deletions src/jit.c
@@ -42,6 +42,7 @@
#include "decode.h"
#include "io.h"
#include "jit.h"
#include "log.h"
#include "riscv.h"
#include "riscv_private.h"
#include "utils.h"
@@ -593,24 +594,30 @@ static void update_branch_imm(struct jit_state *state,
assert((imm & 3) == 0);
uint32_t insn;
imm >>= 2;
rv_log_debug("JIT: Patching branch at offset=%u, imm=%d", offset, imm * 4);
/* Read instruction while in execute mode (MAP_JIT requirement) */
memcpy(&insn, state->buf + offset, sizeof(uint32_t));
if ((insn & 0xfe000000U) == 0x54000000U /* Conditional branch immediate. */
|| (insn & 0x7e000000U) ==
0x34000000U) { /* Compare and branch immediate. */
assert((imm >> 19) == INT64_C(-1) || (imm >> 19) == 0);
insn &= ~(0x7ffffU << 5); /* Clear old offset bits */
insn |= (imm & 0x7ffff) << 5;
} else if ((insn & 0x7c000000U) == 0x14000000U) {
/* Unconditional branch immediate. */
assert((imm >> 26) == INT64_C(-1) || (imm >> 26) == 0);
insn &= ~0x03ffffffU; /* Clear old offset bits */
insn |= (imm & 0x03ffffffU) << 0;
} else {
assert(false);
insn = BAD_OPCODE;
}
#if defined(__APPLE__) && defined(__aarch64__)
/* Switch to write mode only for writing */
pthread_jit_write_protect_np(false);
#endif
memcpy(state->buf + offset, &insn, sizeof(uint32_t));
sys_icache_invalidate(state->buf + offset, sizeof(uint32_t));
#if defined(__APPLE__) && defined(__aarch64__)
pthread_jit_write_protect_np(true);
#endif
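
To make the patching above concrete: an AArch64 conditional branch stores its byte offset as a word-scaled 19-bit immediate in bits [23:5]. The following standalone sketch (not code from the tree) mirrors the masking done in update_branch_imm():

    #include <assert.h>
    #include <stdint.h>

    /* Re-encode the imm19 field of a B.cond/CBZ-style instruction. */
    static uint32_t patch_imm19(uint32_t insn, int32_t byte_off)
    {
        assert((byte_off & 3) == 0);                  /* word aligned        */
        int32_t imm = byte_off >> 2;                  /* scale to words      */
        assert(imm >= -(1 << 18) && imm < (1 << 18)); /* must fit in 19 bits */
        insn &= ~(0x7ffffU << 5);                     /* clear old imm19     */
        insn |= ((uint32_t) imm & 0x7ffff) << 5;
        return insn;
    }

For example, redirecting such a branch 64 bytes forward encodes imm19 = 16; the unconditional B form uses a 26-bit immediate in bits [25:0] in the same word-scaled way.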
@@ -2164,9 +2171,12 @@ void clear_hot(block_t *block)

static void code_cache_flush(struct jit_state *state, riscv_t *rv)
{
rv_log_info("JIT: Flushing code cache (n_blocks=%d, n_jumps=%d, offset=%u)",
state->n_blocks, state->n_jumps, state->offset);
should_flush = false;
state->offset = state->org_size;
state->n_blocks = 0;
state->n_jumps = 0; /* Reset jump count when flushing */
set_reset(&state->set);
clear_cache_hot(rv->block_cache, (clear_func_t) clear_hot);
#if RV32_HAS(T2C)
@@ -2196,6 +2206,7 @@ static void translate(struct jit_state *state, riscv_t *rv, block_t *block)

static void resolve_jumps(struct jit_state *state)
{
rv_log_debug("JIT: Resolving %d jumps", state->n_jumps);
for (int i = 0; i < state->n_jumps; i++) {
struct jump jump = state->jumps[i];
int target_loc;
@@ -2218,6 +2229,10 @@
(if (jump.target_satp == state->offset_map[i].satp), )
{
target_loc = state->offset_map[i].offset;
rv_log_debug(
"JIT: Jump %d resolved to block pc=0x%08x, "
"offset=%d",
i, jump.target_pc, target_loc);
break;
}
}
@@ -2229,6 +2244,7 @@

uint8_t *offset_ptr = &state->buf[jump.offset_loc];
memcpy(offset_ptr, &rel, sizeof(uint32_t));
sys_icache_invalidate(offset_ptr, sizeof(uint32_t));
#elif defined(__aarch64__)
int32_t rel = target_loc - jump.offset_loc;
update_branch_imm(state, jump.offset_loc, rel);
@@ -2308,23 +2324,35 @@ void jit_translate(riscv_t *rv, block_t *block)
) {
block->offset = state->offset_map[i].offset;
block->hot = true;
rv_log_debug("JIT: Cache hit for block pc=0x%08x, offset=%u",
block->pc_start, block->offset);
return;
}
}
assert(NULL);
__UNREACHABLE;
}
rv_log_debug("JIT: Starting translation for block pc=0x%08x",
block->pc_start);
restart:
memset(state->jumps, 0, MAX_JUMPS * sizeof(struct jump));
state->n_jumps = 0;
block->offset = state->offset;
translate_chained_block(state, rv, block);
if (unlikely(should_flush)) {
/* Mark block as not translated since translation was incomplete */
block->hot = false;
/* Don't reset offset - it will be set correctly on restart */
rv_log_debug("JIT: Translation triggered flush for block pc=0x%08x",
block->pc_start);
code_cache_flush(state, rv);
goto restart;
}
resolve_jumps(state);
block->hot = true;
rv_log_debug(
"JIT: Translation completed for block pc=0x%08x, offset=%u, size=%u",
block->pc_start, block->offset, state->offset - block->offset);
}

struct jit_state *jit_state_init(size_t size)
@@ -2336,6 +2364,52 @@ struct jit_state *jit_state_init(size_t size)

state->offset = 0;
state->size = size;
#if defined(TSAN_ENABLED)
/* ThreadSanitizer compatibility: Allocate JIT code buffer at a fixed
* address above the main memory region to avoid conflicts.
*/
#if defined(__x86_64__)
/* x86_64 memory layout:
* Main memory: 0x7d0000000000 - 0x7d0100000000 (4GB for FULL4G)
* JIT buffer: 0x7d1000000000 + size
*
* This keeps both allocations in TSAN's app range (0x7cf000000000 -
* 0x7ffffffff000) and prevents overlap with main memory or TSAN shadow.
*/
void *jit_addr = (void *) 0x7d1000000000UL;
state->buf = mmap(jit_addr, size, PROT_READ | PROT_WRITE | PROT_EXEC,
@cubic-dev-ai (bot) commented on src/jit.c at line 2350, Oct 5, 2025:

    When TSAN is enabled on macOS x86_64, this mmap call loses the MAP_JIT flag that the
    standard path uses, so on hardened macOS the call returns MAP_FAILED and the JIT never
    initializes. Please keep MAP_JIT on macOS even in the TSAN path.

✅ Addressed in f915bc2

MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED
#if defined(__APPLE__)
| MAP_JIT
#endif
,
-1, 0);
#elif defined(__aarch64__)
/* ARM64 memory layout (macOS/Apple Silicon):
* Main memory: 0x150000000000 - 0x150100000000 (4GB for FULL4G)
* JIT buffer: 0x151000000000 + size
*
* Apple Silicon requires MAP_JIT for executable memory. The fixed
* address is chosen to avoid TSAN's shadow memory and typical process
* allocations. Requires ASLR disabled via: setarch $(uname -m) -R
*/
void *jit_addr = (void *) 0x151000000000UL;
state->buf = mmap(jit_addr, size, PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED
#if defined(__APPLE__)
| MAP_JIT
#endif
,
-1, 0);
#else
#error "TSAN is only supported on x86_64 and aarch64"
#endif
if (state->buf == MAP_FAILED) {
free(state);
return NULL;
}
#else
/* Standard allocation without TSAN */
state->buf = mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_PRIVATE | MAP_ANONYMOUS
#if defined(__APPLE__)
@@ -2347,8 +2421,7 @@ struct jit_state *jit_state_init(size_t size)
free(state);
return NULL;
}
assert(state->buf != MAP_FAILED);

#endif
state->n_blocks = 0;
set_reset(&state->set);
reset_reg();