Commit 49e4d21

ZJIT: Share function stub code using a trampoline (ruby#14225)
1 parent 149add8 commit 49e4d21

File tree

3 files changed: +99 −69 lines changed
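At a high level: before this commit, every function stub emitted by gen_function_stub carried its own copy of the register save/restore sequence around the call into function_stub_hit, so each stubbed call site paid the full code size. After it, a stub is just two instructions (load its IseqCall pointer into a scratch register, jump to one shared trampoline), and the save/call/restore sequence exists exactly once. A runnable toy model of the sharing idea, where IseqCall, shared_trampoline, and the doubling "work" are all hypothetical stand-ins for the ZJIT pieces:

    // Each stub owns only its data; the code is shared, mirroring
    // "load SCRATCH_OPND, <ptr>; jmp trampoline" per stub.
    struct IseqCall { id: usize }

    fn shared_trampoline(call: &IseqCall) -> usize {
        // Stands in for: save regs, call function_stub_hit, restore regs,
        // jump to the returned code pointer.
        call.id * 2
    }

    fn main() {
        let stubs: Vec<IseqCall> = (0..3).map(|id| IseqCall { id }).collect();
        for stub in &stubs {
            println!("stub {} -> {}", stub.id, shared_trampoline(stub));
        }
    }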

zjit/src/backend/lir.rs

Lines changed: 1 addition & 0 deletions
@@ -15,6 +15,7 @@ pub use crate::backend::current::{
     NATIVE_STACK_PTR, NATIVE_BASE_PTR,
     C_ARG_OPNDS, C_RET_REG, C_RET_OPND,
 };
+pub const SCRATCH_OPND: Opnd = Opnd::Reg(Assembler::SCRATCH_REG);
 
 pub static JIT_PRESERVED_REGS: &'static [Opnd] = &[CFP, SP, EC];

zjit/src/codegen.rs

Lines changed: 82 additions & 63 deletions
@@ -9,7 +9,7 @@ use crate::gc::{append_gc_offsets, get_or_create_iseq_payload, get_or_create_ise
 use crate::state::ZJITState;
 use crate::stats::{counter_ptr, with_time_stat, Counter, Counter::compile_time_ns};
 use crate::{asm::CodeBlock, cruby::*, options::debug, virtualmem::CodePtr};
-use crate::backend::lir::{self, asm_comment, asm_ccall, Assembler, Opnd, Target, CFP, C_ARG_OPNDS, C_RET_OPND, EC, NATIVE_STACK_PTR, NATIVE_BASE_PTR, SP};
+use crate::backend::lir::{self, asm_comment, asm_ccall, Assembler, Opnd, Target, CFP, C_ARG_OPNDS, C_RET_OPND, EC, NATIVE_STACK_PTR, NATIVE_BASE_PTR, SCRATCH_OPND, SP};
 use crate::hir::{iseq_to_hir, Block, BlockId, BranchEdge, Invariant, RangeType, SideExitReason, SideExitReason::*, SpecialObjectType, SpecialBackrefSymbol, SELF_PARAM_IDX};
 use crate::hir::{Const, FrameState, Function, Insn, InsnId};
 use crate::hir_type::{types, Type};
@@ -26,8 +26,8 @@ struct JITState {
     /// Labels for each basic block indexed by the BlockId
     labels: Vec<Option<Target>>,
 
-    /// Branches to an ISEQ that need to be compiled later
-    branch_iseqs: Vec<(Rc<Branch>, IseqPtr)>,
+    /// ISEQ calls that need to be compiled later
+    iseq_calls: Vec<Rc<IseqCall>>,
 
     /// The number of bytes allocated for basic block arguments spilled onto the C stack
     c_stack_slots: usize,
@@ -40,7 +40,7 @@ impl JITState {
             iseq,
             opnds: vec![None; num_insns],
             labels: vec![None; num_blocks],
-            branch_iseqs: Vec::default(),
+            iseq_calls: Vec::default(),
             c_stack_slots,
         }
     }
@@ -130,8 +130,8 @@ fn gen_iseq_entry_point_body(cb: &mut CodeBlock, iseq: IseqPtr) -> Option<CodePtr> {
     };
 
     // Stub callee ISEQs for JIT-to-JIT calls
-    for (branch, callee_iseq) in jit.branch_iseqs.into_iter() {
-        gen_iseq_branch(cb, callee_iseq, iseq, branch)?;
+    for iseq_call in jit.iseq_calls.into_iter() {
+        gen_iseq_call(cb, iseq, iseq_call)?;
     }
 
     // Remember the block address to reuse it later
@@ -144,20 +144,19 @@ fn gen_iseq_entry_point_body(cb: &mut CodeBlock, iseq: IseqPtr) -> Option<CodePtr> {
 }
 
 /// Stub a branch for a JIT-to-JIT call
-fn gen_iseq_branch(cb: &mut CodeBlock, iseq: IseqPtr, caller_iseq: IseqPtr, branch: Rc<Branch>) -> Option<()> {
+fn gen_iseq_call(cb: &mut CodeBlock, caller_iseq: IseqPtr, iseq_call: Rc<IseqCall>) -> Option<()> {
     // Compile a function stub
-    let Some((stub_ptr, gc_offsets)) = gen_function_stub(cb, iseq, branch.clone()) else {
+    let Some(stub_ptr) = gen_function_stub(cb, iseq_call.clone()) else {
         // Failed to compile the stub. Bail out of compiling the caller ISEQ.
         debug!("Failed to compile iseq: could not compile stub: {} -> {}",
-               iseq_get_location(caller_iseq, 0), iseq_get_location(iseq, 0));
+               iseq_get_location(caller_iseq, 0), iseq_get_location(iseq_call.iseq, 0));
         return None;
     };
-    append_gc_offsets(iseq, &gc_offsets);
 
     // Update the JIT-to-JIT call to call the stub
     let stub_addr = stub_ptr.raw_ptr(cb);
-    branch.regenerate(cb, |asm| {
-        asm_comment!(asm, "call function stub: {}", iseq_get_location(iseq, 0));
+    iseq_call.regenerate(cb, |asm| {
+        asm_comment!(asm, "call function stub: {}", iseq_get_location(iseq_call.iseq, 0));
         asm.ccall(stub_addr, vec![]);
     });
     Some(())
@@ -210,7 +209,7 @@ fn gen_entry(cb: &mut CodeBlock, iseq: IseqPtr, function: &Function, function_pt
 }
 
 /// Compile an ISEQ into machine code
-fn gen_iseq(cb: &mut CodeBlock, iseq: IseqPtr) -> Option<(CodePtr, Vec<(Rc<Branch>, IseqPtr)>)> {
+fn gen_iseq(cb: &mut CodeBlock, iseq: IseqPtr) -> Option<(CodePtr, Vec<Rc<IseqCall>>)> {
     // Return an existing pointer if it's already compiled
     let payload = get_or_create_iseq_payload(iseq);
     match payload.status {
@@ -233,7 +232,7 @@ fn gen_iseq(cb: &mut CodeBlock, iseq: IseqPtr) -> Option<(CodePtr, Vec<(Rc<Branch>, IseqPtr)>)> {
     if let Some((start_ptr, gc_offsets, jit)) = result {
         payload.status = IseqStatus::Compiled(start_ptr);
         append_gc_offsets(iseq, &gc_offsets);
-        Some((start_ptr, jit.branch_iseqs))
+        Some((start_ptr, jit.iseq_calls))
     } else {
         payload.status = IseqStatus::CantCompile;
         None
@@ -928,20 +927,20 @@ fn gen_send_without_block_direct(
     c_args.extend(args);
 
     // Make a method call. The target address will be rewritten once compiled.
-    let branch = Branch::new();
+    let iseq_call = IseqCall::new(iseq);
     let dummy_ptr = cb.get_write_ptr().raw_ptr(cb);
-    jit.branch_iseqs.push((branch.clone(), iseq));
+    jit.iseq_calls.push(iseq_call.clone());
     // TODO(max): Add a PatchPoint here that can side-exit the function if the callee messed with
     // the frame's locals
-    let ret = asm.ccall_with_branch(dummy_ptr, c_args, &branch);
+    let ret = asm.ccall_with_iseq_call(dummy_ptr, c_args, &iseq_call);
 
     // If a callee side-exits, i.e. returns Qundef, propagate the return value to the caller.
     // The caller will side-exit the callee into the interpreter.
     // TODO: Let side exit code pop all JIT frames to optimize away this cmp + je.
     asm_comment!(asm, "side-exit if callee side-exits");
     asm.cmp(ret, Qundef.into());
     // Restore the C stack pointer on exit
-    asm.je(ZJITState::get_exit_code().into());
+    asm.je(ZJITState::get_exit_trampoline().into());
 
     asm_comment!(asm, "restore SP register for the caller");
     let new_sp = asm.sub(SP, sp_offset.into());
@@ -1428,32 +1427,36 @@ c_callable! {
     /// This function is expected to be called repeatedly when ZJIT fails to compile the stub.
     /// We should be able to compile most (if not all) function stubs by side-exiting at unsupported
     /// instructions, so this should be used primarily for cb.has_dropped_bytes() situations.
-    fn function_stub_hit(iseq: IseqPtr, branch_ptr: *const c_void, ec: EcPtr, sp: *mut VALUE) -> *const u8 {
+    fn function_stub_hit(iseq_call_ptr: *const c_void, ec: EcPtr, sp: *mut VALUE) -> *const u8 {
         with_vm_lock(src_loc!(), || {
             // gen_push_frame() doesn't set PC and SP, so we need to set them before exit.
             // function_stub_hit_body() may allocate and call gc_validate_pc(), so we always set PC.
+            let iseq_call = unsafe { Rc::from_raw(iseq_call_ptr as *const IseqCall) };
             let cfp = unsafe { get_ec_cfp(ec) };
-            let pc = unsafe { rb_iseq_pc_at_idx(iseq, 0) }; // TODO: handle opt_pc once supported
+            let pc = unsafe { rb_iseq_pc_at_idx(iseq_call.iseq, 0) }; // TODO: handle opt_pc once supported
             unsafe { rb_set_cfp_pc(cfp, pc) };
             unsafe { rb_set_cfp_sp(cfp, sp) };
 
             // If we already know we can't compile the ISEQ, fail early without cb.mark_all_executable().
             // TODO: Alan thinks the payload status part of this check can happen without the VM lock, since the whole
             // code path can be made read-only. But you still need the check as is while holding the VM lock in any case.
             let cb = ZJITState::get_code_block();
-            let payload = get_or_create_iseq_payload(iseq);
+            let payload = get_or_create_iseq_payload(iseq_call.iseq);
             if cb.has_dropped_bytes() || payload.status == IseqStatus::CantCompile {
+                // We'll use this Rc again, so increment the ref count decremented by from_raw.
+                unsafe { Rc::increment_strong_count(iseq_call_ptr as *const IseqCall); }
+
                 // Exit to the interpreter
-                return ZJITState::get_exit_code().raw_ptr(cb);
+                return ZJITState::get_exit_trampoline().raw_ptr(cb);
             }
 
             // Otherwise, attempt to compile the ISEQ. We have to mark_all_executable() beyond this point.
-            let code_ptr = with_time_stat(compile_time_ns, || function_stub_hit_body(cb, iseq, branch_ptr));
+            let code_ptr = with_time_stat(compile_time_ns, || function_stub_hit_body(cb, &iseq_call));
             let code_ptr = if let Some(code_ptr) = code_ptr {
                 code_ptr
             } else {
                 // Exit to the interpreter
-                ZJITState::get_exit_code()
+                ZJITState::get_exit_trampoline()
             };
             cb.mark_all_executable();
             code_ptr.raw_ptr(cb)
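The Rc handling above is the subtle part of this hunk: gen_function_stub leaks one strong count into the stub with Rc::into_raw, function_stub_hit reclaims it with Rc::from_raw, and on the early-exit path (where the stub stays live and may be hit again) the count is handed back with Rc::increment_strong_count. A minimal self-contained sketch of that round trip, using Rc<String> as a stand-in for Rc<IseqCall>:

    use std::rc::Rc;

    fn main() {
        // gen_function_stub: leak one strong count into "the stub".
        let call = Rc::new(String::from("iseq_call"));
        let raw: *const String = Rc::into_raw(call);

        // function_stub_hit: take that count back.
        let call = unsafe { Rc::from_raw(raw) };

        // Early-exit path: the stub will be hit again, so restore the
        // count that from_raw consumed before `call` drops.
        unsafe { Rc::increment_strong_count(raw) };
        assert_eq!(Rc::strong_count(&call), 2);

        drop(call); // the stub still owns one count
        let call_again = unsafe { Rc::from_raw(raw) }; // final reclaim
        assert_eq!(Rc::strong_count(&call_again), 1);
    }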
@@ -1462,34 +1465,48 @@ c_callable! {
 }
 
 /// Compile an ISEQ for a function stub
-fn function_stub_hit_body(cb: &mut CodeBlock, iseq: IseqPtr, branch_ptr: *const c_void) -> Option<CodePtr> {
+fn function_stub_hit_body(cb: &mut CodeBlock, iseq_call: &Rc<IseqCall>) -> Option<CodePtr> {
     // Compile the stubbed ISEQ
-    let Some((code_ptr, branch_iseqs)) = gen_iseq(cb, iseq) else {
-        debug!("Failed to compile iseq: gen_iseq failed: {}", iseq_get_location(iseq, 0));
+    let Some((code_ptr, iseq_calls)) = gen_iseq(cb, iseq_call.iseq) else {
+        debug!("Failed to compile iseq: gen_iseq failed: {}", iseq_get_location(iseq_call.iseq, 0));
         return None;
     };
 
     // Stub callee ISEQs for JIT-to-JIT calls
-    for (branch, callee_iseq) in branch_iseqs.into_iter() {
-        gen_iseq_branch(cb, callee_iseq, iseq, branch)?;
+    for callee_iseq_call in iseq_calls.into_iter() {
+        gen_iseq_call(cb, iseq_call.iseq, callee_iseq_call)?;
     }
 
     // Update the stub to call the code pointer
-    let branch = unsafe { Rc::from_raw(branch_ptr as *const Branch) };
     let code_addr = code_ptr.raw_ptr(cb);
-    branch.regenerate(cb, |asm| {
-        asm_comment!(asm, "call compiled function: {}", iseq_get_location(iseq, 0));
+    iseq_call.regenerate(cb, |asm| {
+        asm_comment!(asm, "call compiled function: {}", iseq_get_location(iseq_call.iseq, 0));
         asm.ccall(code_addr, vec![]);
     });
 
     Some(code_ptr)
 }
 
 /// Compile a stub for an ISEQ called by SendWithoutBlockDirect
-/// TODO: Consider creating a trampoline to share some of the code among function stubs
-fn gen_function_stub(cb: &mut CodeBlock, iseq: IseqPtr, branch: Rc<Branch>) -> Option<(CodePtr, Vec<CodePtr>)> {
+fn gen_function_stub(cb: &mut CodeBlock, iseq_call: Rc<IseqCall>) -> Option<CodePtr> {
+    let mut asm = Assembler::new();
+    asm_comment!(asm, "Stub: {}", iseq_get_location(iseq_call.iseq, 0));
+
+    // Call function_stub_hit using the shared trampoline. See `gen_function_stub_hit_trampoline`.
+    // Use load_into instead of mov, which is split on arm64, to avoid clobbering ALLOC_REGS.
+    asm.load_into(SCRATCH_OPND, Opnd::const_ptr(Rc::into_raw(iseq_call).into()));
+    asm.jmp(ZJITState::get_function_stub_hit_trampoline().into());
+
+    asm.compile(cb).map(|(code_ptr, gc_offsets)| {
+        assert_eq!(gc_offsets.len(), 0);
+        code_ptr
+    })
+}
+
+/// Generate a trampoline that is used when a function stub is hit
+pub fn gen_function_stub_hit_trampoline(cb: &mut CodeBlock) -> Option<CodePtr> {
     let mut asm = Assembler::new();
-    asm_comment!(asm, "Stub: {}", iseq_get_location(iseq, 0));
+    asm_comment!(asm, "function_stub_hit trampoline");
 
     // Maintain alignment for x86_64, and set up a frame for arm64 properly
     asm.frame_setup(&[], 0);
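Two details of the new stub shape are worth spelling out: the IseqCall pointer travels in SCRATCH_OPND rather than a C argument register because, at stub entry, the argument registers still hold the JIT-to-JIT call's own arguments and must survive until the trampoline has saved them; and load_into is used because a plain mov is split into several instructions on arm64 and could clobber ALLOC_REGS. A runnable toy register-machine model of the handoff, with Regs, stub, and trampoline as hypothetical names (the real flow is emitted LIR, not Rust):

    #[derive(Default)]
    struct Regs { scratch: usize, arg0: usize }

    fn stub(regs: &mut Regs, iseq_call_ptr: usize) {
        regs.scratch = iseq_call_ptr; // load_into SCRATCH_OPND, const_ptr(...)
        trampoline(regs);             // jmp to the shared trampoline
    }

    fn trampoline(regs: &mut Regs) {
        // The trampoline, not the stub, moves SCRATCH into the first C
        // argument; argument registers were still busy when the stub ran.
        regs.arg0 = regs.scratch;
        // ... push ALLOC_REGS, call function_stub_hit, pop, jump ...
    }

    fn main() {
        let mut regs = Regs::default();
        stub(&mut regs, 0xdead_beef);
        assert_eq!(regs.arg0, 0xdead_beef);
    }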
@@ -1501,14 +1518,8 @@ fn gen_function_stub(cb: &mut CodeBlock, iseq: IseqPtr, branch: Rc<Branch>) -> Option<(CodePtr, Vec<CodePtr>)> {
     const { assert!(ALLOC_REGS.len() % 2 == 0, "x86_64 would need to push one more if we push an odd number of regs"); }
 
     // Compile the stubbed ISEQ
-    let branch_addr = Rc::into_raw(branch);
-    let jump_addr = asm_ccall!(asm, function_stub_hit,
-        Opnd::Value(iseq.into()),
-        Opnd::const_ptr(branch_addr as *const u8),
-        EC,
-        SP
-    );
-    asm.mov(Opnd::Reg(Assembler::SCRATCH_REG), jump_addr);
+    let jump_addr = asm_ccall!(asm, function_stub_hit, SCRATCH_OPND, EC, SP);
+    asm.mov(SCRATCH_OPND, jump_addr);
 
     asm_comment!(asm, "restore argument registers");
     for &reg in ALLOC_REGS.iter().rev() {
@@ -1518,16 +1529,20 @@ fn gen_function_stub(cb: &mut CodeBlock, iseq: IseqPtr, branch: Rc<Branch>) -> Option<(CodePtr, Vec<CodePtr>)> {
     // Discard the current frame since the JIT function will set it up again
     asm.frame_teardown(&[]);
 
-    // Jump to SCRATCH_REG so that cpop_all() doesn't clobber it
-    asm.jmp_opnd(Opnd::Reg(Assembler::SCRATCH_REG));
-    asm.compile(cb)
+    // Jump to SCRATCH_OPND so that cpop_into() doesn't clobber it
+    asm.jmp_opnd(SCRATCH_OPND);
+
+    asm.compile(cb).map(|(code_ptr, gc_offsets)| {
+        assert_eq!(gc_offsets.len(), 0);
+        code_ptr
+    })
 }
 
 /// Generate a trampoline that is used when a function exits without restoring PC and the stack
-pub fn gen_exit(cb: &mut CodeBlock) -> Option<CodePtr> {
+pub fn gen_exit_trampoline(cb: &mut CodeBlock) -> Option<CodePtr> {
     let mut asm = Assembler::new();
 
-    asm_comment!(asm, "exit from function stub");
+    asm_comment!(asm, "side-exit trampoline");
     asm.frame_teardown(&[]); // matching the setup in :bb0-prologue:
     asm.cret(Qundef.into());
@@ -1588,45 +1603,49 @@ fn aligned_stack_bytes(num_slots: usize) -> usize {
 }
 
 impl Assembler {
-    /// Make a C call while marking the start and end positions of it
-    fn ccall_with_branch(&mut self, fptr: *const u8, opnds: Vec<Opnd>, branch: &Rc<Branch>) -> Opnd {
+    /// Make a C call while marking the start and end positions for IseqCall
+    fn ccall_with_iseq_call(&mut self, fptr: *const u8, opnds: Vec<Opnd>, iseq_call: &Rc<IseqCall>) -> Opnd {
         // We need to create our own branch rc objects so that we can move the closure below
-        let start_branch = branch.clone();
-        let end_branch = branch.clone();
+        let start_iseq_call = iseq_call.clone();
+        let end_iseq_call = iseq_call.clone();
 
         self.ccall_with_pos_markers(
            fptr,
            opnds,
            move |code_ptr, _| {
-                start_branch.start_addr.set(Some(code_ptr));
+                start_iseq_call.start_addr.set(Some(code_ptr));
            },
            move |code_ptr, _| {
-                end_branch.end_addr.set(Some(code_ptr));
+                end_iseq_call.end_addr.set(Some(code_ptr));
            },
        )
    }
 }
 
-/// Store info about an outgoing branch in a code segment
+/// Store info about a JIT-to-JIT call
 #[derive(Debug)]
-struct Branch {
-    /// Position where the generated code starts
+struct IseqCall {
+    /// Callee ISEQ that start_addr jumps to
+    iseq: IseqPtr,
+
+    /// Position where the call instruction starts
     start_addr: Cell<Option<CodePtr>>,
 
-    /// Position where the generated code ends (exclusive)
+    /// Position where the call instruction ends (exclusive)
     end_addr: Cell<Option<CodePtr>>,
 }
 
-impl Branch {
-    /// Allocate a new branch
-    fn new() -> Rc<Self> {
-        Rc::new(Branch {
+impl IseqCall {
+    /// Allocate a new IseqCall
+    fn new(iseq: IseqPtr) -> Rc<Self> {
+        Rc::new(IseqCall {
+            iseq,
             start_addr: Cell::new(None),
             end_addr: Cell::new(None),
         })
     }
 
-    /// Regenerate a branch with a given callback
+    /// Regenerate an IseqCall with a given callback
     fn regenerate(&self, cb: &mut CodeBlock, callback: impl Fn(&mut Assembler)) {
         cb.with_write_ptr(self.start_addr.get().unwrap(), |cb| {
             let mut asm = Assembler::new();
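ccall_with_iseq_call clones the Rc twice because each `move` closure handed to ccall_with_pos_markers must own its own handle; both closures then write through Cell into the same shared IseqCall. A self-contained toy version of that pattern (with_pos_markers and the addresses are made up; only the cloning/Cell shape matches the real code):

    use std::cell::Cell;
    use std::rc::Rc;

    struct IseqCall {
        start_addr: Cell<Option<usize>>,
        end_addr: Cell<Option<usize>>,
    }

    // Stand-in for ccall_with_pos_markers: fires a callback at the
    // start and end of the emitted call instruction.
    fn with_pos_markers(start: impl Fn(usize), end: impl Fn(usize)) {
        start(0x1000);
        end(0x1010);
    }

    fn main() {
        let iseq_call = Rc::new(IseqCall {
            start_addr: Cell::new(None),
            end_addr: Cell::new(None),
        });
        let start_iseq_call = iseq_call.clone(); // one handle per closure
        let end_iseq_call = iseq_call.clone();
        with_pos_markers(
            move |addr| start_iseq_call.start_addr.set(Some(addr)),
            move |addr| end_iseq_call.end_addr.set(Some(addr)),
        );
        assert_eq!(iseq_call.start_addr.get(), Some(0x1000));
        assert_eq!(iseq_call.end_addr.get(), Some(0x1010));
    }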

zjit/src/state.rs

Lines changed: 16 additions & 6 deletions
@@ -1,4 +1,4 @@
-use crate::codegen::gen_exit;
+use crate::codegen::{gen_exit_trampoline, gen_function_stub_hit_trampoline};
 use crate::cruby::{self, rb_bug_panic_hook, rb_vm_insns_count, EcPtr, Qnil, VALUE};
 use crate::cruby_methods;
 use crate::invariants::Invariants;
@@ -34,7 +34,10 @@ pub struct ZJITState {
     method_annotations: cruby_methods::Annotations,
 
     /// Trampoline to side-exit without restoring PC or the stack
-    exit_code: CodePtr,
+    exit_trampoline: CodePtr,
+
+    /// Trampoline to call function_stub_hit
+    function_stub_hit_trampoline: CodePtr,
 }
 
 /// Private singleton instance of the codegen globals
@@ -83,7 +86,8 @@ impl ZJITState {
         #[cfg(test)]
         let mut cb = CodeBlock::new_dummy();
 
-        let exit_code = gen_exit(&mut cb).unwrap();
+        let exit_trampoline = gen_exit_trampoline(&mut cb).unwrap();
+        let function_stub_hit_trampoline = gen_function_stub_hit_trampoline(&mut cb).unwrap();
 
         // Initialize the codegen globals instance
         let zjit_state = ZJITState {
@@ -92,7 +96,8 @@
             invariants: Invariants::default(),
             assert_compiles: false,
             method_annotations: cruby_methods::init(),
-            exit_code,
+            exit_trampoline,
+            function_stub_hit_trampoline,
         };
         unsafe { ZJIT_STATE = Some(zjit_state); }
     }
@@ -170,8 +175,13 @@
     }
 
     /// Return a code pointer to the side-exit trampoline
-    pub fn get_exit_code() -> CodePtr {
-        ZJITState::get_instance().exit_code
+    pub fn get_exit_trampoline() -> CodePtr {
+        ZJITState::get_instance().exit_trampoline
+    }
+
+    /// Return a code pointer to the function stub hit trampoline
+    pub fn get_function_stub_hit_trampoline() -> CodePtr {
+        ZJITState::get_instance().function_stub_hit_trampoline
     }
 }
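Both trampolines are generated exactly once in ZJITState::init and only read afterwards through the getters. A toy equivalent of that generate-once/read-many shape using std::sync::OnceLock (the real code stores the pointers in the ZJITState singleton instead; the usize address here is made up and stands in for CodePtr):

    use std::sync::OnceLock;

    static EXIT_TRAMPOLINE: OnceLock<usize> = OnceLock::new();

    fn gen_exit_trampoline() -> usize {
        0x4000 // pretend this is the address of freshly emitted code
    }

    fn get_exit_trampoline() -> usize {
        // First caller generates; everyone else gets the cached pointer.
        *EXIT_TRAMPOLINE.get_or_init(gen_exit_trampoline)
    }

    fn main() {
        assert_eq!(get_exit_trampoline(), 0x4000);
        assert_eq!(get_exit_trampoline(), 0x4000); // generated once
    }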
