
Commit afac226

ZJIT: Fix side-exit panicking when there are too many locals
Previously, ARM64 panicked in compiled_side_exits() when a memory displacement grew large enough to exceed the 9-bit limit. We usually split these kinds of memory operands, but compiled_side_exits() runs after the split pass.

Using scratch registers, implement `Insn::Store` on ARM so that it can handle large displacements without split(). Do the same for x86, and remove the arch-specific code from compiled_side_exits(). We can now run `TestKeywordArguments`.

Since `Insn::Store` no longer needs splitting, users also enjoy lower register pressure. The downside is that using `Assembler::SCRATCH_REG` as a base register is now sometimes an error, depending on whether `Insn::Store` also needs that register. This seems like a fair trade-off since `SCRATCH_REG` is rarely used, and we don't currently use it as a base register anywhere.
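As an illustration (not part of the commit), here is a minimal sketch of the new behavior, modeled on the test_store() test added to zjit/src/backend/arm64/mod.rs below; setup_asm(), Opnd::mem(), NATIVE_STACK_PTR, and compile_with_num_regs() are the backend's existing test helpers and operand constructors:

    // A store whose displacement exceeds the signed 9-bit STUR range no longer needs the
    // split pass: arm64_emit materializes the address in a scratch register by itself.
    let (mut asm, mut cb) = setup_asm();
    let far_slot = Opnd::mem(64, NATIVE_STACK_PTR, -0x305); // displacement too large for STUR
    asm.store(far_slot, Opnd::UImm(0)); // expected to lower to roughly: sub x17, sp, #0x305; stur xzr, [x17]
    asm.compile_with_num_regs(&mut cb, 0);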
1 parent f58fca7 commit afac226

File tree

4 files changed, +269 -114 lines changed


test/.excludes-zjit/TestKeywordArguments.rb

Lines changed: 0 additions & 1 deletion
This file was deleted.

zjit/src/backend/arm64/mod.rs

Lines changed: 176 additions & 58 deletions
@@ -195,11 +195,15 @@ pub const ALLOC_REGS: &'static [Reg] = &[
 
 impl Assembler
 {
-    // Special scratch registers for intermediate processing.
-    // This register is caller-saved (so we don't have to save it before using it)
+    /// Special scratch registers for intermediate processing.
+    /// This register is call-clobbered (so we don't have to save it before using it).
+    /// Avoid using if you can since this is used to lower [Insn] internally and
+    /// so conflicts are possible.
     pub const SCRATCH_REG: Reg = X16_REG;
     const SCRATCH0: A64Opnd = A64Opnd::Reg(Assembler::SCRATCH_REG);
+    const SCRATCH0_REG: Reg = Assembler::SCRATCH_REG;
     const SCRATCH1: A64Opnd = A64Opnd::Reg(X17_REG);
+    const SCRATCH1_REG: Reg = X17_REG;
 
     /// Get the list of registers from which we will allocate on this platform
     pub fn get_alloc_regs() -> Vec<Reg> {
@@ -652,31 +656,6 @@ impl Assembler
                 *opnd = split_load_operand(asm, *opnd);
                 asm.push_insn(insn);
             },
-            Insn::Store { dest, src } => {
-                // The value being stored must be in a register, so if it's
-                // not already one we'll load it first.
-                let opnd1 = match src {
-                    // If the first operand is zero, then we can just use
-                    // the zero register.
-                    Opnd::UImm(0) | Opnd::Imm(0) => Opnd::Reg(XZR_REG),
-                    // Otherwise we'll check if we need to load it first.
-                    _ => split_load_operand(asm, *src)
-                };
-
-                match dest {
-                    Opnd::Reg(_) => {
-                        // Store does not support a register as a dest operand.
-                        asm.mov(*dest, opnd1);
-                    }
-                    _ => {
-                        // The displacement for the STUR instruction can't be more
-                        // than 9 bits long. If it's longer, we need to load the
-                        // memory address into a register first.
-                        let opnd0 = split_memory_address(asm, *dest);
-                        asm.store(opnd0, opnd1);
-                    }
-                }
-            },
             Insn::Mul { left, right, .. } => {
                 *left = split_load_operand(asm, *left);
                 *right = split_load_operand(asm, *right);
@@ -843,6 +822,42 @@ impl Assembler
         }
     }
 
+    /// Do the address calculation of `out_reg = base_reg + disp`
+    fn load_effective_address(cb: &mut CodeBlock, out: A64Opnd, base_reg_no: u8, disp: i32) {
+        let base_reg = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: base_reg_no });
+        assert_ne!(31, out.unwrap_reg().reg_no, "Lea sp, [sp, #imm] not always encodable. Use add/sub instead.");
+
+        if ShiftedImmediate::try_from(disp.unsigned_abs() as u64).is_ok() {
+            // Use ADD/SUB if the displacement fits
+            add(cb, out, base_reg, A64Opnd::new_imm(disp.into()));
+        } else {
+            // Use add_extended() to interpret reg_no=31 as sp
+            // since the base register is never the zero register.
+            // Careful! Only the first two operands can refer to sp.
+            emit_load_value(cb, out, disp as u64);
+            add_extended(cb, out, base_reg, out);
+        };
+    }
+
+    /// Load a VALUE to a register and remember it for GC marking and reference updating
+    fn emit_load_gc_value(cb: &mut CodeBlock, gc_offsets: &mut Vec<CodePtr>, dest: A64Opnd, value: VALUE) {
+        // We dont need to check if it's a special const
+        // here because we only allow these operands to hit
+        // this point if they're not a special const.
+        assert!(!value.special_const_p());
+
+        // This assumes only load instructions can contain
+        // references to GC'd Value operands. If the value
+        // being loaded is a heap object, we'll report that
+        // back out to the gc_offsets list.
+        ldr_literal(cb, dest, 2.into());
+        b(cb, InstructionOffset::from_bytes(4 + (SIZEOF_VALUE as i32)));
+        cb.write_bytes(&value.as_u64().to_le_bytes());
+
+        let ptr_offset = cb.get_write_ptr().sub_bytes(SIZEOF_VALUE);
+        gc_offsets.push(ptr_offset);
+    }
+
     /// Emit a push instruction for the given operand by adding to the stack
     /// pointer and then storing the given value.
     fn emit_push(cb: &mut CodeBlock, opnd: A64Opnd) {
@@ -1013,12 +1028,84 @@ impl Assembler
             Insn::LShift { opnd, shift, out } => {
                 lsl(cb, out.into(), opnd.into(), shift.into());
             },
-            Insn::Store { dest, src } => {
+            store_insn @ Insn::Store { dest, src } => {
+                // With minor exceptions, as long as `dest` is a Mem, all forms of `src` are
+                // accepted. As a rule of thumb, avoid using Assembler::SCRATCH as a memory
+                // base register to guarantee things will work.
+                let &Opnd::Mem(Mem { num_bits: dest_num_bits, base: MemBase::Reg(base_reg_no), disp }) = dest else {
+                    panic!("Unexpected Insn::Store destination in arm64_emit: {dest:?}");
+                };
+
+                // This kind of tricky clobber can only happen for explicit use of SCRATCH_REG,
+                // so we panic to get the author to change their code.
+                #[track_caller]
+                fn assert_no_clobber(store_insn: &Insn, user_use: u8, backend_use: Reg) {
+                    assert_ne!(
+                        backend_use.reg_no,
+                        user_use,
+                        "Emitting {store_insn:?} would clobber {user_use:?}, in conflict with its semantics"
+                    );
+                }
+
+                // Split src into SCRATCH0 if necessary
+                let src_reg: A64Reg = match src {
+                    Opnd::Reg(reg) => *reg,
+                    // Use zero register when possible
+                    Opnd::UImm(0) | Opnd::Imm(0) => XZR_REG,
+                    // Immediates
+                    &Opnd::Imm(imm) => {
+                        assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH0_REG);
+                        emit_load_value(cb, Self::SCRATCH0, imm as u64);
+                        Self::SCRATCH0_REG
+                    }
+                    &Opnd::UImm(imm) => {
+                        assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH0_REG);
+                        emit_load_value(cb, Self::SCRATCH0, imm);
+                        Self::SCRATCH0_REG
+                    }
+                    &Opnd::Value(value) => {
+                        assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH0_REG);
+                        emit_load_gc_value(cb, &mut gc_offsets, Self::SCRATCH0, value);
+                        Self::SCRATCH0_REG
+                    }
+                    src_mem @ &Opnd::Mem(Mem { num_bits: src_num_bits, base: MemBase::Reg(src_base_reg_no), disp: src_disp }) => {
+                        // For mem-to-mem store, load the source into SCRATCH0
+                        assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH0_REG);
+                        let src_mem = if mem_disp_fits_bits(src_disp) {
+                            src_mem.into()
+                        } else {
+                            // Split the load address into SCRATCH0 first if necessary
+                            assert_no_clobber(store_insn, src_base_reg_no, Self::SCRATCH0_REG);
+                            load_effective_address(cb, Self::SCRATCH0, src_base_reg_no, src_disp);
+                            A64Opnd::new_mem(dest_num_bits, Self::SCRATCH0, 0)
+                        };
+                        match src_num_bits {
+                            64 | 32 => ldur(cb, Self::SCRATCH0, src_mem),
+                            16 => ldurh(cb, Self::SCRATCH0, src_mem),
+                            8 => ldurb(cb, Self::SCRATCH0, src_mem),
+                            num_bits => panic!("unexpected num_bits: {num_bits}")
+                        };
+                        Self::SCRATCH0_REG
+                    }
+                    src @ (Opnd::Mem(_) | Opnd::None | Opnd::VReg { .. }) => panic!("Unexpected source operand during arm64_emit: {src:?}")
+                };
+                let src = A64Opnd::Reg(src_reg);
+
+                // Split dest into SCRATCH1 if necessary.
+                let dest = if mem_disp_fits_bits(disp) {
+                    dest.into()
+                } else {
+                    assert_no_clobber(store_insn, src_reg.reg_no, Self::SCRATCH1_REG);
+                    assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH1_REG);
+                    load_effective_address(cb, Self::SCRATCH1, base_reg_no, disp);
+                    A64Opnd::new_mem(dest_num_bits, Self::SCRATCH1, 0)
+                };
+
                 // This order may be surprising but it is correct. The way
                 // the Arm64 assembler works, the register that is going to
                 // be stored is first and the address is second. However in
                 // our IR we have the address first and the register second.
-                match dest.rm_num_bits() {
+                match dest_num_bits {
                     64 | 32 => stur(cb, src.into(), dest.into()),
                     16 => sturh(cb, src.into(), dest.into()),
                     num_bits => panic!("unexpected dest num_bits: {} (src: {:#?}, dest: {:#?})", num_bits, src, dest),
@@ -1045,21 +1132,7 @@ impl Assembler
                         };
                     },
                     Opnd::Value(value) => {
-                        // We dont need to check if it's a special const
-                        // here because we only allow these operands to hit
-                        // this point if they're not a special const.
-                        assert!(!value.special_const_p());
-
-                        // This assumes only load instructions can contain
-                        // references to GC'd Value operands. If the value
-                        // being loaded is a heap object, we'll report that
-                        // back out to the gc_offsets list.
-                        ldr_literal(cb, out.into(), 2.into());
-                        b(cb, InstructionOffset::from_bytes(4 + (SIZEOF_VALUE as i32)));
-                        cb.write_bytes(&value.as_u64().to_le_bytes());
-
-                        let ptr_offset = cb.get_write_ptr().sub_bytes(SIZEOF_VALUE);
-                        gc_offsets.push(ptr_offset);
+                        emit_load_gc_value(cb, &mut gc_offsets, out.into(), value);
                     },
                     Opnd::None => {
                         unreachable!("Attempted to load from None operand");
@@ -1097,20 +1170,7 @@ impl Assembler
                 let &Opnd::Mem(Mem { num_bits: _, base: MemBase::Reg(base_reg_no), disp }) = opnd else {
                     panic!("Unexpected Insn::Lea operand in arm64_emit: {opnd:?}");
                 };
-                let out: A64Opnd = out.into();
-                let base_reg = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: base_reg_no });
-                assert_ne!(31, out.unwrap_reg().reg_no, "Insn::Lea sp, [sp, #imm] not always encodable. Use add/sub instead.");
-
-                if ShiftedImmediate::try_from(disp.unsigned_abs() as u64).is_ok() {
-                    // Use ADD/SUB if the displacement fits
-                    add(cb, out, base_reg, A64Opnd::new_imm(disp.into()));
-                } else {
-                    // Use add_extended() to interpret reg_no=31 as sp
-                    // since the base register is never the zero register.
-                    // Careful! Only the first two operands can refer to sp.
-                    emit_load_value(cb, out, disp as u64);
-                    add_extended(cb, out, base_reg, out);
-                };
+                load_effective_address(cb, out.into(), base_reg_no, disp);
             }
             Insn::LeaJumpTarget { out, target, .. } => {
                 if let Target::Label(label_idx) = target {
@@ -1627,6 +1687,64 @@ mod tests {
         ");
     }
 
+    #[test]
+    fn test_store() {
+        let (mut asm, mut cb) = setup_asm();
+
+        // Large memory offsets in combinations of destination and source
+        let large_mem = Opnd::mem(64, NATIVE_STACK_PTR, -0x305);
+        let small_mem = Opnd::mem(64, C_RET_OPND, 0);
+        asm.store(small_mem, large_mem);
+        asm.store(large_mem, small_mem);
+        asm.store(large_mem, large_mem);
+
+        asm.compile_with_num_regs(&mut cb, 0);
+        assert_disasm!(cb, "f0170cd1100240f8100000f8100040f8f1170cd1300200f8f0170cd1100240f8f1170cd1300200f8", "
+            0x0: sub x16, sp, #0x305
+            0x4: ldur x16, [x16]
+            0x8: stur x16, [x0]
+            0xc: ldur x16, [x0]
+            0x10: sub x17, sp, #0x305
+            0x14: stur x16, [x17]
+            0x18: sub x16, sp, #0x305
+            0x1c: ldur x16, [x16]
+            0x20: sub x17, sp, #0x305
+            0x24: stur x16, [x17]
+        ");
+    }
+
+    #[test]
+    fn test_store_value_without_split() {
+        let (mut asm, mut cb) = setup_asm();
+
+        let imitation_heap_value = VALUE(0x1000);
+        assert!(imitation_heap_value.heap_object_p());
+        asm.store(Opnd::mem(VALUE_BITS, SP, 0), imitation_heap_value.into());
+
+        // Side exit code is compiled without the split pass, so we directly call emit here to
+        // emulate that scenario.
+        let gc_offsets = asm.arm64_emit(&mut cb).unwrap();
+        assert_eq!(1, gc_offsets.len(), "VALUE source operand should be reported as gc offset");
+
+        assert_disasm!(cb, "50000058030000140010000000000000b00200f8", "
+            0x0: ldr x16, #8
+            0x4: b #0x10
+            0x8: .byte 0x00, 0x10, 0x00, 0x00
+            0xc: .byte 0x00, 0x00, 0x00, 0x00
+            0x10: stur x16, [x21]
+        ");
+    }
+
+    #[test]
+    #[should_panic]
+    fn test_store_unserviceable() {
+        let (mut asm, mut cb) = setup_asm();
+        // This would put the source into SCRATCH_REG, messing up the destination
+        asm.store(Opnd::mem(64, Opnd::Reg(Assembler::SCRATCH_REG), 0), 0x83902.into());
+
+        asm.compile_with_num_regs(&mut cb, 0);
+    }
+
     /*
     #[test]
     fn test_emit_lea_label() {

zjit/src/backend/lir.rs

Lines changed: 0 additions & 13 deletions
@@ -1822,29 +1822,16 @@ impl Assembler
         };
         self.write_label(side_exit_label.clone());
 
-        // Load an operand that cannot be used as a source of Insn::Store
-        fn split_store_source(asm: &mut Assembler, opnd: Opnd) -> Opnd {
-            if matches!(opnd, Opnd::Mem(_) | Opnd::Value(_)) ||
-                (cfg!(target_arch = "aarch64") && matches!(opnd, Opnd::UImm(_))) {
-                asm.load_into(Opnd::Reg(Assembler::SCRATCH_REG), opnd);
-                Opnd::Reg(Assembler::SCRATCH_REG)
-            } else {
-                opnd
-            }
-        }
-
         // Restore the PC and the stack for regular side exits. We don't do this for
         // side exits right after JIT-to-JIT calls, which restore them before the call.
         if let Some(SideExitContext { pc, stack, locals }) = context {
             asm_comment!(self, "write stack slots: {stack:?}");
             for (idx, &opnd) in stack.iter().enumerate() {
-                let opnd = split_store_source(self, opnd);
                 self.store(Opnd::mem(64, SP, idx as i32 * SIZEOF_VALUE_I32), opnd);
             }
 
             asm_comment!(self, "write locals: {locals:?}");
             for (idx, &opnd) in locals.iter().enumerate() {
-                let opnd = split_store_source(self, opnd);
                 self.store(Opnd::mem(64, SP, (-local_size_and_idx_to_ep_offset(locals.len(), idx) - 1) * SIZEOF_VALUE_I32), opnd);
             }
 