Skip to content

Commit 0af85a1

Browse files
ZJIT: Optimize setivar with shape transition (ruby#15375)
Since we do a decent job of pre-sizing objects, don't handle the case where we would need to re-size an object. Also don't handle too-complex shapes. lobsters stats before: ``` Top-20 calls to C functions from JIT code (79.4% of total 90,051,140): rb_vm_opt_send_without_block: 19,762,433 (21.9%) rb_vm_setinstancevariable: 7,698,314 ( 8.5%) rb_hash_aref: 6,767,461 ( 7.5%) rb_vm_env_write: 5,373,080 ( 6.0%) rb_vm_send: 5,049,229 ( 5.6%) rb_vm_getinstancevariable: 4,535,259 ( 5.0%) rb_obj_is_kind_of: 3,746,306 ( 4.2%) rb_ivar_get_at_no_ractor_check: 3,745,237 ( 4.2%) rb_vm_invokesuper: 3,037,467 ( 3.4%) rb_ary_entry: 2,351,983 ( 2.6%) rb_vm_opt_getconstant_path: 1,344,740 ( 1.5%) rb_vm_invokeblock: 1,184,474 ( 1.3%) Hash#[]=: 1,064,288 ( 1.2%) rb_gc_writebarrier: 1,006,972 ( 1.1%) rb_ec_ary_new_from_values: 902,687 ( 1.0%) fetch: 898,667 ( 1.0%) rb_str_buf_append: 833,787 ( 0.9%) rb_class_allocate_instance: 822,024 ( 0.9%) Hash#fetch: 699,580 ( 0.8%) _bi20: 682,068 ( 0.8%) Top-4 setivar fallback reasons (100.0% of total 7,732,326): shape_transition: 6,032,109 (78.0%) not_monomorphic: 1,469,300 (19.0%) not_t_object: 172,636 ( 2.2%) too_complex: 58,281 ( 0.8%) ``` lobsters stats after: ``` Top-20 calls to C functions from JIT code (79.0% of total 88,322,656): rb_vm_opt_send_without_block: 19,777,880 (22.4%) rb_hash_aref: 6,771,589 ( 7.7%) rb_vm_env_write: 5,372,789 ( 6.1%) rb_gc_writebarrier: 5,195,527 ( 5.9%) rb_vm_send: 5,049,145 ( 5.7%) rb_vm_getinstancevariable: 4,538,485 ( 5.1%) rb_obj_is_kind_of: 3,746,241 ( 4.2%) rb_ivar_get_at_no_ractor_check: 3,745,172 ( 4.2%) rb_vm_invokesuper: 3,037,157 ( 3.4%) rb_ary_entry: 2,351,968 ( 2.7%) rb_vm_setinstancevariable: 1,703,337 ( 1.9%) rb_vm_opt_getconstant_path: 1,344,730 ( 1.5%) rb_vm_invokeblock: 1,184,290 ( 1.3%) Hash#[]=: 1,061,868 ( 1.2%) rb_ec_ary_new_from_values: 902,666 ( 1.0%) fetch: 898,666 ( 1.0%) rb_str_buf_append: 833,784 ( 0.9%) rb_class_allocate_instance: 821,778 ( 0.9%) Hash#fetch: 755,913 ( 0.9%) Top-4 setivar fallback reasons (100.0% of total 1,703,337): not_monomorphic: 1,472,405 (86.4%) not_t_object: 172,629 (10.1%) too_complex: 58,281 ( 3.4%) new_shape_needs_extension: 22 ( 0.0%) ``` I also noticed that primitive printing in HIR was broken so I fixed that. Co-authored-by: Aaron Patterson <[email protected]>
1 parent 3efd8c6 commit 0af85a1

File tree

9 files changed

+242
-28
lines changed

9 files changed

+242
-28
lines changed

jit.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -767,3 +767,9 @@ rb_yarv_str_eql_internal(VALUE str1, VALUE str2)
767767
}
768768

769769
void rb_jit_str_concat_codepoint(VALUE str, VALUE codepoint);
770+
771+
attr_index_t
772+
rb_jit_shape_capacity(shape_id_t shape_id)
773+
{
774+
return RSHAPE_CAPACITY(shape_id);
775+
}

zjit/bindgen/src/main.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ fn main() {
100100
.allowlist_function("rb_shape_id_offset")
101101
.allowlist_function("rb_shape_get_iv_index")
102102
.allowlist_function("rb_shape_transition_add_ivar_no_warnings")
103+
.allowlist_function("rb_jit_shape_capacity")
103104
.allowlist_var("rb_invalid_shape_id")
104105
.allowlist_type("shape_id_fl_type")
105106
.allowlist_var("VM_KW_SPECIFIED_BITS_MAX")

zjit/src/codegen.rs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio
350350
&Insn::Const { val: Const::CPtr(val) } => gen_const_cptr(val),
351351
&Insn::Const { val: Const::CInt64(val) } => gen_const_long(val),
352352
&Insn::Const { val: Const::CUInt16(val) } => gen_const_uint16(val),
353+
&Insn::Const { val: Const::CUInt32(val) } => gen_const_uint32(val),
353354
Insn::Const { .. } => panic!("Unexpected Const in gen_insn: {insn}"),
354355
Insn::NewArray { elements, state } => gen_new_array(asm, opnds!(elements), &function.frame_state(*state)),
355356
Insn::NewHash { elements, state } => gen_new_hash(jit, asm, opnds!(elements), &function.frame_state(*state)),
@@ -475,7 +476,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio
475476
Insn::LoadEC => gen_load_ec(),
476477
Insn::LoadSelf => gen_load_self(),
477478
&Insn::LoadField { recv, id, offset, return_type: _ } => gen_load_field(asm, opnd!(recv), id, offset),
478-
&Insn::StoreField { recv, id, offset, val } => no_output!(gen_store_field(asm, opnd!(recv), id, offset, opnd!(val))),
479+
&Insn::StoreField { recv, id, offset, val } => no_output!(gen_store_field(asm, opnd!(recv), id, offset, opnd!(val), function.type_of(val))),
479480
&Insn::WriteBarrier { recv, val } => no_output!(gen_write_barrier(asm, opnd!(recv), opnd!(val), function.type_of(val))),
480481
&Insn::IsBlockGiven => gen_is_block_given(jit, asm),
481482
Insn::ArrayInclude { elements, target, state } => gen_array_include(jit, asm, opnds!(elements), opnd!(target), &function.frame_state(*state)),
@@ -1099,10 +1100,10 @@ fn gen_load_field(asm: &mut Assembler, recv: Opnd, id: ID, offset: i32) -> Opnd
10991100
asm.load(Opnd::mem(64, recv, offset))
11001101
}
11011102

1102-
fn gen_store_field(asm: &mut Assembler, recv: Opnd, id: ID, offset: i32, val: Opnd) {
1103+
fn gen_store_field(asm: &mut Assembler, recv: Opnd, id: ID, offset: i32, val: Opnd, val_type: Type) {
11031104
asm_comment!(asm, "Store field id={} offset={}", id.contents_lossy(), offset);
11041105
let recv = asm.load(recv);
1105-
asm.store(Opnd::mem(64, recv, offset), val);
1106+
asm.store(Opnd::mem(val_type.num_bits(), recv, offset), val);
11061107
}
11071108

11081109
fn gen_write_barrier(asm: &mut Assembler, recv: Opnd, val: Opnd, val_type: Type) {
@@ -1159,6 +1160,10 @@ fn gen_const_uint16(val: u16) -> lir::Opnd {
11591160
Opnd::UImm(val as u64)
11601161
}
11611162

1163+
fn gen_const_uint32(val: u32) -> lir::Opnd {
1164+
Opnd::UImm(val as u64)
1165+
}
1166+
11621167
/// Compile a basic block argument
11631168
fn gen_param(asm: &mut Assembler, idx: usize) -> lir::Opnd {
11641169
// Allocate a register or a stack slot

zjit/src/cruby.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -999,8 +999,9 @@ mod manual_defs {
999999
use super::*;
10001000

10011001
pub const SIZEOF_VALUE: usize = 8;
1002+
pub const BITS_PER_BYTE: usize = 8;
10021003
pub const SIZEOF_VALUE_I32: i32 = SIZEOF_VALUE as i32;
1003-
pub const VALUE_BITS: u8 = 8 * SIZEOF_VALUE as u8;
1004+
pub const VALUE_BITS: u8 = BITS_PER_BYTE as u8 * SIZEOF_VALUE as u8;
10041005

10051006
pub const RUBY_LONG_MIN: isize = std::os::raw::c_long::MIN as isize;
10061007
pub const RUBY_LONG_MAX: isize = std::os::raw::c_long::MAX as isize;
@@ -1382,6 +1383,7 @@ pub(crate) mod ids {
13821383
name: thread_ptr
13831384
name: self_ content: b"self"
13841385
name: rb_ivar_get_at_no_ractor_check
1386+
name: _shape_id
13851387
}
13861388

13871389
/// Get an CRuby `ID` to an interned string, e.g. a particular method name.

zjit/src/cruby_bindings.inc.rs

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

zjit/src/hir.rs

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2949,10 +2949,34 @@ impl Function {
29492949
self.push_insn_id(block, insn_id); continue;
29502950
}
29512951
let mut ivar_index: u16 = 0;
2952+
let mut next_shape_id = recv_type.shape();
29522953
if !unsafe { rb_shape_get_iv_index(recv_type.shape().0, id, &mut ivar_index) } {
2953-
// TODO(max): Shape transition
2954-
self.push_insn(block, Insn::IncrCounter(Counter::setivar_fallback_shape_transition));
2955-
self.push_insn_id(block, insn_id); continue;
2954+
// Current shape does not contain this ivar; do a shape transition.
2955+
let current_shape_id = recv_type.shape();
2956+
let class = recv_type.class();
2957+
// We're only looking at T_OBJECT so ignore all of the imemo stuff.
2958+
assert!(recv_type.flags().is_t_object());
2959+
next_shape_id = ShapeId(unsafe { rb_shape_transition_add_ivar_no_warnings(class, current_shape_id.0, id) });
2960+
let ivar_result = unsafe { rb_shape_get_iv_index(next_shape_id.0, id, &mut ivar_index) };
2961+
assert!(ivar_result, "New shape must have the ivar index");
2962+
// If the VM ran out of shapes, or this class generated too many leaf,
2963+
// it may be de-optimized into OBJ_TOO_COMPLEX_SHAPE (hash-table).
2964+
let new_shape_too_complex = unsafe { rb_jit_shape_too_complex_p(next_shape_id.0) };
2965+
// TODO(max): Is it OK to bail out here after making a shape transition?
2966+
if new_shape_too_complex {
2967+
self.push_insn(block, Insn::IncrCounter(Counter::setivar_fallback_new_shape_too_complex));
2968+
self.push_insn_id(block, insn_id); continue;
2969+
}
2970+
let current_capacity = unsafe { rb_jit_shape_capacity(current_shape_id.0) };
2971+
let next_capacity = unsafe { rb_jit_shape_capacity(next_shape_id.0) };
2972+
// If the new shape has a different capacity, or is TOO_COMPLEX, we'll have to
2973+
// reallocate it.
2974+
let needs_extension = next_capacity != current_capacity;
2975+
if needs_extension {
2976+
self.push_insn(block, Insn::IncrCounter(Counter::setivar_fallback_new_shape_needs_extension));
2977+
self.push_insn_id(block, insn_id); continue;
2978+
}
2979+
// Fall through to emitting the ivar write
29562980
}
29572981
let self_val = self.push_insn(block, Insn::GuardType { val: self_val, guard_type: types::HeapBasicObject, state });
29582982
let self_val = self.push_insn(block, Insn::GuardShape { val: self_val, shape: recv_type.shape(), state });
@@ -2968,6 +2992,13 @@ impl Function {
29682992
};
29692993
self.push_insn(block, Insn::StoreField { recv: ivar_storage, id, offset, val });
29702994
self.push_insn(block, Insn::WriteBarrier { recv: self_val, val });
2995+
if next_shape_id != recv_type.shape() {
2996+
// Write the new shape ID
2997+
assert_eq!(SHAPE_ID_NUM_BITS, 32);
2998+
let shape_id = self.push_insn(block, Insn::Const { val: Const::CUInt32(next_shape_id.0) });
2999+
let shape_id_offset = unsafe { rb_shape_id_offset() };
3000+
self.push_insn(block, Insn::StoreField { recv: self_val, id: ID!(_shape_id), offset: shape_id_offset, val: shape_id });
3001+
}
29713002
}
29723003
_ => { self.push_insn_id(block, insn_id); }
29733004
}

zjit/src/hir/opt_tests.rs

Lines changed: 142 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3484,6 +3484,39 @@ mod hir_opt_tests {
34843484
");
34853485
}
34863486

3487+
#[test]
3488+
fn test_dont_specialize_complex_shape_definedivar() {
3489+
eval(r#"
3490+
class C
3491+
def test = defined?(@a)
3492+
end
3493+
obj = C.new
3494+
(0..1000).each do |i|
3495+
obj.instance_variable_set(:"@v#{i}", i)
3496+
end
3497+
(0..1000).each do |i|
3498+
obj.remove_instance_variable(:"@v#{i}")
3499+
end
3500+
obj.test
3501+
TEST = C.instance_method(:test)
3502+
"#);
3503+
assert_snapshot!(hir_string_proc("TEST"), @r"
3504+
fn test@<compiled>:3:
3505+
bb0():
3506+
EntryPoint interpreter
3507+
v1:BasicObject = LoadSelf
3508+
Jump bb2(v1)
3509+
bb1(v4:BasicObject):
3510+
EntryPoint JIT(0)
3511+
Jump bb2(v4)
3512+
bb2(v6:BasicObject):
3513+
IncrCounter definedivar_fallback_too_complex
3514+
v10:StringExact|NilClass = DefinedIvar v6, :@a
3515+
CheckInterrupts
3516+
Return v10
3517+
");
3518+
}
3519+
34873520
#[test]
34883521
fn test_specialize_monomorphic_setivar_already_in_shape() {
34893522
eval("
@@ -3513,7 +3546,7 @@ mod hir_opt_tests {
35133546
}
35143547

35153548
#[test]
3516-
fn test_dont_specialize_monomorphic_setivar_with_shape_transition() {
3549+
fn test_specialize_monomorphic_setivar_with_shape_transition() {
35173550
eval("
35183551
def test = @foo = 5
35193552
test
@@ -3530,13 +3563,57 @@ mod hir_opt_tests {
35303563
bb2(v6:BasicObject):
35313564
v10:Fixnum[5] = Const Value(5)
35323565
PatchPoint SingleRactorMode
3533-
IncrCounter setivar_fallback_shape_transition
3534-
SetIvar v6, :@foo, v10
3566+
v19:HeapBasicObject = GuardType v6, HeapBasicObject
3567+
v20:HeapBasicObject = GuardShape v19, 0x1000
3568+
StoreField v20, :@foo@0x1001, v10
3569+
WriteBarrier v20, v10
3570+
v23:CUInt32[4194311] = Const CUInt32(4194311)
3571+
StoreField v20, :_shape_id@0x1002, v23
35353572
CheckInterrupts
35363573
Return v10
35373574
");
35383575
}
35393576

3577+
#[test]
3578+
fn test_specialize_multiple_monomorphic_setivar_with_shape_transition() {
3579+
eval("
3580+
def test
3581+
@foo = 1
3582+
@bar = 2
3583+
end
3584+
test
3585+
");
3586+
assert_snapshot!(hir_string("test"), @r"
3587+
fn test@<compiled>:3:
3588+
bb0():
3589+
EntryPoint interpreter
3590+
v1:BasicObject = LoadSelf
3591+
Jump bb2(v1)
3592+
bb1(v4:BasicObject):
3593+
EntryPoint JIT(0)
3594+
Jump bb2(v4)
3595+
bb2(v6:BasicObject):
3596+
v10:Fixnum[1] = Const Value(1)
3597+
PatchPoint SingleRactorMode
3598+
v25:HeapBasicObject = GuardType v6, HeapBasicObject
3599+
v26:HeapBasicObject = GuardShape v25, 0x1000
3600+
StoreField v26, :@foo@0x1001, v10
3601+
WriteBarrier v26, v10
3602+
v29:CUInt32[4194311] = Const CUInt32(4194311)
3603+
StoreField v26, :_shape_id@0x1002, v29
3604+
v16:Fixnum[2] = Const Value(2)
3605+
PatchPoint SingleRactorMode
3606+
v31:HeapBasicObject = GuardType v6, HeapBasicObject
3607+
v32:HeapBasicObject = GuardShape v31, 0x1003
3608+
StoreField v32, :@bar@0x1004, v16
3609+
WriteBarrier v32, v16
3610+
v35:CUInt32[4194312] = Const CUInt32(4194312)
3611+
StoreField v32, :_shape_id@0x1002, v35
3612+
CheckInterrupts
3613+
Return v16
3614+
");
3615+
}
3616+
35403617
#[test]
35413618
fn test_dont_specialize_setivar_with_t_data() {
35423619
eval("
@@ -3611,6 +3688,9 @@ mod hir_opt_tests {
36113688
(0..1000).each do |i|
36123689
obj.instance_variable_set(:"@v#{i}", i)
36133690
end
3691+
(0..1000).each do |i|
3692+
obj.remove_instance_variable(:"@v#{i}")
3693+
end
36143694
obj.test
36153695
TEST = C.instance_method(:test)
36163696
"#);
@@ -3626,7 +3706,7 @@ mod hir_opt_tests {
36263706
bb2(v6:BasicObject):
36273707
v10:Fixnum[5] = Const Value(5)
36283708
PatchPoint SingleRactorMode
3629-
IncrCounter setivar_fallback_shape_transition
3709+
IncrCounter setivar_fallback_too_complex
36303710
SetIvar v6, :@a, v10
36313711
CheckInterrupts
36323712
Return v10
@@ -5417,6 +5497,43 @@ mod hir_opt_tests {
54175497
");
54185498
}
54195499

5500+
#[test]
5501+
fn test_dont_optimize_getivar_with_too_complex_shape() {
5502+
eval(r#"
5503+
class C
5504+
attr_accessor :foo
5505+
end
5506+
obj = C.new
5507+
(0..1000).each do |i|
5508+
obj.instance_variable_set(:"@v#{i}", i)
5509+
end
5510+
(0..1000).each do |i|
5511+
obj.remove_instance_variable(:"@v#{i}")
5512+
end
5513+
def test(o) = o.foo
5514+
test obj
5515+
"#);
5516+
assert_snapshot!(hir_string("test"), @r"
5517+
fn test@<compiled>:12:
5518+
bb0():
5519+
EntryPoint interpreter
5520+
v1:BasicObject = LoadSelf
5521+
v2:BasicObject = GetLocal l0, SP@4
5522+
Jump bb2(v1, v2)
5523+
bb1(v5:BasicObject, v6:BasicObject):
5524+
EntryPoint JIT(0)
5525+
Jump bb2(v5, v6)
5526+
bb2(v8:BasicObject, v9:BasicObject):
5527+
PatchPoint MethodRedefined(C@0x1000, foo@0x1008, cme:0x1010)
5528+
PatchPoint NoSingletonClass(C@0x1000)
5529+
v21:HeapObject[class_exact:C] = GuardType v9, HeapObject[class_exact:C]
5530+
IncrCounter getivar_fallback_too_complex
5531+
v22:BasicObject = GetIvar v21, :@foo
5532+
CheckInterrupts
5533+
Return v22
5534+
");
5535+
}
5536+
54205537
#[test]
54215538
fn test_optimize_send_with_block() {
54225539
eval(r#"
@@ -5697,8 +5814,11 @@ mod hir_opt_tests {
56975814
v16:Fixnum[5] = Const Value(5)
56985815
PatchPoint MethodRedefined(C@0x1000, foo=@0x1008, cme:0x1010)
56995816
v26:HeapObject[class_exact:C] = GuardType v9, HeapObject[class_exact:C]
5700-
IncrCounter setivar_fallback_shape_transition
5701-
SetIvar v26, :@foo, v16
5817+
v29:HeapObject[class_exact:C] = GuardShape v26, 0x1038
5818+
StoreField v29, :@foo@0x1039, v16
5819+
WriteBarrier v29, v16
5820+
v32:CUInt32[4194311] = Const CUInt32(4194311)
5821+
StoreField v29, :_shape_id@0x103a, v32
57025822
CheckInterrupts
57035823
Return v16
57045824
");
@@ -5729,8 +5849,11 @@ mod hir_opt_tests {
57295849
v16:Fixnum[5] = Const Value(5)
57305850
PatchPoint MethodRedefined(C@0x1000, foo=@0x1008, cme:0x1010)
57315851
v26:HeapObject[class_exact:C] = GuardType v9, HeapObject[class_exact:C]
5732-
IncrCounter setivar_fallback_shape_transition
5733-
SetIvar v26, :@foo, v16
5852+
v29:HeapObject[class_exact:C] = GuardShape v26, 0x1038
5853+
StoreField v29, :@foo@0x1039, v16
5854+
WriteBarrier v29, v16
5855+
v32:CUInt32[4194311] = Const CUInt32(4194311)
5856+
StoreField v29, :_shape_id@0x103a, v32
57345857
CheckInterrupts
57355858
Return v16
57365859
");
@@ -9109,18 +9232,22 @@ mod hir_opt_tests {
91099232
SetLocal l0, EP@3, v27
91109233
v39:BasicObject = GetLocal l0, EP@3
91119234
PatchPoint SingleRactorMode
9112-
IncrCounter setivar_fallback_shape_transition
9113-
SetIvar v14, :@formatted, v39
9114-
v45:Class[VMFrozenCore] = Const Value(VALUE(0x1000))
9115-
PatchPoint MethodRedefined(Class@0x1008, lambda@0x1010, cme:0x1018)
9116-
PatchPoint NoSingletonClass(Class@0x1008)
9117-
v60:BasicObject = CCallWithFrame v45, :RubyVM::FrozenCore.lambda@0x1040, block=0x1048
9235+
v56:HeapBasicObject = GuardType v14, HeapBasicObject
9236+
v57:HeapBasicObject = GuardShape v56, 0x1000
9237+
StoreField v57, :@formatted@0x1001, v39
9238+
WriteBarrier v57, v39
9239+
v60:CUInt32[4194311] = Const CUInt32(4194311)
9240+
StoreField v57, :_shape_id@0x1002, v60
9241+
v45:Class[VMFrozenCore] = Const Value(VALUE(0x1008))
9242+
PatchPoint MethodRedefined(Class@0x1010, lambda@0x1018, cme:0x1020)
9243+
PatchPoint NoSingletonClass(Class@0x1010)
9244+
v65:BasicObject = CCallWithFrame v45, :RubyVM::FrozenCore.lambda@0x1048, block=0x1050
91189245
v48:BasicObject = GetLocal l0, EP@6
91199246
v49:BasicObject = GetLocal l0, EP@5
91209247
v50:BasicObject = GetLocal l0, EP@4
91219248
v51:BasicObject = GetLocal l0, EP@3
91229249
CheckInterrupts
9123-
Return v60
9250+
Return v65
91249251
");
91259252
}
91269253
}

0 commit comments

Comments
 (0)