Skip to content

Commit 50c2c4b

Browse files
committed
Make rb_vm_insns_count a thread local variable
`rb_vm_insns_count` is a global variable used for reporting YJIT statistics. It is a counter that tallies the number of interpreter instructions that have been executed; this way we can approximate how much time we're spending in YJIT compared to the interpreter. Unfortunately, keeping this statistic means that every instruction executed in the interpreter loop must increment the counter. Normally this isn't a problem, but in multi-threaded situations (when Ractors are used), incrementing this counter can become quite costly due to page caching issues. Additionally, since there is no locking when incrementing this global, the count can't really make sense in a multi-threaded environment. This commit changes `rb_vm_insns_count` to a thread local. That way each Ractor has its own copy of the counter and incrementing the counter becomes quite cheap. Of course this means that in multi-threaded situations, the value doesn't really make sense (but it didn't make sense before because of the lack of locking). The counter is used for YJIT statistics, and since YJIT is basically disabled when Ractors are in use, I don't think we care about inaccuracies (for the time being). We can revisit this counter when we give YJIT multi-threading support, but for the time being this commit restores multi-threaded performance. To test this, I used the benchmark in [Bug #20489]. Here is the performance on Ruby 3.2: ``` $ time RUBY_MAX_CPU=12 ./miniruby -v ../test.rb 8 8 ruby 3.2.0 (2022-12-25 revision a528908) [x86_64-linux] [0...1, 1...2, 2...3, 3...4, 4...5, 5...6, 6...7, 7...8] ../test.rb:43: warning: Ractor is experimental, and the behavior may change in future versions of Ruby! Also there are many implementation issues. 
________________________________________________________ Executed in 2.53 secs fish external usr time 19.86 secs 370.00 micros 19.86 secs sys time 0.02 secs 320.00 micros 0.02 secs ``` We can see the regression in performance on the master branch: ``` $ time RUBY_MAX_CPU=12 ./miniruby -v ../test.rb 8 8 ruby 3.5.0dev (2025-01-10T16:22:26Z master 4a2702d) +PRISM [x86_64-linux] [0...1, 1...2, 2...3, 3...4, 4...5, 5...6, 6...7, 7...8] ../test.rb:43: warning: Ractor is experimental, and the behavior may change in future versions of Ruby! Also there are many implementation issues. ________________________________________________________ Executed in 24.87 secs fish external usr time 195.55 secs 0.00 micros 195.55 secs sys time 0.00 secs 716.00 micros 0.00 secs ``` Here are the stats after this commit: ``` $ time RUBY_MAX_CPU=12 ./miniruby -v ../test.rb 8 8 ruby 3.5.0dev (2025-01-10T20:37:06Z tl 3ef0432779) +PRISM [x86_64-linux] [0...1, 1...2, 2...3, 3...4, 4...5, 5...6, 6...7, 7...8] ../test.rb:43: warning: Ractor is experimental, and the behavior may change in future versions of Ruby! Also there are many implementation issues. ________________________________________________________ Executed in 2.46 secs fish external usr time 19.34 secs 381.00 micros 19.34 secs sys time 0.01 secs 321.00 micros 0.01 secs ``` [Bug #20489]
1 parent 039446f commit 50c2c4b

File tree

8 files changed

+14
-10
lines changed

8 files changed

+14
-10
lines changed

internal/vm.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ struct vm_ifunc *rb_current_ifunc(void);
8181

8282
#if USE_YJIT
8383
/* vm_exec.c */
84-
extern uint64_t rb_vm_insns_count;
84+
extern RB_THREAD_LOCAL_SPECIFIER uint64_t rb_vm_insns_count;
8585
#endif
8686

8787
extern bool rb_free_at_exit;

rjit.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ struct rb_rjit_runtime_counters rb_rjit_counters = { 0 };
170170

171171
extern VALUE rb_gc_enable(void);
172172
extern VALUE rb_gc_disable(void);
173-
extern uint64_t rb_vm_insns_count;
173+
extern RB_THREAD_LOCAL_SPECIFIER uint64_t rb_vm_insns_count;
174174

175175
// Disable GC, TracePoint, JIT, stats, and $!
176176
#define WITH_RJIT_ISOLATED_USING_PC(using_pc, stmt) do { \

rjit_c.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -541,7 +541,7 @@ extern VALUE rb_vm_set_ivar_id(VALUE obj, ID id, VALUE val);
541541
extern VALUE rb_ary_unshift_m(int argc, VALUE *argv, VALUE ary);
542542
extern void* rb_rjit_entry_stub_hit(VALUE branch_stub);
543543
extern void* rb_rjit_branch_stub_hit(VALUE branch_stub, int sp_offset, int target0_p);
544-
extern uint64_t rb_vm_insns_count;
544+
extern RB_THREAD_LOCAL_SPECIFIER uint64_t rb_vm_insns_count;
545545

546546
#include "rjit_c.rbinc"
547547

vm_exec.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
#if USE_YJIT || USE_RJIT
1515
// The number of instructions executed on vm_exec_core. --yjit-stats uses this.
16-
uint64_t rb_vm_insns_count = 0;
16+
RB_THREAD_LOCAL_SPECIFIER uint64_t rb_vm_insns_count = 0;
1717
#endif
1818

1919
#if VM_COLLECT_USAGE_DETAILS

yjit.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "internal/fixnum.h"
1717
#include "internal/numeric.h"
1818
#include "internal/gc.h"
19+
#include "internal/vm.h"
1920
#include "vm_core.h"
2021
#include "vm_callinfo.h"
2122
#include "builtin.h"
@@ -96,6 +97,11 @@ rb_yjit_mark_executable(void *mem_block, uint32_t mem_size)
9697
}
9798
}
9899

100+
uint64_t
101+
rb_yjit_vm_insns_count(void) {
102+
return rb_vm_insns_count;
103+
}
104+
99105
// Free the specified memory block.
100106
bool
101107
rb_yjit_mark_unused(void *mem_block, uint32_t mem_size)

yjit/bindgen/src/main.rs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,7 @@ fn main() {
317317
.allowlist_function("rb_yjit_get_page_size")
318318
.allowlist_function("rb_yjit_iseq_builtin_attrs")
319319
.allowlist_function("rb_yjit_iseq_inspect")
320+
.allowlist_function("rb_yjit_vm_insns_count")
320321
.allowlist_function("rb_yjit_builtin_function")
321322
.allowlist_function("rb_set_cfp_(pc|sp)")
322323
.allowlist_function("rb_yjit_multi_ractor_p")
@@ -380,9 +381,6 @@ fn main() {
380381
.allowlist_function("rb_ivar_get")
381382
.allowlist_function("rb_mod_name")
382383

383-
// From internal/vm.h
384-
.allowlist_var("rb_vm_insns_count")
385-
386384
// From include/ruby/internal/intern/vm.h
387385
.allowlist_function("rb_get_alloc_func")
388386

yjit/src/cruby_bindings.inc.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1061,7 +1061,6 @@ extern "C" {
10611061
elts: *const VALUE,
10621062
) -> VALUE;
10631063
pub fn rb_vm_top_self() -> VALUE;
1064-
pub static mut rb_vm_insns_count: u64;
10651064
pub fn rb_method_entry_at(obj: VALUE, id: ID) -> *const rb_method_entry_t;
10661065
pub fn rb_callable_method_entry(klass: VALUE, id: ID) -> *const rb_callable_method_entry_t;
10671066
pub fn rb_callable_method_entry_or_negative(
@@ -1141,6 +1140,7 @@ extern "C" {
11411140
pub fn rb_jit_cont_each_iseq(callback: rb_iseq_callback, data: *mut ::std::os::raw::c_void);
11421141
pub fn rb_yjit_mark_writable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32) -> bool;
11431142
pub fn rb_yjit_mark_executable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32);
1143+
pub fn rb_yjit_vm_insns_count() -> u64;
11441144
pub fn rb_yjit_mark_unused(mem_block: *mut ::std::os::raw::c_void, mem_size: u32) -> bool;
11451145
pub fn rb_yjit_array_len(a: VALUE) -> ::std::os::raw::c_long;
11461146
pub fn rb_yjit_icache_invalidate(

yjit/src/stats.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -792,7 +792,7 @@ fn rb_yjit_gen_stats_dict(key: VALUE) -> VALUE {
792792
set_stat_usize!(hash, "context_cache_bytes", crate::core::CTX_ENCODE_CACHE_BYTES + crate::core::CTX_DECODE_CACHE_BYTES);
793793

794794
// VM instructions count
795-
set_stat_usize!(hash, "vm_insns_count", rb_vm_insns_count as usize);
795+
set_stat_usize!(hash, "vm_insns_count", rb_yjit_vm_insns_count() as usize);
796796

797797
set_stat_usize!(hash, "live_iseq_count", rb_yjit_live_iseq_count as usize);
798798
set_stat_usize!(hash, "iseq_alloc_count", rb_yjit_iseq_alloc_count as usize);
@@ -862,7 +862,7 @@ fn rb_yjit_gen_stats_dict(key: VALUE) -> VALUE {
862862
set_stat_double!(hash, "avg_len_in_yjit", avg_len_in_yjit);
863863

864864
// Proportion of instructions that retire in YJIT
865-
let total_insns_count = retired_in_yjit + rb_vm_insns_count;
865+
let total_insns_count = retired_in_yjit + rb_yjit_vm_insns_count();
866866
set_stat_usize!(hash, "total_insns_count", total_insns_count as usize);
867867

868868
let ratio_in_yjit: f64 = 100.0 * retired_in_yjit as f64 / total_insns_count as f64;

0 commit comments

Comments
 (0)