Skip to content

Commit 3e57c4d

Browse files
XrXrk0kubun
authored andcommitted
Strength reduce to CCall for sends landing in simple C methods
A new optimization pass. Also: - Printing for `Insn::CCall` - Wrap `ID` and add convenience method for printing, replacing calls to rb_id2name()
1 parent 97ba8d9 commit 3e57c4d

File tree

9 files changed

+333
-30
lines changed

9 files changed

+333
-30
lines changed

test/ruby/test_zjit.rb

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,14 @@
99
return unless JITSupport.zjit_supported?
1010

1111
class TestZJIT < Test::Unit::TestCase
12+
def test_call_itself
13+
assert_compiles '42', <<~RUBY, call_threshold: 2
14+
def test = 42.itself
15+
test
16+
test
17+
RUBY
18+
end
19+
1220
def test_nil
1321
assert_compiles 'nil', %q{
1422
def test = nil

zjit/bindgen/src/main.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -225,9 +225,9 @@ fn main() {
225225
.allowlist_function("rb_intern")
226226
.allowlist_function("rb_intern2")
227227
.allowlist_function("rb_id2sym")
228-
.allowlist_function("rb_id2name")
229228
.allowlist_function("rb_sym2id")
230229
.allowlist_function("rb_str_intern")
230+
.allowlist_function("rb_id2str")
231231

232232
// From internal/numeric.h
233233
.allowlist_function("rb_fix_aref")
@@ -470,8 +470,9 @@ fn main() {
470470
.allowlist_function("rb_ec_stack_check")
471471
.allowlist_function("rb_vm_top_self")
472472

473-
// We define VALUE manually, don't import it
473+
// We define these manually, don't import them
474474
.blocklist_type("VALUE")
475+
.blocklist_type("ID")
475476

476477
// From iseq.h
477478
.opaque_type("rb_iseq_t")

zjit/src/codegen.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,7 @@ fn gen_insn(jit: &mut JITState, asm: &mut Assembler, function: &Function, insn_i
203203
Insn::GuardType { val, guard_type, state } => gen_guard_type(asm, opnd!(val), *guard_type, &function.frame_state(*state))?,
204204
Insn::GuardBitEquals { val, expected, state } => gen_guard_bit_equals(asm, opnd!(val), *expected, &function.frame_state(*state))?,
205205
Insn::PatchPoint(_) => return Some(()), // For now, rb_zjit_bop_redefined() panics. TODO: leave a patch point and fix rb_zjit_bop_redefined()
206+
Insn::CCall { cfun, args, name: _ } => gen_ccall(jit, asm, *cfun, args)?,
206207
_ => {
207208
debug!("ZJIT: gen_function: unexpected insn {:?}", insn);
208209
return None;
@@ -215,6 +216,17 @@ fn gen_insn(jit: &mut JITState, asm: &mut Assembler, function: &Function, insn_i
215216
Some(())
216217
}
217218

219+
/// Lowering for [`Insn::CCall`]. This is a low-level raw call that doesn't know
220+
/// anything about the callee, so handling for e.g. GC safety is dealt with elsewhere.
221+
fn gen_ccall(jit: &mut JITState, asm: &mut Assembler, cfun: *const u8, args: &[InsnId]) -> Option<lir::Opnd> {
222+
let mut lir_args = Vec::with_capacity(args.len());
223+
for &arg in args {
224+
lir_args.push(jit.get_opnd(arg)?);
225+
}
226+
227+
Some(asm.ccall(cfun, lir_args))
228+
}
229+
218230
/// Compile an interpreter entry block to be inserted into an ISEQ
219231
fn gen_entry_prologue(iseq: IseqPtr, asm: &mut Assembler) {
220232
asm_comment!(asm, "ZJIT entry point: {}", iseq_get_location(iseq, 0));

zjit/src/cruby.rs

Lines changed: 51 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,12 @@ pub struct rb_iseq_t {
250250
#[repr(transparent)] // same size and alignment as simply `usize`
251251
pub struct VALUE(pub usize);
252252

253+
/// An interned string. See [ids] and methods this type.
254+
/// `0` is a sentinal value for IDs.
255+
#[repr(transparent)]
256+
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
257+
pub struct ID(pub ::std::os::raw::c_ulong);
258+
253259
/// Pointer to an ISEQ
254260
pub type IseqPtr = *const rb_iseq_t;
255261

@@ -455,6 +461,18 @@ impl VALUE {
455461
}
456462
}
457463

464+
/// Borrow the string contents of `self`. Rust unsafe because of possible mutation and GC
465+
/// interactions.
466+
pub unsafe fn as_rstring_byte_slice<'a>(self) -> Option<&'a [u8]> {
467+
if !unsafe { RB_TYPE_P(self, RUBY_T_STRING) } {
468+
None
469+
} else {
470+
let str_ptr = unsafe { rb_RSTRING_PTR(self) } as *const u8;
471+
let str_len: usize = unsafe { rb_RSTRING_LEN(self) }.try_into().ok()?;
472+
Some(unsafe { std::slice::from_raw_parts(str_ptr, str_len) })
473+
}
474+
}
475+
458476
pub fn is_frozen(self) -> bool {
459477
unsafe { rb_obj_frozen_p(self) != VALUE(0) }
460478
}
@@ -637,6 +655,28 @@ impl From<VALUE> for u16 {
637655
}
638656
}
639657

658+
impl ID {
659+
// Get a debug representation of the contents of the ID. Since `str` is UTF-8
660+
// and IDs have encodings that are not, this is a lossy representation.
661+
pub fn contents_lossy(&self) -> std::borrow::Cow<'_, str> {
662+
use std::borrow::Cow;
663+
if self.0 == 0 {
664+
Cow::Borrowed("ID(0)")
665+
} else {
666+
// Get the contents as a byte slice. IDs can have internal NUL bytes so rb_id2name,
667+
// which returns a C string is more lossy than this approach.
668+
let contents = unsafe { rb_id2str(*self) };
669+
if contents == Qfalse {
670+
Cow::Borrowed("ID(0)")
671+
} else {
672+
let slice = unsafe { contents.as_rstring_byte_slice() }
673+
.expect("rb_id2str() returned truthy non-string");
674+
String::from_utf8_lossy(slice)
675+
}
676+
}
677+
}
678+
}
679+
640680
/// Produce a Ruby string from a Rust string slice
641681
pub fn rust_str_to_ruby(str: &str) -> VALUE {
642682
unsafe { rb_utf8_str_new(str.as_ptr() as *const _, str.len() as i64) }
@@ -1140,7 +1180,7 @@ pub(crate) mod ids {
11401180

11411181
// Lookup and cache each ID
11421182
$name.store(
1143-
unsafe { $crate::cruby::rb_intern2(ptr.cast(), content.len() as _) },
1183+
unsafe { $crate::cruby::rb_intern2(ptr.cast(), content.len() as _) }.0,
11441184
std::sync::atomic::Ordering::Relaxed
11451185
);
11461186
)*
@@ -1159,14 +1199,15 @@ pub(crate) mod ids {
11591199
name: compile
11601200
name: eval
11611201
}
1162-
}
11631202

1164-
/// Get an CRuby `ID` to an interned string, e.g. a particular method name.
1165-
macro_rules! ID {
1166-
($id_name:ident) => {{
1167-
let id = $crate::cruby::ids::$id_name.load(std::sync::atomic::Ordering::Relaxed);
1168-
debug_assert_ne!(0, id, "ids module should be initialized");
1169-
id
1170-
}}
1203+
/// Get an CRuby `ID` to an interned string, e.g. a particular method name.
1204+
macro_rules! ID {
1205+
($id_name:ident) => {{
1206+
let id = $crate::cruby::ids::$id_name.load(std::sync::atomic::Ordering::Relaxed);
1207+
debug_assert_ne!(0, id, "ids module should be initialized");
1208+
ID(id)
1209+
}}
1210+
}
1211+
pub(crate) use ID;
11711212
}
1172-
pub(crate) use ID;
1213+
pub(crate) use ids::ID;

zjit/src/cruby_bindings.inc.rs

Lines changed: 1 addition & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

zjit/src/cruby_methods.rs

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
/*! This module contains assertions we make about runtime properties of core library methods.
2+
* Some properties that influence codegen:
3+
* - Whether the method has been redefined since boot
4+
* - Whether the C method can yield to the GC
5+
* - Whether the C method makes any method calls
6+
*
7+
* For Ruby methods, many of these properties can be inferred through analyzing the
8+
* bytecode, but for C methods we resort to annotation and validation in debug builds.
9+
*/
10+
11+
use crate::cruby::*;
12+
use std::collections::HashMap;
13+
use std::ffi::c_void;
14+
15+
pub struct Annotations {
16+
cfuncs: HashMap<*mut c_void, FnProperties>,
17+
}
18+
19+
/// Runtime behaviors of C functions that implement a Ruby method
20+
#[derive(Clone, Copy, Default)]
21+
pub struct FnProperties {
22+
/// Whether it's possible for the function to yield to the GC
23+
pub no_gc: bool,
24+
/// Whether it's possible for the function to make a ruby call
25+
pub leaf: bool,
26+
}
27+
28+
impl Annotations {
29+
/// Query about properties of a C method
30+
pub fn get_cfunc_properties(&self, method: *const rb_callable_method_entry_t) -> Option<FnProperties> {
31+
let fn_ptr = unsafe {
32+
if VM_METHOD_TYPE_CFUNC != get_cme_def_type(method) {
33+
return None;
34+
}
35+
get_mct_func(get_cme_def_body_cfunc(method.cast()))
36+
};
37+
self.cfuncs.get(&fn_ptr).copied()
38+
}
39+
}
40+
41+
fn annotate_c_method(props_map: &mut HashMap<*mut c_void, FnProperties>, class: VALUE, method_name: &'static str, props: FnProperties) {
42+
// Lookup function pointer of the C method
43+
let fn_ptr = unsafe {
44+
// TODO(alan): (side quest) make rust methods and clean up glue code for rb_method_cfunc_t and
45+
// rb_method_definition_t.
46+
let method_id = rb_intern2(method_name.as_ptr().cast(), method_name.len() as _);
47+
let method = rb_method_entry_at(class, method_id);
48+
assert!(!method.is_null());
49+
// ME-to-CME cast is fine due to identical layout
50+
debug_assert_eq!(VM_METHOD_TYPE_CFUNC, get_cme_def_type(method.cast()));
51+
get_mct_func(get_cme_def_body_cfunc(method.cast()))
52+
};
53+
54+
props_map.insert(fn_ptr, props);
55+
}
56+
57+
/// Gather annotations. Run this right after boot since the annotations
58+
/// are about the stock versions of methods.
59+
pub fn init() -> Annotations {
60+
let cfuncs = &mut HashMap::new();
61+
62+
macro_rules! annotate {
63+
($module:ident, $method_name:literal, $($properties:ident),+) => {
64+
let mut props = FnProperties::default();
65+
$(
66+
props.$properties = true;
67+
)+
68+
annotate_c_method(cfuncs, unsafe { $module }, $method_name, props);
69+
}
70+
}
71+
72+
annotate!(rb_mKernel, "itself", no_gc, leaf);
73+
annotate!(rb_cString, "bytesize", no_gc, leaf);
74+
annotate!(rb_cModule, "name", no_gc, leaf);
75+
annotate!(rb_cModule, "===", no_gc, leaf);
76+
77+
Annotations {
78+
cfuncs: std::mem::take(cfuncs)
79+
}
80+
}

0 commit comments

Comments
 (0)