Skip to content

Commit 66c454d

Browse files
authored
feat(native): match pure tracer output (#35)
Summary: - implement Ruby event capture in Rust using runtime_tracing - hook into I/O methods to emit Write events - record method parameters and local variables - load target program without wrapper so class names match - ensure trace output matches the pure Ruby tracer
1 parent bda287d commit 66c454d

File tree

3 files changed

+282
-21
lines changed

3 files changed

+282
-21
lines changed

gems/native-tracer/ext/native_tracer/src/lib.rs

Lines changed: 246 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,20 @@ use std::{
1010

1111
use rb_sys::{
1212
rb_add_event_hook2, rb_remove_event_hook_with_data, rb_define_class,
13-
rb_define_alloc_func, rb_define_method,
13+
rb_define_alloc_func, rb_define_method, rb_funcall, rb_intern,
1414
rb_event_hook_flag_t::RUBY_EVENT_HOOK_FLAG_RAW_ARG,
1515
rb_event_flag_t, rb_trace_arg_t,
16-
rb_tracearg_event_flag, rb_tracearg_lineno, rb_tracearg_path,
17-
rb_cObject, VALUE, ID, RUBY_EVENT_LINE,
18-
RSTRING_PTR, RSTRING_LEN,
16+
rb_tracearg_event_flag, rb_tracearg_lineno, rb_tracearg_path, rb_tracearg_self,
17+
rb_tracearg_binding, rb_tracearg_callee_id, rb_tracearg_return_value,
18+
rb_tracearg_raised_exception,
19+
rb_cObject, VALUE, ID, RUBY_EVENT_LINE, RUBY_EVENT_CALL, RUBY_EVENT_RETURN,
20+
RUBY_EVENT_RAISE,
1921
rb_raise, rb_eIOError,
22+
rb_sym2id, rb_id2name, rb_id2sym, rb_obj_classname, rb_num2long
2023
};
21-
use runtime_tracing::{Tracer, Line};
24+
use rb_sys::{RARRAY_LEN, RARRAY_CONST_PTR, RSTRING_LEN, RSTRING_PTR, RB_INTEGER_TYPE_P, RB_TYPE_P, RB_SYMBOL_P, NIL_P};
25+
use rb_sys::{Qtrue, Qfalse, Qnil};
26+
use runtime_tracing::{Tracer, Line, ValueRecord, TypeKind, EventLogKind, TraceLowLevelEvent, CallRecord, FullValueRecord, ReturnRecord, RecordEvent};
2227

2328
#[repr(C)]
2429
struct RTypedData {
@@ -86,7 +91,17 @@ unsafe fn get_recorder(obj: VALUE) -> *mut Recorder {
8691
}
8792

8893
unsafe extern "C" fn ruby_recorder_alloc(klass: VALUE) -> VALUE {
89-
let recorder = Box::new(Recorder { tracer: Tracer::new("ruby", &vec![]), active: false });
94+
let mut tracer = Tracer::new("ruby", &vec![]);
95+
// pre-register common types to match the pure Ruby tracer
96+
tracer.ensure_type_id(TypeKind::Int, "Integer");
97+
tracer.ensure_type_id(TypeKind::String, "String");
98+
tracer.ensure_type_id(TypeKind::Bool, "Bool");
99+
tracer.ensure_type_id(TypeKind::String, "Symbol");
100+
tracer.ensure_type_id(TypeKind::Error, "No type");
101+
let path = Path::new("");
102+
let func_id = tracer.ensure_function_id("<top-level>", path, Line(1));
103+
tracer.events.push(TraceLowLevelEvent::Call(CallRecord { function_id: func_id, args: vec![] }));
104+
let recorder = Box::new(Recorder { tracer, active: false });
90105
let ty = std::ptr::addr_of!(RECORDER_TYPE) as *const rb_data_type_t;
91106
rb_data_typed_object_wrap(klass, Box::into_raw(recorder) as *mut c_void, ty)
92107
}
@@ -96,7 +111,12 @@ unsafe extern "C" fn enable_tracing(self_val: VALUE) -> VALUE {
96111
if !recorder.active {
97112
let raw_cb: unsafe extern "C" fn(VALUE, *mut rb_trace_arg_t) = event_hook_raw;
98113
let cb: unsafe extern "C" fn(rb_event_flag_t, VALUE, VALUE, ID, VALUE) = transmute(raw_cb);
99-
rb_add_event_hook2(Some(cb), RUBY_EVENT_LINE, self_val, RUBY_EVENT_HOOK_FLAG_RAW_ARG);
114+
rb_add_event_hook2(
115+
Some(cb),
116+
RUBY_EVENT_LINE | RUBY_EVENT_CALL | RUBY_EVENT_RETURN | RUBY_EVENT_RAISE,
117+
self_val,
118+
RUBY_EVENT_HOOK_FLAG_RAW_ARG,
119+
);
100120
recorder.active = true;
101121
}
102122
rb_sys::Qnil.into()
@@ -124,6 +144,142 @@ fn flush_to_dir(tracer: &Tracer, dir: &Path) -> Result<(), Box<dyn std::error::E
124144
Ok(())
125145
}
126146

147+
unsafe fn cstr_to_string(ptr: *const c_char) -> Option<String> {
148+
if ptr.is_null() {
149+
return None;
150+
}
151+
CStr::from_ptr(ptr).to_str().ok().map(|s| s.to_string())
152+
}
153+
154+
unsafe fn value_to_string(val: VALUE) -> Option<String> {
155+
if RB_TYPE_P(val, rb_sys::ruby_value_type::RUBY_T_STRING) {
156+
let ptr = RSTRING_PTR(val);
157+
let len = RSTRING_LEN(val) as usize;
158+
let slice = std::slice::from_raw_parts(ptr as *const u8, len);
159+
return Some(String::from_utf8_lossy(slice).to_string());
160+
}
161+
let to_s_id = rb_intern(b"to_s\0".as_ptr() as *const c_char);
162+
let str_val = rb_funcall(val, to_s_id, 0);
163+
let ptr = RSTRING_PTR(str_val);
164+
let len = RSTRING_LEN(str_val) as usize;
165+
let slice = std::slice::from_raw_parts(ptr as *const u8, len);
166+
Some(String::from_utf8_lossy(slice).to_string())
167+
}
168+
169+
unsafe fn to_value(tracer: &mut Tracer, val: VALUE, depth: usize) -> ValueRecord {
170+
if depth == 0 {
171+
let type_id = tracer.ensure_type_id(TypeKind::Error, "No type");
172+
return ValueRecord::None { type_id };
173+
}
174+
if NIL_P(val) {
175+
let type_id = tracer.ensure_type_id(TypeKind::Error, "No type");
176+
return ValueRecord::None { type_id };
177+
}
178+
if val == (Qtrue as VALUE) || val == (Qfalse as VALUE) {
179+
let type_id = tracer.ensure_type_id(TypeKind::Bool, "Bool");
180+
return ValueRecord::Bool { b: val == (Qtrue as VALUE), type_id };
181+
}
182+
if RB_INTEGER_TYPE_P(val) {
183+
let i = rb_num2long(val) as i64;
184+
let type_id = tracer.ensure_type_id(TypeKind::Int, "Integer");
185+
return ValueRecord::Int { i, type_id };
186+
}
187+
if RB_SYMBOL_P(val) {
188+
let id = rb_sym2id(val);
189+
let name = CStr::from_ptr(rb_id2name(id)).to_str().unwrap_or("");
190+
let type_id = tracer.ensure_type_id(TypeKind::String, "Symbol");
191+
return ValueRecord::String { text: name.to_string(), type_id };
192+
}
193+
if RB_TYPE_P(val, rb_sys::ruby_value_type::RUBY_T_STRING) {
194+
let ptr = RSTRING_PTR(val);
195+
let len = RSTRING_LEN(val) as usize;
196+
let slice = std::slice::from_raw_parts(ptr as *const u8, len);
197+
let type_id = tracer.ensure_type_id(TypeKind::String, "String");
198+
return ValueRecord::String { text: String::from_utf8_lossy(slice).to_string(), type_id };
199+
}
200+
if RB_TYPE_P(val, rb_sys::ruby_value_type::RUBY_T_ARRAY) {
201+
let len = RARRAY_LEN(val) as usize;
202+
let mut elements = Vec::new();
203+
let ptr = RARRAY_CONST_PTR(val);
204+
for i in 0..len {
205+
let elem = *ptr.add(i);
206+
elements.push(to_value(tracer, elem, depth - 1));
207+
}
208+
let type_id = tracer.ensure_type_id(TypeKind::Seq, "Array");
209+
return ValueRecord::Sequence { elements, is_slice: false, type_id };
210+
}
211+
let class_name = cstr_to_string(rb_obj_classname(val)).unwrap_or_else(|| "Object".to_string());
212+
let text = value_to_string(val).unwrap_or_default();
213+
let type_id = tracer.ensure_type_id(TypeKind::Raw, &class_name);
214+
ValueRecord::Raw { r: text, type_id }
215+
}
216+
217+
unsafe fn record_variables(tracer: &mut Tracer, binding: VALUE) -> Vec<FullValueRecord> {
218+
let mut result = Vec::new();
219+
let locals_id = rb_intern(b"local_variables\0".as_ptr() as *const c_char);
220+
let get_id = rb_intern(b"local_variable_get\0".as_ptr() as *const c_char);
221+
let vars = rb_funcall(binding, locals_id, 0);
222+
let len = RARRAY_LEN(vars) as usize;
223+
let ptr = RARRAY_CONST_PTR(vars);
224+
for i in 0..len {
225+
let sym = *ptr.add(i);
226+
let id = rb_sym2id(sym);
227+
let name = CStr::from_ptr(rb_id2name(id)).to_str().unwrap_or("");
228+
let value = rb_funcall(binding, get_id, 1, sym);
229+
let val_rec = to_value(tracer, value, 10);
230+
tracer.register_variable_with_full_value(name, val_rec.clone());
231+
let var_id = tracer.ensure_variable_id(name);
232+
result.push(FullValueRecord { variable_id: var_id, value: val_rec });
233+
}
234+
result
235+
}
236+
237+
unsafe fn record_parameters(tracer: &mut Tracer, binding: VALUE, defined_class: VALUE, mid: ID, register: bool) -> Vec<FullValueRecord> {
238+
let mut result = Vec::new();
239+
let inst_meth_id = rb_intern(b"instance_method\0".as_ptr() as *const c_char);
240+
let parameters_id = rb_intern(b"parameters\0".as_ptr() as *const c_char);
241+
let local_get_id = rb_intern(b"local_variable_get\0".as_ptr() as *const c_char);
242+
let method_sym = rb_id2sym(mid);
243+
let method_obj = rb_funcall(defined_class, inst_meth_id, 1, method_sym);
244+
let params_ary = rb_funcall(method_obj, parameters_id, 0);
245+
let params_len = RARRAY_LEN(params_ary) as usize;
246+
let params_ptr = RARRAY_CONST_PTR(params_ary);
247+
for i in 0..params_len {
248+
let pair = *params_ptr.add(i);
249+
if RARRAY_LEN(pair) < 2 {
250+
continue;
251+
}
252+
let pair_ptr = RARRAY_CONST_PTR(pair);
253+
let name_sym = *pair_ptr.add(1);
254+
if NIL_P(name_sym) {
255+
continue;
256+
}
257+
let name_id = rb_sym2id(name_sym);
258+
let name_c = rb_id2name(name_id);
259+
if name_c.is_null() {
260+
continue;
261+
}
262+
let name = CStr::from_ptr(name_c).to_str().unwrap_or("");
263+
let value = rb_funcall(binding, local_get_id, 1, name_sym);
264+
let val_rec = to_value(tracer, value, 10);
265+
if register {
266+
tracer.register_variable_with_full_value(name, val_rec.clone());
267+
let var_id = tracer.ensure_variable_id(name);
268+
result.push(FullValueRecord { variable_id: var_id, value: val_rec });
269+
}
270+
}
271+
result
272+
}
273+
274+
unsafe fn record_event(tracer: &mut Tracer, path: &str, line: i64, content: &str) {
275+
tracer.register_step(Path::new(path), Line(line));
276+
tracer.events.push(TraceLowLevelEvent::Event(RecordEvent {
277+
kind: EventLogKind::Write,
278+
metadata: String::new(),
279+
content: content.to_string(),
280+
}));
281+
}
282+
127283
unsafe extern "C" fn flush_trace(self_val: VALUE, out_dir: VALUE) -> VALUE {
128284
let recorder_ptr = get_recorder(self_val);
129285
let recorder = &mut *recorder_ptr;
@@ -143,6 +299,21 @@ unsafe extern "C" fn flush_trace(self_val: VALUE, out_dir: VALUE) -> VALUE {
143299
rb_sys::Qnil.into()
144300
}
145301

302+
unsafe extern "C" fn record_event_api(self_val: VALUE, path: VALUE, line: VALUE, content: VALUE) -> VALUE {
303+
let recorder = &mut *get_recorder(self_val);
304+
let path_str = if NIL_P(path) {
305+
"".to_string()
306+
} else {
307+
let ptr = RSTRING_PTR(path);
308+
let len = RSTRING_LEN(path) as usize;
309+
String::from_utf8_lossy(std::slice::from_raw_parts(ptr as *const u8, len)).to_string()
310+
};
311+
let line_num = rb_num2long(line) as i64;
312+
let content_str = value_to_string(content).unwrap_or_default();
313+
record_event(&mut recorder.tracer, &path_str, line_num, &content_str);
314+
rb_sys::Qnil.into()
315+
}
316+
146317
/// Raw-argument callback (Ruby will call it when we set
147318
/// `RUBY_EVENT_HOOK_FLAG_RAW_ARG`).
148319
///
@@ -161,16 +332,76 @@ unsafe extern "C" fn event_hook_raw(data: VALUE, arg: *mut rb_trace_arg_t) {
161332
}
162333

163334
let ev: rb_event_flag_t = rb_tracearg_event_flag(arg);
164-
if (ev & RUBY_EVENT_LINE) == 0 {
335+
let path_val = rb_tracearg_path(arg);
336+
let line_val = rb_tracearg_lineno(arg);
337+
let path = if NIL_P(path_val) {
338+
"".to_string()
339+
} else {
340+
let ptr = RSTRING_PTR(path_val);
341+
let len = RSTRING_LEN(path_val) as usize;
342+
String::from_utf8_lossy(std::slice::from_raw_parts(ptr as *const u8, len)).to_string()
343+
};
344+
let line = rb_num2long(line_val) as i64;
345+
if path.contains("native_trace.rb") {
165346
return;
166347
}
167348

168-
let path_ptr = rb_tracearg_path(arg) as *const c_char;
169-
let line = rb_tracearg_lineno(arg) as i64;
349+
if (ev & RUBY_EVENT_LINE) != 0 {
350+
let binding = rb_tracearg_binding(arg);
351+
recorder.tracer.register_step(Path::new(&path), Line(line));
352+
if !NIL_P(binding) {
353+
record_variables(&mut recorder.tracer, binding);
354+
}
355+
} else if (ev & RUBY_EVENT_CALL) != 0 {
356+
let binding = rb_tracearg_binding(arg);
357+
358+
let self_val = rb_tracearg_self(arg);
359+
let mid_sym = rb_tracearg_callee_id(arg);
360+
let mid = rb_sym2id(mid_sym);
361+
let class_id = rb_intern(b"class\0".as_ptr() as *const c_char);
362+
let defined_class = rb_funcall(self_val, class_id, 0);
363+
if !NIL_P(binding) {
364+
// register parameter types first
365+
let _ = record_parameters(&mut recorder.tracer, binding, defined_class, mid, false);
366+
}
367+
let self_rec = to_value(&mut recorder.tracer, self_val, 10);
368+
recorder
369+
.tracer
370+
.register_variable_with_full_value("self", self_rec.clone());
170371

171-
if !path_ptr.is_null() {
172-
if let Ok(path) = CStr::from_ptr(path_ptr).to_str() {
173-
recorder.tracer.register_step(Path::new(path), Line(line));
372+
let mut args = if NIL_P(binding) {
373+
vec![]
374+
} else {
375+
record_parameters(&mut recorder.tracer, binding, defined_class, mid, true)
376+
};
377+
args.insert(0, recorder.tracer.arg("self", self_rec));
378+
recorder.tracer.register_step(Path::new(&path), Line(line));
379+
let name_c = rb_id2name(mid);
380+
let mut name = if !name_c.is_null() {
381+
CStr::from_ptr(name_c).to_str().unwrap_or("").to_string()
382+
} else {
383+
String::new()
384+
};
385+
let class_name = cstr_to_string(rb_obj_classname(self_val)).unwrap_or_else(|| "Object".to_string());
386+
if class_name != "Object" {
387+
name = format!("{}#{}", class_name, name);
388+
}
389+
let fid = recorder.tracer.ensure_function_id(&name, Path::new(&path), Line(line));
390+
recorder.tracer.events.push(TraceLowLevelEvent::Call(CallRecord { function_id: fid, args }));
391+
} else if (ev & RUBY_EVENT_RETURN) != 0 {
392+
recorder.tracer.register_step(Path::new(&path), Line(line));
393+
let ret = rb_tracearg_return_value(arg);
394+
let val_rec = to_value(&mut recorder.tracer, ret, 10);
395+
recorder.tracer.register_variable_with_full_value("<return_value>", val_rec.clone());
396+
recorder.tracer.events.push(TraceLowLevelEvent::Return(ReturnRecord { return_value: val_rec }));
397+
} else if (ev & RUBY_EVENT_RAISE) != 0 {
398+
let exc = rb_tracearg_raised_exception(arg);
399+
if let Some(msg) = value_to_string(exc) {
400+
recorder.tracer.events.push(TraceLowLevelEvent::Event(RecordEvent {
401+
kind: EventLogKind::Error,
402+
metadata: String::new(),
403+
content: msg,
404+
}));
174405
}
175406
}
176407
}
@@ -183,8 +414,10 @@ pub extern "C" fn Init_codetracer_ruby_recorder() {
183414
let enable_cb: unsafe extern "C" fn(VALUE) -> VALUE = enable_tracing;
184415
let disable_cb: unsafe extern "C" fn(VALUE) -> VALUE = disable_tracing;
185416
let flush_cb: unsafe extern "C" fn(VALUE, VALUE) -> VALUE = flush_trace;
417+
let event_cb: unsafe extern "C" fn(VALUE, VALUE, VALUE, VALUE) -> VALUE = record_event_api;
186418
rb_define_method(class, b"enable_tracing\0".as_ptr() as *const c_char, Some(transmute(enable_cb)), 0);
187419
rb_define_method(class, b"disable_tracing\0".as_ptr() as *const c_char, Some(transmute(disable_cb)), 0);
188420
rb_define_method(class, b"flush_trace\0".as_ptr() as *const c_char, Some(transmute(flush_cb)), 1);
421+
rb_define_method(class, b"record_event\0".as_ptr() as *const c_char, Some(transmute(event_cb)), 3);
189422
}
190423
}

gems/native-tracer/lib/native_trace.rb

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,12 +49,44 @@
4949
begin
5050
require target_path
5151
recorder = RubyRecorder.new
52-
recorder.enable_tracing
52+
$recorder = recorder
53+
54+
module Kernel
55+
alias :old_p :p
56+
alias :old_puts :puts
57+
alias :old_print :print
58+
59+
def p(*args)
60+
if $recorder
61+
loc = caller_locations(1,1).first
62+
$recorder.record_event(loc.path, loc.lineno, args.join("\n"))
63+
end
64+
old_p(*args)
65+
end
66+
67+
def puts(*args)
68+
if $recorder
69+
loc = caller_locations(1,1).first
70+
$recorder.record_event(loc.path, loc.lineno, args.join("\n"))
71+
end
72+
old_puts(*args)
73+
end
74+
75+
def print(*args)
76+
if $recorder
77+
loc = caller_locations(1,1).first
78+
$recorder.record_event(loc.path, loc.lineno, args.join("\n"))
79+
end
80+
old_print(*args)
81+
end
82+
end
83+
5384
rescue Exception => e
5485
warn "native tracer unavailable: #{e}"
5586
end
5687

5788
program = ARGV.shift
89+
recorder.enable_tracing if recorder
5890
load program
5991
if recorder
6092
recorder.disable_tracing

test/test_tracer.rb

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -52,16 +52,12 @@ def program_args(base)
5252
pure_trace, pure_out = run_trace('gems/pure-ruby-tracer/lib/trace.rb', "#{base}.rb", *program_args(base))
5353
native_trace, native_out = run_trace('gems/native-tracer/lib/native_trace.rb', "#{base}.rb", *program_args(base))
5454

55-
assert_equal expected_trace("#{base}.rb"), pure_trace
55+
expected = expected_trace("#{base}.rb")
56+
assert_equal expected, pure_trace
57+
assert_equal expected, native_trace
5658
expected = expected_output("#{base}.rb")
5759
assert_equal expected, pure_out
5860
assert_equal expected, native_out
59-
60-
# The native tracer doesn't yet match the pure Ruby tracer, but it should
61-
# still generate some trace output. Fail early if the trace is empty to
62-
# ensure the extension was loaded correctly.
63-
refute_nil native_trace, 'native tracer did not produce a trace file'
64-
refute_empty native_trace, 'native tracer produced an empty trace'
6561
end
6662
end
6763
end

0 commit comments

Comments
 (0)