diff --git a/listing-001.md b/listing-001.md new file mode 100644 index 0000000..63d5d67 --- /dev/null +++ b/listing-001.md @@ -0,0 +1,330 @@ +# Listing 001 + +This listing introduces the development environment for **codetracer-ruby-recorder**. We review documentation for installation and environment variables (`README.md`), project dependencies (`Gemfile`), build and test tasks (`Rakefile` and `Justfile`), and then walk through the primary `RubyRecorder` class that powers the native tracer. + +**Project heading states this gem records Ruby programs to produce CodeTracer traces.** +```markdown +## codetracer-ruby-recorder + +A recorder of Ruby programs that produces [CodeTracer](https://github.com/metacraft-labs/CodeTracer) traces. +``` + +**Installation instructions show gem installation and fallback to pure Ruby version.** +```bash +gem install codetracer-ruby-recorder +gem install codetracer-pure-ruby-recorder +``` + +**Environment variables toggle debug logging and specify trace output directory.** +```markdown +* if you pass `CODETRACER_RUBY_RECORDER_DEBUG=1`, you enable some additional debug-related logging +* `CODETRACER_RUBY_RECORDER_OUT_DIR` can be used to specify the directory for trace files +``` + +**Development setup suggests installing debugging gems and running tests manually.** +```bash +gem install debug pry +ruby -I lib -I test test/test_tracer.rb +``` + +**Gemfile sets source and references both native and pure-Ruby recorder gems locally.** +```ruby +# frozen_string_literal: true +source "https://rubygems.org" + +gem "codetracer-ruby-recorder", path: "gems/codetracer-ruby-recorder" +gem "codetracer-pure-ruby-recorder", path: "gems/codetracer-pure-ruby-recorder" +``` + +**Optional development gems for debugging are commented out; rubocop is included for development.** +```ruby +# gem "debug", "~> 1.7" # Ruby debugging with rdbg +# gem "pry", "~> 0.14" # Interactive debugging and REPL +gem "rubocop", "~> 1.77", :group => :development +``` + 
+**Rakefile loads rb_sys extension task.** +```ruby +require 'rb_sys/extensiontask' +``` + +**Extension task configuration specifies build and library directories.** +```ruby +RbSys::ExtensionTask.new('codetracer_ruby_recorder') do |ext| + ext.ext_dir = 'gems/codetracer-ruby-recorder/ext/native_tracer' + ext.lib_dir = 'gems/codetracer-ruby-recorder/lib' + ext.gem_spec = Gem::Specification.load('gems/codetracer-ruby-recorder/codetracer-ruby-recorder.gemspec') +end +``` + +**Alias for running tests and the test command executes installation checks and unit tests.** +```make +alias t := test +test: + ruby -Itest test/gem_installation.rb + ruby -Itest -e 'Dir["test/test_*.rb"].each { |f| require File.expand_path(f) }' +``` + +**Benchmark task runs benchmarks with pattern and report options.** +```make +bench pattern="*" write_report="console": + ruby test/benchmarks/run_benchmarks.rb '{{pattern}}' --write-report={{write_report}} +``` + +**Build native extension via Cargo.** +```make +build-extension: + cargo build --release --manifest-path gems/codetracer-ruby-recorder/ext/native_tracer/Cargo.toml +``` + +**Formatting tasks for Rust, Nix, and Ruby.** +```make +format-rust: + cargo fmt --manifest-path gems/codetracer-ruby-recorder/ext/native_tracer/Cargo.toml + +format-nix: + if command -v nixfmt >/dev/null; then find . -name '*.nix' -print0 | xargs -0 nixfmt; fi + +format-ruby: + if command -v bundle >/dev/null && bundle exec rubocop -v >/dev/null 2>&1; then bundle exec rubocop -A; else echo "Ruby formatter not available; skipping"; fi +``` + +**Aggregate formatting and linting tasks with an alias.** +```make +format: + just format-rust + just format-ruby + just format-nix + +lint-rust: + cargo fmt --check --manifest-path gems/codetracer-ruby-recorder/ext/native_tracer/Cargo.toml + +lint-nix: + if command -v nixfmt >/dev/null; then find . 
-name '*.nix' -print0 | xargs -0 nixfmt --check; fi + +lint-ruby: + if command -v bundle >/dev/null && bundle exec rubocop -v >/dev/null 2>&1; then bundle exec rubocop; else echo "rubocop not available; skipping"; fi + +lint: + just lint-rust + just lint-ruby + just lint-nix + +alias fmt := format +``` + +**Header comments declare license and purpose.** +```ruby +# SPDX-License-Identifier: MIT +# Library providing a helper method to execute the native tracer. +``` + +**Load option parsing, file utilities, configuration, and kernel patches.** +```ruby +require 'optparse' +require 'fileutils' +require 'rbconfig' +require_relative 'codetracer/kernel_patches' +``` + +**Define RubyRecorder inside CodeTracer module.** +```ruby +module CodeTracer + class RubyRecorder +``` + +**Begin parsing CLI arguments and set up OptionParser.** +```ruby + def self.parse_argv_and_trace_ruby_file(argv) + options = {} + parser = OptionParser.new do |opts| + opts.banner = 'usage: codetracer-ruby-recorder [options] [args]' +``` + +**Accept output directory and format options.** +```ruby + opts.on('-o DIR', '--out-dir DIR', 'Directory to write trace files') do |dir| + options[:out_dir] = dir + end + opts.on('-f FORMAT', '--format FORMAT', 'trace format: json or binary') do |fmt| + options[:format] = fmt + end +``` + +**Provide help flag and finalize option parsing.** +```ruby + opts.on('-h', '--help', 'Print this help') do + puts opts + exit + end + end + parser.order!(argv) +``` + +**Extract program argument and handle missing program.** +```ruby + program = argv.shift + if program.nil? 
+ $stderr.puts parser + exit 1 + end +``` + +**Capture remaining program arguments and determine output directory and format.** +```ruby + # Remaining arguments after the program name are passed to the traced program + program_args = argv.dup + + out_dir = options[:out_dir] || ENV['CODETRACER_RUBY_RECORDER_OUT_DIR'] || Dir.pwd + format = (options[:format] || 'json').to_sym + trace_ruby_file(program, out_dir, program_args, format) + 0 + end +``` + +**Trace specified Ruby file with selected options.** +```ruby + def self.trace_ruby_file(program, out_dir, program_args = [], format = :json) + recorder = RubyRecorder.new(out_dir, format) + return 1 unless recorder.available? + + ENV['CODETRACER_RUBY_RECORDER_OUT_DIR'] = out_dir +``` + +**Execute program under recorder, adjusting ARGV temporarily.** +```ruby + recorder.start + begin + # Set ARGV to contain the program arguments + original_argv = ARGV.dup + ARGV.clear + ARGV.concat(program_args) + + load program + ensure + # Restore original ARGV + ARGV.clear + ARGV.concat(original_argv) + + recorder.stop + recorder.flush_trace + end + 0 + end +``` + +**Entry point to run CLI logic.** +```ruby + # Execute the native tracer CLI logic with the provided +argv+. + def self.execute(argv) + parse_argv_and_trace_ruby_file(argv) + end +``` + +**Initialize recorder and load native implementation.** +```ruby + def initialize(out_dir, format = :json) + @recorder = nil + @active = false + load_native_recorder(out_dir, format) + end +``` + +**Start recording and apply kernel patches if not already active.** +```ruby + # Start the recorder and install kernel patches + def start + return if @active || @recorder.nil? 
+ + @recorder.enable_tracing + CodeTracer::KernelPatches.install(self) + @active = true + end +``` + +**Stop recording and remove patches.** +```ruby + # Stop the recorder and remove kernel patches + def stop + return unless @active + + CodeTracer::KernelPatches.uninstall(self) + @recorder.disable_tracing if @recorder + @active = false + end +``` + +**Delegate recording events to native recorder.** +```ruby + # Record event for kernel patches integration + def record_event(path, line, content) + @recorder.record_event(path, line, content) if @recorder + end +``` + +**Flush trace data and report availability.** +```ruby + # Flush trace to output directory + def flush_trace + @recorder.flush_trace if @recorder + end + + # Check if recorder is available + def available? + !@recorder.nil? + end +``` + +**Mark following methods as private and begin loading native recorder.** +```ruby + private + + def load_native_recorder(out_dir, format = :json) + begin + # Load native extension at module level +``` + +**Resolve extension directory and target library path based on platform.** +```ruby + ext_dir = File.expand_path('../ext/native_tracer/target/release', __dir__) + dlext = RbConfig::CONFIG['DLEXT'] + target_path = File.join(ext_dir, "codetracer_ruby_recorder.#{dlext}") + unless File.exist?(target_path) + extensions = %w[so bundle dylib dll] +``` + +**Search for alternative library names and create symlink or copy as needed.** +```ruby + alt_path = extensions + .map { |ext| File.join(ext_dir, "libcodetracer_ruby_recorder.#{ext}") } + .find { |path| File.exist?(path) } + if alt_path + begin + File.symlink(alt_path, target_path) + rescue StandardError + FileUtils.cp(alt_path, target_path) + end + end + end +``` + +**Load library and build recorder instance.** +```ruby + require target_path + @recorder = CodeTracerNativeRecorder.new(out_dir, format) +``` + +**On errors, emit warning and fall back to nil recorder.** +```ruby + rescue Exception => e + warn "native tracer 
unavailable: #{e}" + @recorder = nil + end + end +``` + +**Terminate the RubyRecorder class and CodeTracer module.** +```ruby + end +end +``` diff --git a/listing-002.md b/listing-002.md new file mode 100644 index 0000000..935b799 --- /dev/null +++ b/listing-002.md @@ -0,0 +1,185 @@ +# Listing 002 + +This listing continues the inspection by detailing how runtime I/O is captured and how the gem is packaged. We review the output-hooking helpers in `gems/codetracer-ruby-recorder/lib/codetracer/kernel_patches.rb`, the CLI entry script `gems/codetracer-ruby-recorder/bin/codetracer-ruby-recorder`, and the gem specification `gems/codetracer-ruby-recorder/codetracer-ruby-recorder.gemspec`. + +**File declares MIT license and begins KernelPatches module tracking installed tracers.** +```ruby +# SPDX-License-Identifier: MIT + +module CodeTracer + module KernelPatches + @@tracers = [] +``` + +**Add a tracer unless already present and store it in the class variable.** +```ruby + def self.install(tracer) + return if @@tracers.include?(tracer) + @@tracers << tracer +``` + +**When the first tracer is installed, patch Kernel methods.** +```ruby + if @@tracers.length == 1 + Kernel.module_eval do +``` + +**Alias original I/O methods so they can be restored later.** +```ruby + alias_method :codetracer_original_p, :p unless method_defined?(:codetracer_original_p) + alias_method :codetracer_original_puts, :puts unless method_defined?(:codetracer_original_puts) + alias_method :codetracer_original_print, :print unless method_defined?(:codetracer_original_print) +``` + +**Redefine `p` to compute a printable representation and log it.** +```ruby + define_method(:p) do |*args| + loc = caller_locations(1, 1).first + content = if args.length == 1 && args.first.is_a?(Array) +``` + +**Handle array arguments or multiple values uniformly.** +```ruby + args.first.map(&:inspect).join("\n") + else + args.map(&:inspect).join("\n") + end +``` + +**Record the event with all active tracers before 
delegating.** +```ruby + @@tracers.each do |t| + t.record_event(loc.path, loc.lineno, content) + end + codetracer_original_p(*args) + end +``` + +**Redefine `puts` to capture line-oriented output.** +```ruby + define_method(:puts) do |*args| + loc = caller_locations(1, 1).first + @@tracers.each do |t| + t.record_event(loc.path, loc.lineno, args.join("\n")) + end +``` + +**Forward `puts` after logging the captured lines.** +```ruby + codetracer_original_puts(*args) + end +``` + +**Redefine `print` to intercept raw output without newlines.** +```ruby + define_method(:print) do |*args| + loc = caller_locations(1, 1).first + @@tracers.each do |t| + t.record_event(loc.path, loc.lineno, args.join) + end +``` + +**Delegate `print` to the original implementation afterward.** +```ruby + codetracer_original_print(*args) + end + end + end + end +``` + +**Remove a tracer and restore Kernel methods when none remain.** +```ruby + def self.uninstall(tracer) + @@tracers.delete(tracer) + + if @@tracers.empty? && Kernel.private_method_defined?(:codetracer_original_p) + Kernel.module_eval do + alias_method :p, :codetracer_original_p + alias_method :puts, :codetracer_original_puts + alias_method :print, :codetracer_original_print + end + end + end +``` + +**Provide helper to uninstall every active tracer.** +```ruby + # Uninstall all active tracers and restore the original Kernel methods. 
+ def self.reset + @@tracers.dup.each do |tracer| + uninstall(tracer) + end + end + end +end +``` + +**Shebang, license, and comment establish the CLI script.** +```ruby +#!/usr/bin/env ruby +# SPDX-License-Identifier: MIT +# CLI wrapper for the native tracer +``` + +**Load the library path and require the main recorder.** +```ruby +lib_dir = File.expand_path('../lib', __dir__) +$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir) +require 'codetracer_ruby_recorder' +``` + +**Invoke the argument parser and exit with its status.** +```ruby +exit CodeTracer::RubyRecorder.parse_argv_and_trace_ruby_file(ARGV) +``` + +**Begin gem specification and compute version from file.** +```ruby +Gem::Specification.new do |spec| + spec.name = 'codetracer-ruby-recorder' + version_file = File.expand_path('../../version.txt', __dir__) + spec.version = File.read(version_file).strip +``` + +**Define authorship metadata for the gem.** +```ruby + spec.authors = ['Metacraft Labs'] + spec.email = ['info@metacraft-labs.com'] +``` + +**Provide summary, description, license, and homepage.** +```ruby + spec.summary = 'CodeTracer Ruby recorder with native extension' + spec.description = 'Ruby tracer that records execution steps via a Rust native extension.' 
+ spec.license = 'MIT' + spec.homepage = 'https://github.com/metacraft-labs/codetracer-ruby-recorder' +``` + +**Enumerate files to include in the gem package.** +```ruby + spec.files = Dir[ + 'lib/**/*', + 'ext/native_tracer/**/{Cargo.toml,*.rs}', +``` + +**List native extension build scripts and compiled targets.** +```ruby + 'ext/native_tracer/extconf.rb', + 'ext/native_tracer/target/release/*' + ] +``` + +**Configure load paths, extensions, and executable entrypoint.** +```ruby + spec.require_paths = ['lib'] + spec.extensions = [] + spec.bindir = 'bin' + spec.executables = ['codetracer-ruby-recorder'] +``` + +**Add development dependency on rb_sys and close specification.** +```ruby + spec.add_development_dependency 'rb_sys', '~> 0.9' +end +``` diff --git a/listing-003.md b/listing-003.md new file mode 100644 index 0000000..cb77688 --- /dev/null +++ b/listing-003.md @@ -0,0 +1,208 @@ +# Listing 003 + +This listing examines the build pipeline and initial Rust implementation of the native tracer. We review the `extconf.rb` script, `build.rs`, the crate's `Cargo.toml`, and the opening portions of `src/lib.rs` that set up symbol lookups, recorder state, and early helper functions. 
+ +**Invoke mkmf and rb_sys to generate a Makefile for the Rust extension.** +```ruby +require 'mkmf' +require 'rb_sys/mkmf' + +create_rust_makefile('codetracer_ruby_recorder') +``` + +**Activate rb_sys environment variables during Cargo build.** +```rust +fn main() -> Result<(), Box<dyn std::error::Error>> { + rb_sys_env::activate()?; + Ok(()) +} +``` + +**Define package metadata, library type, dependencies, and build helpers.** +```toml +[package] +name = "codetracer_ruby_recorder" +description = "Native Ruby module for generating CodeTracer trace files" +version = "0.1.0" +edition = "2021" +build = "build.rs" + +[lib] +crate-type = ["cdylib"] + +[dependencies] +rb-sys = "0.9" +runtime_tracing = "0.14.0" + +[build-dependencies] +rb-sys-env = "0.2" + +[profile.release] +codegen-units = 1 +lto = "thin" +opt-level = 3 +``` + +**Allow missing safety docs and import standard, Ruby, and tracing crates.** +```rust +#![allow(clippy::missing_safety_doc)] + +use std::{ + collections::HashMap, + ffi::CStr, + mem::transmute, + os::raw::{c_char, c_int, c_void}, + path::Path, + ptr, +}; + +use rb_sys::{ + rb_cObject, rb_define_alloc_func, rb_define_class, rb_define_method, rb_eIOError, + rb_event_flag_t, rb_funcall, rb_id2name, rb_id2sym, rb_intern, rb_num2long, rb_obj_classname, + rb_raise, rb_sym2id, ID, RUBY_EVENT_CALL, RUBY_EVENT_LINE, RUBY_EVENT_RAISE, RUBY_EVENT_RETURN, + VALUE, +}; +use rb_sys::{ + rb_protect, NIL_P, RARRAY_CONST_PTR, RARRAY_LEN, RB_FLOAT_TYPE_P, RB_INTEGER_TYPE_P, + RB_SYMBOL_P, RB_TYPE_P, RSTRING_LEN, RSTRING_PTR, +}; +use rb_sys::{Qfalse, Qnil, Qtrue}; +use runtime_tracing::{ + create_trace_writer, CallRecord, EventLogKind, FieldTypeRecord, FullValueRecord, Line, + TraceEventsFileFormat, TraceLowLevelEvent, TraceWriter, TypeKind, TypeRecord, TypeSpecificInfo, + ValueRecord, +}; +``` + +**Declare event hook type and import flag enum and additional binding functions.** +```rust +// Event hook function type from Ruby debug.h +type rb_event_hook_func_t = Option<unsafe extern "C" fn(rb_event_flag_t, VALUE, VALUE, ID, VALUE)>; + +// Use 
event hook flags enum from rb_sys +use rb_sys::rb_event_hook_flag_t; + +// Types from rb_sys bindings +use rb_sys::{ + rb_add_event_hook2, rb_cRange, rb_cRegexp, rb_cStruct, rb_cTime, rb_check_typeddata, + rb_const_defined, rb_const_get, rb_data_type_struct__bindgen_ty_1, rb_data_type_t, + rb_data_typed_object_wrap, rb_method_boundp, rb_num2dbl, rb_obj_is_kind_of, + rb_remove_event_hook_with_data, rb_trace_arg_t, rb_tracearg_binding, rb_tracearg_callee_id, + rb_tracearg_event_flag, rb_tracearg_lineno, rb_tracearg_path, rb_tracearg_raised_exception, + rb_tracearg_return_value, rb_tracearg_self, +}; +``` + +**Collect frequently used Ruby method identifiers for efficient lookup.** +```rust +struct InternedSymbols { + to_s: ID, + local_variables: ID, + local_variable_get: ID, + instance_method: ID, + parameters: ID, + class: ID, + to_a: ID, + begin: ID, + end: ID, + to_i: ID, + nsec: ID, + source: ID, + options: ID, + members: ID, + values: ID, + to_h: ID, + instance_variables: ID, + instance_variable_get: ID, + set_const: ID, + open_struct_const: ID, +} +``` + +**Construct the symbol table by interning method names.** +```rust +impl InternedSymbols { + unsafe fn new() -> InternedSymbols { + InternedSymbols { + to_s: rb_intern!("to_s"), + local_variables: rb_intern!("local_variables"), + local_variable_get: rb_intern!("local_variable_get"), + instance_method: rb_intern!("instance_method"), + parameters: rb_intern!("parameters"), + class: rb_intern!("class"), + to_a: rb_intern!("to_a"), + begin: rb_intern!("begin"), + end: rb_intern!("end"), + to_i: rb_intern!("to_i"), + nsec: rb_intern!("nsec"), + source: rb_intern!("source"), + options: rb_intern!("options"), + members: rb_intern!("members"), + values: rb_intern!("values"), + to_h: rb_intern!("to_h"), + instance_variables: rb_intern!("instance_variables"), + instance_variable_get: rb_intern!("instance_variable_get"), + set_const: rb_intern!("Set"), + open_struct_const: rb_intern!("OpenStruct"), + } + } +} +``` + 
+**Define the recorder state with tracing backend, flags, and cached type IDs.** +```rust +struct Recorder { + tracer: Box<dyn TraceWriter>, + active: bool, + id: InternedSymbols, + set_class: VALUE, + open_struct_class: VALUE, + struct_type_versions: HashMap<String, usize>, + int_type_id: runtime_tracing::TypeId, + float_type_id: runtime_tracing::TypeId, + bool_type_id: runtime_tracing::TypeId, + string_type_id: runtime_tracing::TypeId, + symbol_type_id: runtime_tracing::TypeId, + error_type_id: runtime_tracing::TypeId, +} +``` + +**Skip instrumentation for internal or library paths to reduce noise.** +```rust +fn should_ignore_path(path: &str) -> bool { + const PATTERNS: [&str; 5] = [ + "codetracer_ruby_recorder.rb", + "lib/ruby", + "recorder.rb", + "codetracer_pure_ruby_recorder.rb", + "gems/", + ]; + if path.starts_with("<internal:") { + return true; + } + PATTERNS.iter().any(|p| path.contains(p)) +} +``` + +**Extract the type ID carried by a `ValueRecord`, whichever variant it is.** +```rust +fn value_type_id(val: &ValueRecord) -> runtime_tracing::TypeId { + use ValueRecord::*; + match val { + Int { type_id, .. } + | Float { type_id, .. } + | Bool { type_id, .. } + | String { type_id, .. } + | Sequence { type_id, .. } + | Tuple { type_id, .. } + | Struct { type_id, .. } + | Variant { type_id, .. } + | Reference { type_id, .. } + | Raw { type_id, .. } + | Error { type_id, .. } + | BigInt { type_id, .. } + | None { type_id } => *type_id, + Cell { .. } => runtime_tracing::NONE_TYPE_ID, + } +} +``` diff --git a/listing-004.md b/listing-004.md new file mode 100644 index 0000000..7fefb2e --- /dev/null +++ b/listing-004.md @@ -0,0 +1,252 @@ +# Listing 004 + +This listing follows the Rust half of the recorder, showing how the extension allocates and frees the `Recorder`, toggles Ruby's trace hooks, and begins translating primitive Ruby values. All snippets come from `gems/codetracer-ruby-recorder/ext/native_tracer/src/lib.rs`. 
+ +**Signature for `struct_value` collects the recorder context, struct metadata, and a recursion depth.** +```rust +unsafe fn struct_value( + recorder: &mut Recorder, + class_name: &str, + field_names: &[&str], + field_values: &[VALUE], + depth: usize, +) -> ValueRecord { +``` + +**Allocate space for converted fields and recursively map each Ruby field to a `ValueRecord`.** +```rust + let mut vals = Vec::with_capacity(field_values.len()); + for &v in field_values { + vals.push(to_value(recorder, v, depth - 1)); + } +``` + +**Track a monotonically increasing version number per struct name.** +```rust + let version_entry = recorder + .struct_type_versions + .entry(class_name.to_string()) + .or_insert(0); + let name_version = format!("{} (#{})", class_name, *version_entry); + *version_entry += 1; +``` + +**Describe each field by name and the type ID of its converted value.** +```rust + let mut field_types = Vec::with_capacity(field_names.len()); + for (n, v) in field_names.iter().zip(&vals) { + field_types.push(FieldTypeRecord { + name: (*n).to_string(), + type_id: value_type_id(v), + }); + } +``` + +**Assemble a `TypeRecord` for the struct and register it with the trace writer to obtain a type ID.** +```rust + let typ = TypeRecord { + kind: TypeKind::Struct, + lang_type: name_version, + specific_info: TypeSpecificInfo::Struct { + fields: field_types, + }, + }; + let type_id = TraceWriter::ensure_raw_type_id(&mut *recorder.tracer, typ); +``` + +**Return a structured value with its field data and associated type ID.** +```rust + ValueRecord::Struct { + field_values: vals, + type_id, + } +} +``` + +**`recorder_free` is registered as a destructor and drops the boxed recorder when the Ruby object is garbage collected.** +```rust +unsafe extern "C" fn recorder_free(ptr: *mut c_void) { + if !ptr.is_null() { + drop(Box::from_raw(ptr as *mut Recorder)); + } +} +``` + +**`RECORDER_TYPE` exposes the recorder to Ruby, naming the type and specifying the `dfree` callback.** 
+```rust +static mut RECORDER_TYPE: rb_data_type_t = rb_data_type_t { + wrap_struct_name: b"Recorder\0".as_ptr() as *const c_char, + function: rb_data_type_struct__bindgen_ty_1 { + dmark: None, + dfree: Some(recorder_free), + dsize: None, + dcompact: None, + reserved: [ptr::null_mut(); 1], + }, + parent: ptr::null(), + data: ptr::null_mut(), + flags: 0 as VALUE, +}; +``` + +**`get_recorder` fetches the internal pointer from a Ruby object, raising `IOError` if the type check fails.** +```rust +unsafe fn get_recorder(obj: VALUE) -> *mut Recorder { + let ty = std::ptr::addr_of!(RECORDER_TYPE) as *const rb_data_type_t; + let ptr = rb_check_typeddata(obj, ty); + if ptr.is_null() { + rb_raise( + rb_eIOError, + b"Invalid recorder object\0".as_ptr() as *const c_char, + ); + } + ptr as *mut Recorder +} +``` + +**Allocator for the Ruby class constructs a fresh `Recorder` with default type IDs and inactive tracing.** +```rust +unsafe extern "C" fn ruby_recorder_alloc(klass: VALUE) -> VALUE { + let recorder = Box::new(Recorder { + tracer: create_trace_writer("ruby", &vec![], TraceEventsFileFormat::Binary), + active: false, + id: InternedSymbols::new(), + set_class: Qnil.into(), + open_struct_class: Qnil.into(), + struct_type_versions: HashMap::new(), + int_type_id: runtime_tracing::TypeId::default(), + float_type_id: runtime_tracing::TypeId::default(), + bool_type_id: runtime_tracing::TypeId::default(), + string_type_id: runtime_tracing::TypeId::default(), + symbol_type_id: runtime_tracing::TypeId::default(), + error_type_id: runtime_tracing::TypeId::default(), + }); + let ty = std::ptr::addr_of!(RECORDER_TYPE) as *const rb_data_type_t; + rb_data_typed_object_wrap(klass, Box::into_raw(recorder) as *mut c_void, ty) +} +``` + +**`enable_tracing` attaches a raw event hook so Ruby invokes our callback on line, call, return, and raise events.** +```rust +unsafe extern "C" fn enable_tracing(self_val: VALUE) -> VALUE { + let recorder = &mut *get_recorder(self_val); + if 
!recorder.active { + let raw_cb: unsafe extern "C" fn(VALUE, *mut rb_trace_arg_t) = event_hook_raw; + let func: rb_event_hook_func_t = Some(transmute(raw_cb)); + rb_add_event_hook2( + func, + RUBY_EVENT_LINE | RUBY_EVENT_CALL | RUBY_EVENT_RETURN | RUBY_EVENT_RAISE, + self_val, + rb_event_hook_flag_t::RUBY_EVENT_HOOK_FLAG_RAW_ARG, + ); + recorder.active = true; + } + Qnil.into() +} +``` + +**`disable_tracing` removes that hook and marks the recorder inactive.** +```rust +unsafe extern "C" fn disable_tracing(self_val: VALUE) -> VALUE { + let recorder = &mut *get_recorder(self_val); + if recorder.active { + let raw_cb: unsafe extern "C" fn(VALUE, *mut rb_trace_arg_t) = event_hook_raw; + let func: rb_event_hook_func_t = Some(transmute(raw_cb)); + rb_remove_event_hook_with_data(func, self_val); + recorder.active = false; + } + Qnil.into() +} +``` + +**`cstr_to_string` converts a C string pointer to a Rust `String`, returning `None` when the pointer is null or the bytes are not valid UTF‑8.** +```rust +unsafe fn cstr_to_string(ptr: *const c_char) -> Option<String> { + if ptr.is_null() { + return None; + } + CStr::from_ptr(ptr).to_str().ok().map(|s| s.to_string()) +} +``` + +**`rstring_lossy` reads a Ruby `String`'s raw bytes and builds a UTF‑8 string, replacing invalid sequences.** +```rust +unsafe fn rstring_lossy(val: VALUE) -> String { + let ptr = RSTRING_PTR(val); + let len = RSTRING_LEN(val) as usize; + let slice = std::slice::from_raw_parts(ptr as *const u8, len); + String::from_utf8_lossy(slice).to_string() +} +``` + +**`to_value` begins value translation, first enforcing a recursion limit and checking for `nil`.** +```rust +unsafe fn to_value(recorder: &mut Recorder, val: VALUE, depth: usize) -> ValueRecord { + if depth == 0 { + return ValueRecord::None { + type_id: recorder.error_type_id, + }; + } + if NIL_P(val) { + return ValueRecord::None { + type_id: recorder.error_type_id, + }; + } +``` + +**Booleans map to `Bool` records, distinguishing `true` from `false` and reusing a cached type ID.** +```rust 
+ if val == (Qtrue as VALUE) || val == (Qfalse as VALUE) { + return ValueRecord::Bool { + b: val == (Qtrue as VALUE), + type_id: recorder.bool_type_id, + }; + } +``` + +**Integers become `Int` records holding the numeric value and its type ID.** +```rust + if RB_INTEGER_TYPE_P(val) { + let i = rb_num2long(val) as i64; + return ValueRecord::Int { + i, + type_id: recorder.int_type_id, + }; + } +``` + +**For floats, lazily register the `Float` type and then store the numeric value with the obtained ID.** +```rust + if RB_FLOAT_TYPE_P(val) { + let f = rb_num2dbl(val); + let type_id = if recorder.float_type_id == runtime_tracing::NONE_TYPE_ID { + let id = TraceWriter::ensure_type_id(&mut *recorder.tracer, TypeKind::Float, "Float"); + recorder.float_type_id = id; + id + } else { + recorder.float_type_id + }; + return ValueRecord::Float { f, type_id }; + } +``` + +**Symbols are encoded as strings using their interned names and the cached symbol type ID.** +```rust + if RB_SYMBOL_P(val) { + return ValueRecord::String { + text: cstr_to_string(rb_id2name(rb_sym2id(val))).unwrap_or_default(), + type_id: recorder.symbol_type_id, + }; + } +``` + +**Finally, Ruby `String` objects are copied lossily into UTF‑8 and tagged with the string type ID.** +```rust + if RB_TYPE_P(val, rb_sys::ruby_value_type::RUBY_T_STRING) { + return ValueRecord::String { + text: rstring_lossy(val), + type_id: recorder.string_type_id, + }; + } +} +``` diff --git a/listing-005.md b/listing-005.md new file mode 100644 index 0000000..a12fefc --- /dev/null +++ b/listing-005.md @@ -0,0 +1,260 @@ +# Listing 005 + +This listing continues the Rust-side value conversion, handling collections, ranges, sets, time, regexps, structs, OpenStructs, +and arbitrary objects before introducing `record_variables`, which captures locals from a binding. All snippets originate from +`gems/codetracer-ruby-recorder/ext/native_tracer/src/lib.rs`. 
+ +**Recognize a Ruby Array and prepare to iterate over its elements.** +```rust + if RB_TYPE_P(val, rb_sys::ruby_value_type::RUBY_T_ARRAY) { + let len = RARRAY_LEN(val) as usize; + let mut elements = Vec::with_capacity(len); + let ptr = RARRAY_CONST_PTR(val); +``` + +**Recursively convert each element and accumulate the results.** +```rust + for i in 0..len { + let elem = *ptr.add(i); + elements.push(to_value(recorder, elem, depth - 1)); + } +``` + +**Register the Array type and return a sequence record.** +```rust + let type_id = TraceWriter::ensure_type_id(&mut *recorder.tracer, TypeKind::Seq, "Array"); + return ValueRecord::Sequence { + elements, + is_slice: false, + type_id, + }; + } +``` + +**Convert a Ruby Hash by first turning it into an array of pairs.** +```rust + if RB_TYPE_P(val, rb_sys::ruby_value_type::RUBY_T_HASH) { + let pairs = rb_funcall(val, recorder.id.to_a, 0); + let len = RARRAY_LEN(pairs) as usize; + let ptr = RARRAY_CONST_PTR(pairs); +``` + +**For each pair, build a struct with `k` and `v` fields.** +```rust + let mut elements = Vec::with_capacity(len); + for i in 0..len { + let pair = *ptr.add(i); + if !RB_TYPE_P(pair, rb_sys::ruby_value_type::RUBY_T_ARRAY) || RARRAY_LEN(pair) < 2 { + continue; + } + let pair_ptr = RARRAY_CONST_PTR(pair); + let key = *pair_ptr.add(0); + let val_elem = *pair_ptr.add(1); + elements.push(struct_value( + recorder, + "Pair", + &["k", "v"], + &[key, val_elem], + depth, + )); + } +``` + +**Return the Hash as a sequence of `Pair` structs.** +```rust + let type_id = TraceWriter::ensure_type_id(&mut *recorder.tracer, TypeKind::Seq, "Hash"); + return ValueRecord::Sequence { + elements, + is_slice: false, + type_id, + }; + } +``` + +**Ranges serialize their `begin` and `end` values into a struct.** +```rust + if rb_obj_is_kind_of(val, rb_cRange) != 0 { + let begin_val = rb_funcall(val, recorder.id.begin, 0); + let end_val = rb_funcall(val, recorder.id.end, 0); + return struct_value( + recorder, + "Range", + &["begin", 
"end"], + &[begin_val, end_val], + depth, + ); + } +``` + +**Detect `Set` only once and serialize it as a sequence of members.** +```rust + if NIL_P(recorder.set_class) { + if rb_const_defined(rb_cObject, recorder.id.set_const) != 0 { + recorder.set_class = rb_const_get(rb_cObject, recorder.id.set_const); + } + } + if !NIL_P(recorder.set_class) && rb_obj_is_kind_of(val, recorder.set_class) != 0 { + let arr = rb_funcall(val, recorder.id.to_a, 0); + if RB_TYPE_P(arr, rb_sys::ruby_value_type::RUBY_T_ARRAY) { + let len = RARRAY_LEN(arr) as usize; + let ptr = RARRAY_CONST_PTR(arr); + let mut elements = Vec::with_capacity(len); + for i in 0..len { + let elem = *ptr.add(i); + elements.push(to_value(recorder, elem, depth - 1)); + } + let type_id = TraceWriter::ensure_type_id(&mut *recorder.tracer, TypeKind::Seq, "Set"); + return ValueRecord::Sequence { + elements, + is_slice: false, + type_id, + }; + } + } +``` + +**Time objects expose seconds and nanoseconds via helper methods.** +```rust + if rb_obj_is_kind_of(val, rb_cTime) != 0 { + let sec = rb_funcall(val, recorder.id.to_i, 0); + let nsec = rb_funcall(val, recorder.id.nsec, 0); + return struct_value(recorder, "Time", &["sec", "nsec"], &[sec, nsec], depth); + } +``` + +**Regular expressions capture their source pattern and options.** +```rust + if rb_obj_is_kind_of(val, rb_cRegexp) != 0 { + let src = rb_funcall(val, recorder.id.source, 0); + let opts = rb_funcall(val, recorder.id.options, 0); + return struct_value( + recorder, + "Regexp", + &["source", "options"], + &[src, opts], + depth, + ); + } +``` + +**Structs are unpacked by member names and values; unknown layouts fall back to raw.** +```rust + if rb_obj_is_kind_of(val, rb_cStruct) != 0 { + let class_name = + cstr_to_string(rb_obj_classname(val)).unwrap_or_else(|| "Struct".to_string()); + let members = rb_funcall(val, recorder.id.members, 0); + let values = rb_funcall(val, recorder.id.values, 0); + if !RB_TYPE_P(members, rb_sys::ruby_value_type::RUBY_T_ARRAY) + 
|| !RB_TYPE_P(values, rb_sys::ruby_value_type::RUBY_T_ARRAY) + { + let text = value_to_string(recorder, val); + let type_id = + TraceWriter::ensure_type_id(&mut *recorder.tracer, TypeKind::Raw, &class_name); + return ValueRecord::Raw { r: text, type_id }; + } +``` + +**Collect each struct field's name and `VALUE` before delegating to `struct_value`.** +```rust + let len = RARRAY_LEN(values) as usize; + let mem_ptr = RARRAY_CONST_PTR(members); + let val_ptr = RARRAY_CONST_PTR(values); + let mut names: Vec<&str> = Vec::with_capacity(len); + let mut vals: Vec<VALUE> = Vec::with_capacity(len); + for i in 0..len { + let sym = *mem_ptr.add(i); + let id = rb_sym2id(sym); + let cstr = rb_id2name(id); + let name = CStr::from_ptr(cstr).to_str().unwrap_or("?"); + names.push(name); + vals.push(*val_ptr.add(i)); + } + return struct_value(recorder, &class_name, &names, &vals, depth); + } +``` + +**OpenStruct values are converted to hashes and reprocessed.** +```rust + if NIL_P(recorder.open_struct_class) { + if rb_const_defined(rb_cObject, recorder.id.open_struct_const) != 0 { + recorder.open_struct_class = rb_const_get(rb_cObject, recorder.id.open_struct_const); + } + } + if !NIL_P(recorder.open_struct_class) && rb_obj_is_kind_of(val, recorder.open_struct_class) != 0 + { + let h = rb_funcall(val, recorder.id.to_h, 0); + return to_value(recorder, h, depth - 1); + } +``` + +**For generic objects, collect instance variables or fall back to a raw string.** +```rust + let class_name = cstr_to_string(rb_obj_classname(val)).unwrap_or_else(|| "Object".to_string()); + // generic object + let ivars = rb_funcall(val, recorder.id.instance_variables, 0); + if !RB_TYPE_P(ivars, rb_sys::ruby_value_type::RUBY_T_ARRAY) { + let text = value_to_string(recorder, val); + let type_id = + TraceWriter::ensure_type_id(&mut *recorder.tracer, TypeKind::Raw, &class_name); + return ValueRecord::Raw { r: text, type_id }; + } +``` + +**Map each instance variable name to its value and emit a struct if any
exist.** +```rust + let len = RARRAY_LEN(ivars) as usize; + let ptr = RARRAY_CONST_PTR(ivars); + let mut names: Vec<&str> = Vec::with_capacity(len); + let mut vals: Vec<VALUE> = Vec::with_capacity(len); + for i in 0..len { + let sym = *ptr.add(i); + let id = rb_sym2id(sym); + let cstr = rb_id2name(id); + let name = CStr::from_ptr(cstr).to_str().unwrap_or("?"); + names.push(name); + let value = rb_funcall(val, recorder.id.instance_variable_get, 1, sym); + vals.push(value); + } + if !names.is_empty() { + return struct_value(recorder, &class_name, &names, &vals, depth); + } + let text = value_to_string(recorder, val); + let type_id = TraceWriter::ensure_type_id(&mut *recorder.tracer, TypeKind::Raw, &class_name); + ValueRecord::Raw { r: text, type_id } +} +``` + +**`record_variables` pulls local variable names from a binding.** +```rust +unsafe fn record_variables(recorder: &mut Recorder, binding: VALUE) -> Vec<FullValueRecord> { + let vars = rb_funcall(binding, recorder.id.local_variables, 0); + if !RB_TYPE_P(vars, rb_sys::ruby_value_type::RUBY_T_ARRAY) { + return Vec::new(); + } +``` + +**Iterate over each variable, converting and registering its value.** +```rust + let len = RARRAY_LEN(vars) as usize; + let mut result = Vec::with_capacity(len); + let ptr = RARRAY_CONST_PTR(vars); + for i in 0..len { + let sym = *ptr.add(i); + let id = rb_sym2id(sym); + let name = CStr::from_ptr(rb_id2name(id)).to_str().unwrap_or(""); + let value = rb_funcall(binding, recorder.id.local_variable_get, 1, sym); + let val_rec = to_value(recorder, value, 10); + TraceWriter::register_variable_with_full_value( + &mut *recorder.tracer, + name, + val_rec.clone(), + ); + let var_id = TraceWriter::ensure_variable_id(&mut *recorder.tracer, name); + result.push(FullValueRecord { + variable_id: var_id, + value: val_rec, + }); + } + result +} +``` diff --git a/listing-006.md b/listing-006.md new file mode 100644 index 0000000..6295787 --- /dev/null +++ b/listing-006.md @@ -0,0 +1,275 @@ +# Listing 006 + +This listing
continues in `gems/codetracer-ruby-recorder/ext/native_tracer/src/lib.rs`, covering helper routines that extract method parameters, register them with the tracer, and expose C-callable APIs for initialization, flushing, and emitting custom events. + +**Prepare to collect parameters by defining the function signature and required arguments.** +```rust +unsafe fn collect_parameter_values( + recorder: &mut Recorder, + binding: VALUE, + defined_class: VALUE, + mid: ID, +) -> Vec<(String, ValueRecord)> { +``` + +**Convert the method ID to a Ruby symbol and bail out if the method isn't found.** +```rust + let method_sym = rb_id2sym(mid); + if rb_method_boundp(defined_class, mid, 0) == 0 { + return Vec::new(); + } +``` + +**Fetch the `UnboundMethod` object via `instance_method` and query its parameter metadata, ensuring an array is returned.** +```rust + let method_obj = rb_funcall(defined_class, recorder.id.instance_method, 1, method_sym); + let params_ary = rb_funcall(method_obj, recorder.id.parameters, 0); + if !RB_TYPE_P(params_ary, rb_sys::ruby_value_type::RUBY_T_ARRAY) { + return Vec::new(); + } +``` + +**Determine how many parameters exist and prime a results vector of matching capacity.** +```rust + let params_len = RARRAY_LEN(params_ary) as usize; + let params_ptr = RARRAY_CONST_PTR(params_ary); + let mut result = Vec::with_capacity(params_len); +``` + +**Iterate through each parameter description, skipping malformed entries.** +```rust + for i in 0..params_len { + let pair = *params_ptr.add(i); + if !RB_TYPE_P(pair, rb_sys::ruby_value_type::RUBY_T_ARRAY) || RARRAY_LEN(pair) < 2 { + continue; + } + let pair_ptr = RARRAY_CONST_PTR(pair); +``` + +**Extract the parameter's name symbol, ignoring `nil` placeholders.** +```rust + let name_sym = *pair_ptr.add(1); + if NIL_P(name_sym) { + continue; + } +``` + +**Convert the symbol to a C string; if conversion fails, skip the parameter.** +```rust + let name_id = rb_sym2id(name_sym); + let name_c = rb_id2name(name_id); + if name_c.is_null() { +
continue; + } + let name = CStr::from_ptr(name_c).to_str().unwrap_or("").to_string(); +``` + +**Read the argument's value from the binding and turn it into a `ValueRecord`.** +```rust + let value = rb_funcall(binding, recorder.id.local_variable_get, 1, name_sym); + let val_rec = to_value(recorder, value, 10); + result.push((name, val_rec)); + } + result +} +``` + +**Define `register_parameter_values` to persist parameters and their values with the tracer.** +```rust +unsafe fn register_parameter_values( + recorder: &mut Recorder, + params: Vec<(String, ValueRecord)>, +) -> Vec<FullValueRecord> { +``` + +**Allocate space for the returned records and walk through each `(name, value)` pair.** +```rust + let mut result = Vec::with_capacity(params.len()); + for (name, val_rec) in params { +``` + +**Record the variable and ensure it has a stable ID in the trace.** +```rust + TraceWriter::register_variable_with_full_value( + &mut *recorder.tracer, + &name, + val_rec.clone(), + ); + let var_id = TraceWriter::ensure_variable_id(&mut *recorder.tracer, &name); +``` + +**Store the final `FullValueRecord` in the results vector and finish.** +```rust + result.push(FullValueRecord { + variable_id: var_id, + value: val_rec, + }); + } + result +} +``` + +**`record_event` logs an arbitrary string event at a given file path and line.** +```rust +unsafe fn record_event(tracer: &mut dyn TraceWriter, path: &str, line: i64, content: String) { + TraceWriter::register_step(tracer, Path::new(path), Line(line)); + TraceWriter::register_special_event(tracer, EventLogKind::Write, &content) +} +``` + +**Begin the C-facing `initialize` function, pulling pointers from Ruby strings.** +```rust +unsafe extern "C" fn initialize(self_val: VALUE, out_dir: VALUE, format: VALUE) -> VALUE { + let recorder_ptr = get_recorder(self_val); + let recorder = &mut *recorder_ptr; + let ptr = RSTRING_PTR(out_dir) as *const u8; + let len = RSTRING_LEN(out_dir) as usize; + let slice = std::slice::from_raw_parts(ptr, len); +``` +
+**Determine the output file format, defaulting to JSON when no symbol is supplied.** +```rust + let fmt = if !NIL_P(format) && RB_SYMBOL_P(format) { + let id = rb_sym2id(format); + match CStr::from_ptr(rb_id2name(id)).to_str().unwrap_or("") { + "binaryv0" => runtime_tracing::TraceEventsFileFormat::BinaryV0, + "binary" | "bin" => runtime_tracing::TraceEventsFileFormat::Binary, + "json" => runtime_tracing::TraceEventsFileFormat::Json, + _ => rb_raise(rb_eIOError, b"Unknown format\0".as_ptr() as *const c_char), + } + } else { + runtime_tracing::TraceEventsFileFormat::Json + }; +``` + +**Attempt to start tracing, pre-registering common Ruby types on success.** +```rust + match std::str::from_utf8(slice) { + Ok(path_str) => { + match begin_trace(Path::new(path_str), fmt) { + Ok(t) => { + recorder.tracer = t; + // pre-register common types to match the pure Ruby tracer + recorder.int_type_id = TraceWriter::ensure_type_id( + &mut *recorder.tracer, + TypeKind::Int, + "Integer", + ); + recorder.string_type_id = TraceWriter::ensure_type_id( + &mut *recorder.tracer, + TypeKind::String, + "String", + ); + recorder.bool_type_id = + TraceWriter::ensure_type_id(&mut *recorder.tracer, TypeKind::Bool, "Bool"); + recorder.float_type_id = runtime_tracing::NONE_TYPE_ID; + recorder.symbol_type_id = TraceWriter::ensure_type_id( + &mut *recorder.tracer, + TypeKind::String, + "Symbol", + ); + recorder.error_type_id = TraceWriter::ensure_type_id( + &mut *recorder.tracer, + TypeKind::Error, + "No type", + ); + let path = Path::new(""); + let func_id = TraceWriter::ensure_function_id( + &mut *recorder.tracer, + "", + path, + Line(1), + ); + TraceWriter::add_event( + &mut *recorder.tracer, + TraceLowLevelEvent::Call(CallRecord { + function_id: func_id, + args: vec![], + }), + ); + } + Err(e) => { + let msg = std::ffi::CString::new(e.to_string()) + .unwrap_or_else(|_| std::ffi::CString::new("unknown error").unwrap()); + rb_raise( + rb_eIOError, + b"Failed to flush trace: %s\0".as_ptr() as 
*const c_char, + msg.as_ptr(), + ); + } + } + } + Err(e) => { + let msg = std::ffi::CString::new(e.to_string()) + .unwrap_or_else(|_| std::ffi::CString::new("invalid utf8").unwrap()); + rb_raise( + rb_eIOError, + b"Invalid UTF-8 in path: %s\0".as_ptr() as *const c_char, + msg.as_ptr(), + ) + } + } +``` + +**Finalize initialization by returning Ruby `nil`.** +```rust + Qnil.into() +} +``` + +**Expose a `flush_trace` function for Ruby to write pending events to disk.** +```rust +unsafe extern "C" fn flush_trace(self_val: VALUE) -> VALUE { + let recorder_ptr = get_recorder(self_val); + let recorder = &mut *recorder_ptr; +``` + +**Attempt the flush and surface I/O errors back to Ruby.** +```rust + if let Err(e) = flush_to_dir(&mut *recorder.tracer) { + let msg = std::ffi::CString::new(e.to_string()) + .unwrap_or_else(|_| std::ffi::CString::new("unknown error").unwrap()); + rb_raise( + rb_eIOError, + b"Failed to flush trace: %s\0".as_ptr() as *const c_char, + msg.as_ptr(), + ); + } +``` + +**Return `nil` to signal success.** +```rust + Qnil.into() +} +``` + +**`record_event_api` lets Ruby code log custom events with a path and line number.** +```rust +unsafe extern "C" fn record_event_api( + self_val: VALUE, + path: VALUE, + line: VALUE, + content: VALUE, +) -> VALUE { +``` + +**Retrieve the recorder and decode the optional path string from Ruby.** +```rust + let recorder = &mut *get_recorder(self_val); + let path_slice = if NIL_P(path) { + "" + } else { + let ptr = RSTRING_PTR(path); + let len = RSTRING_LEN(path) as usize; + std::str::from_utf8(std::slice::from_raw_parts(ptr as *const u8, len)).unwrap_or("") + }; +``` + +**Convert the line number and content, then dispatch the event.** +```rust + let line_num = rb_num2long(line) as i64; + let content_str = value_to_string(recorder, content); + record_event(&mut *recorder.tracer, path_slice, line_num, content_str); + Qnil.into() +} +```