diff --git a/Justfile b/Justfile index ce4a73b..2e67f20 100644 --- a/Justfile +++ b/Justfile @@ -32,9 +32,15 @@ dev: uv run --directory codetracer-python-recorder maturin develop --uv # Run unit tests of dev build -test: - uv run --group dev --group test pytest +test: cargo-test py-test + +# Run Rust unit tests without default features to link Python C library +cargo-test: + uv run cargo test --manifest-path codetracer-python-recorder/Cargo.toml --no-default-features +py-test: + uv run --group dev --group test pytest + # Run tests only on the pure recorder test-pure: uv run --group dev --group test pytest codetracer-pure-python-recorder diff --git a/codetracer-python-recorder/Cargo.lock b/codetracer-python-recorder/Cargo.lock index da12a63..e0c1fa0 100644 --- a/codetracer-python-recorder/Cargo.lock +++ b/codetracer-python-recorder/Cargo.lock @@ -68,6 +68,7 @@ checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" name = "codetracer-python-recorder" version = "0.1.0" dependencies = [ + "bitflags", "pyo3", "runtime_tracing", ] diff --git a/codetracer-python-recorder/Cargo.toml b/codetracer-python-recorder/Cargo.toml index e3c31c3..b25b86c 100644 --- a/codetracer-python-recorder/Cargo.toml +++ b/codetracer-python-recorder/Cargo.toml @@ -8,8 +8,16 @@ repository = "https://github.com/metacraft-labs/codetracer-python-recorder" [lib] name = "codetracer_python_recorder" -crate-type = ["cdylib"] +crate-type = ["cdylib", "rlib"] + +[features] +extension-module = ["pyo3/extension-module"] +default = ["extension-module"] [dependencies] -pyo3 = { version = "0.25.1", features = ["extension-module"] } -runtime_tracing = "0.14.0" \ No newline at end of file +pyo3 = { version = "0.25.1" } +runtime_tracing = "0.14.0" +bitflags = "2.4" + +[dev-dependencies] +pyo3 = { version = "0.25.1", features = ["auto-initialize"] } diff --git a/codetracer-python-recorder/src/lib.rs b/codetracer-python-recorder/src/lib.rs index 92782b5..31d6b52 100644 --- a/codetracer-python-recorder/src/lib.rs +++ b/codetracer-python-recorder/src/lib.rs @@ -3,6 +3,9 @@ use std::sync::atomic::{AtomicBool, Ordering}; use pyo3::exceptions::PyRuntimeError; use pyo3::prelude::*; +pub mod tracer; +pub use crate::tracer::{install_tracer, uninstall_tracer, EventSet, Tracer}; + /// Global flag tracking whether tracing is active. static ACTIVE: AtomicBool = AtomicBool::new(false); diff --git a/codetracer-python-recorder/src/tracer.rs b/codetracer-python-recorder/src/tracer.rs new file mode 100644 index 0000000..2842906 --- /dev/null +++ b/codetracer-python-recorder/src/tracer.rs @@ -0,0 +1,244 @@ +use std::sync::{Mutex, OnceLock}; +use pyo3::{ + exceptions::PyRuntimeError, + prelude::*, + types::{PyAny, PyCFunction, PyModule}, +}; + +const MONITORING_TOOL_NAME: &str = "codetracer"; + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[repr(transparent)] +pub struct EventId(pub i32); + +#[allow(non_snake_case)] +#[derive(Clone, Copy, Debug)] +pub struct MonitoringEvents { + pub BRANCH: EventId, + pub CALL: EventId, + pub C_RAISE: EventId, + pub C_RETURN: EventId, + pub EXCEPTION_HANDLED: EventId, + pub INSTRUCTION: EventId, + pub JUMP: EventId, + pub LINE: EventId, + pub PY_RESUME: EventId, + pub PY_RETURN: EventId, + pub PY_START: EventId, + pub PY_THROW: EventId, + pub PY_UNWIND: EventId, + pub PY_YIELD: EventId, + pub RAISE: EventId, + pub RERAISE: EventId, + pub STOP_ITERATION: EventId, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct ToolId { + pub id: u8, +} + +pub type CallbackFn<'py> = Bound<'py, PyCFunction>; + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct EventSet(pub i32); + +pub const NO_EVENTS: EventSet = EventSet(0); + +impl EventSet { + pub const fn empty() -> Self { + NO_EVENTS + } + pub fn contains(&self, ev: &EventId) -> bool { + (self.0 & ev.0) != 0 + } +} + +pub fn acquire_tool_id(py: Python<'_>) -> PyResult { + let monitoring = py.import("sys")?.getattr("monitoring")?; + const FALLBACK_ID: u8 = 5; + monitoring.call_method1("use_tool_id", (FALLBACK_ID, MONITORING_TOOL_NAME))?; + Ok(ToolId { id: FALLBACK_ID }) +} + +pub fn load_monitoring_events(py: Python<'_>) -> PyResult { + let monitoring = py.import("sys")?.getattr("monitoring")?; + let events = monitoring.getattr("events")?; + Ok(MonitoringEvents { + BRANCH: EventId(events.getattr("BRANCH")?.extract()?), + CALL: EventId(events.getattr("CALL")?.extract()?), + C_RAISE: EventId(events.getattr("C_RAISE")?.extract()?), + C_RETURN: EventId(events.getattr("C_RETURN")?.extract()?), + EXCEPTION_HANDLED: EventId(events.getattr("EXCEPTION_HANDLED")?.extract()?), + INSTRUCTION: EventId(events.getattr("INSTRUCTION")?.extract()?), + JUMP: EventId(events.getattr("JUMP")?.extract()?), + LINE: EventId(events.getattr("LINE")?.extract()?), + PY_RESUME: EventId(events.getattr("PY_RESUME")?.extract()?), + PY_RETURN: EventId(events.getattr("PY_RETURN")?.extract()?), + PY_START: EventId(events.getattr("PY_START")?.extract()?), + PY_THROW: EventId(events.getattr("PY_THROW")?.extract()?), + PY_UNWIND: EventId(events.getattr("PY_UNWIND")?.extract()?), + PY_YIELD: EventId(events.getattr("PY_YIELD")?.extract()?), + RAISE: EventId(events.getattr("RAISE")?.extract()?), + RERAISE: EventId(events.getattr("RERAISE")?.extract()?), + STOP_ITERATION: EventId(events.getattr("STOP_ITERATION")?.extract()?), + }) +} + +static MONITORING_EVENTS: OnceLock = OnceLock::new(); + +pub fn monitoring_events(py: Python<'_>) -> PyResult<&'static MonitoringEvents> { + if let Some(ev) = MONITORING_EVENTS.get() { + return Ok(ev); + } + let ev = load_monitoring_events(py)?; + let _ = MONITORING_EVENTS.set(ev); + Ok(MONITORING_EVENTS.get().unwrap()) +} + +pub fn register_callback( + py: Python<'_>, + tool: &ToolId, + event: &EventId, + cb: Option<&CallbackFn<'_>>, +) -> PyResult<()> { + let monitoring = py.import("sys")?.getattr("monitoring")?; + match cb { + Some(cb) => { + monitoring.call_method("register_callback", (tool.id, event.0, cb), None)?; + } + None => { + monitoring.call_method("register_callback", (tool.id, event.0, py.None()), None)?; + } + } + Ok(()) +} + +pub fn events_union(ids: &[EventId]) -> EventSet { + let mut bits = 0i32; + for id in ids { + bits |= id.0; + } + EventSet(bits) +} + +pub fn set_events(py: Python<'_>, tool: &ToolId, set: EventSet) -> PyResult<()> { + let monitoring = py.import("sys")?.getattr("monitoring")?; + monitoring.call_method1("set_events", (tool.id, set.0))?; + Ok(()) +} + +pub fn free_tool_id(py: Python<'_>, tool: &ToolId) -> PyResult<()> { + let monitoring = py.import("sys")?.getattr("monitoring")?; + monitoring.call_method1("free_tool_id", (tool.id,))?; + Ok(()) +} + + +/// Trait implemented by tracing backends. +/// +/// Each method corresponds to an event from `sys.monitoring`. Default +/// implementations allow implementers to only handle the events they care +/// about. +pub trait Tracer: Send { + /// Return the set of events the tracer wants to receive. + fn interest(&self, _events: &MonitoringEvents) -> EventSet { + NO_EVENTS + } + + /// Called on Python function calls. + fn on_call( + &mut self, + _py: Python<'_>, + _code: &Bound<'_, PyAny>, + _offset: i32, + _callable: &Bound<'_, PyAny>, + _arg0: Option<&Bound<'_, PyAny>>, + ) { + } + + /// Called on line execution. + fn on_line(&mut self, _py: Python<'_>, _code: &Bound<'_, PyAny>, _lineno: u32) {} +} + +struct Global { + tracer: Box, + mask: EventSet, + tool: ToolId, +} + +static GLOBAL: Mutex> = Mutex::new(None); + +/// Install a tracer and hook it into Python's `sys.monitoring`. +pub fn install_tracer(py: Python<'_>, tracer: Box) -> PyResult<()> { + let mut guard = GLOBAL.lock().unwrap(); + if guard.is_some() { + return Err(PyRuntimeError::new_err("tracer already installed")); + } + + let tool = acquire_tool_id(py)?; + let events = monitoring_events(py)?; + + let module = PyModule::new(py, "_codetracer_callbacks")?; + + let mask = tracer.interest(events); + + if mask.contains(&events.CALL) { + let cb = wrap_pyfunction!(callback_call, &module)?; + + register_callback(py, &tool, &events.CALL, Some(&cb))?; + + } + if mask.contains(&events.LINE) { + let cb = wrap_pyfunction!(callback_line, &module)?; + register_callback(py, &tool, &events.LINE, Some(&cb))?; + } + set_events(py, &tool, mask)?; + + + *guard = Some(Global { + tracer, + mask, + tool, + }); + Ok(()) +} + +/// Remove the installed tracer if any. +pub fn uninstall_tracer(py: Python<'_>) -> PyResult<()> { + let mut guard = GLOBAL.lock().unwrap(); + if let Some(global) = guard.take() { + let events = monitoring_events(py)?; + if global.mask.contains(&events.CALL) { + register_callback(py, &global.tool, &events.CALL, None)?; + } + if global.mask.contains(&events.LINE) { + register_callback(py, &global.tool, &events.LINE, None)?; + } + set_events(py, &global.tool, NO_EVENTS)?; + free_tool_id(py, &global.tool)?; + } + Ok(()) +} + +#[pyfunction] +fn callback_call( + py: Python<'_>, + code: Bound<'_, PyAny>, + offset: i32, + callable: Bound<'_, PyAny>, + arg0: Option>, +) -> PyResult<()> { + if let Some(global) = GLOBAL.lock().unwrap().as_mut() { + global.tracer.on_call(py, &code, offset, &callable, arg0.as_ref()); + } + Ok(()) +} + +#[pyfunction] +fn callback_line(py: Python<'_>, code: Bound<'_, PyAny>, lineno: u32) -> PyResult<()> { + if let Some(global) = GLOBAL.lock().unwrap().as_mut() { + global.tracer.on_line(py, &code, lineno); + } + Ok(()) +} diff --git a/codetracer-python-recorder/tests/print_tracer.rs b/codetracer-python-recorder/tests/print_tracer.rs new file mode 100644 index 0000000..5972af7 --- /dev/null +++ b/codetracer-python-recorder/tests/print_tracer.rs @@ -0,0 +1,47 @@ +use codetracer_python_recorder::{install_tracer, uninstall_tracer, EventSet, Tracer}; +use codetracer_python_recorder::tracer::{MonitoringEvents, events_union}; +use pyo3::prelude::*; +use std::ffi::CString; +use std::sync::atomic::{AtomicUsize, Ordering}; + +static CALL_COUNT: AtomicUsize = AtomicUsize::new(0); + +struct PrintTracer; + +impl Tracer for PrintTracer { + fn interest(&self, events:&MonitoringEvents) -> EventSet { + events_union(&[events.CALL]) + } + + fn on_call( + &mut self, + _py: Python<'_>, + _code: &pyo3::Bound<'_, pyo3::types::PyAny>, + _offset: i32, + _callable: &pyo3::Bound<'_, pyo3::types::PyAny>, + _arg0: Option<&pyo3::Bound<'_, pyo3::types::PyAny>>, + ) { + CALL_COUNT.fetch_add(1, Ordering::SeqCst); + } +} + +#[test] +fn tracer_prints_on_call() { + Python::with_gil(|py| { + CALL_COUNT.store(0, Ordering::SeqCst); + if let Err(e) = install_tracer(py, Box::new(PrintTracer)) { + e.print(py); + panic!("Install Tracer failed"); + } + let code = CString::new("def foo():\n return 1\nfoo()").expect("CString::new failed"); + if let Err(e) = py.run(code.as_c_str(), None, None) { + e.print(py); + uninstall_tracer(py).ok(); + panic!("Python raised an exception"); + } + uninstall_tracer(py).unwrap(); + let count = CALL_COUNT.load(Ordering::SeqCst); + assert!(count >= 1, "expected at least one CALL event, got {}", count); + }); +} + diff --git a/design-docs/design-001.md b/design-docs/design-001.md index 1945608..063e668 100644 --- a/design-docs/design-001.md +++ b/design-docs/design-001.md @@ -8,6 +8,13 @@ The tracer collects `sys.monitoring` events, converts them to `runtime_tracing` ## Architecture +### Tracer Abstraction +Rust code exposes a `Tracer` trait representing callbacks for Python +`sys.monitoring` events. Implementations advertise their desired events via an +`EventMask` bit flag returned from `interest`. A `Dispatcher` wraps a trait +object and forwards events only when the mask contains the corresponding flag, +allowing tracers to implement just the methods they care about. + ### Tool Initialization - Acquire a tool identifier via `sys.monitoring.use_tool_id`; store it for the lifetime of the tracer. ```rs