diff --git a/.config/nextest.toml b/.config/nextest.toml new file mode 100644 index 0000000..820ad89 --- /dev/null +++ b/.config/nextest.toml @@ -0,0 +1,3 @@ +[profile.default] +# Hard cap on wall-clock time for each individual test. +test-timeout = "60s" diff --git a/Justfile b/Justfile index d4429ad..a5c15e4 100644 --- a/Justfile +++ b/Justfile @@ -38,7 +38,7 @@ test: cargo-test py-test # Run Rust unit tests without default features to link Python C library cargo-test: - uv run cargo test --manifest-path codetracer-python-recorder/Cargo.toml --no-default-features + uv run cargo nextest run --manifest-path codetracer-python-recorder/Cargo.toml --no-default-features py-test: uv run --group dev --group test pytest diff --git a/codetracer-python-recorder/Cargo.lock b/codetracer-python-recorder/Cargo.lock index 755285a..09078b0 100644 --- a/codetracer-python-recorder/Cargo.lock +++ b/codetracer-python-recorder/Cargo.lock @@ -134,6 +134,7 @@ dependencies = [ "once_cell", "pyo3", "runtime_tracing", + "tempfile", ] [[package]] @@ -184,6 +185,22 @@ dependencies = [ "log", ] +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + [[package]] name = "fscommon" version = "0.1.1" @@ -275,6 +292,12 @@ version = "0.2.175" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543" +[[package]] +name = "linux-raw-sys" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" + [[package]] name = "lock_api" version 
= "0.4.13" @@ -515,6 +538,19 @@ dependencies = [ "zeekstd", ] +[[package]] +name = "rustix" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys", +] + [[package]] name = "ryu" version = "1.0.20" @@ -599,6 +635,19 @@ version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e502f78cdbb8ba4718f566c418c52bc729126ffd16baee5baa718cf25dd5a69a" +[[package]] +name = "tempfile" +version = "3.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" +dependencies = [ + "fastrand", + "getrandom", + "once_cell", + "rustix", + "windows-sys", +] + [[package]] name = "unicode-ident" version = "1.0.18" diff --git a/codetracer-python-recorder/Cargo.toml b/codetracer-python-recorder/Cargo.toml index 76ea493..714f362 100644 --- a/codetracer-python-recorder/Cargo.toml +++ b/codetracer-python-recorder/Cargo.toml @@ -25,3 +25,4 @@ env_logger = "0.11" [dev-dependencies] pyo3 = { version = "0.25.1", features = ["auto-initialize"] } +tempfile = "3.10" diff --git a/codetracer-python-recorder/src/code_object.rs b/codetracer-python-recorder/src/code_object.rs index cf9e04d..d1dfbfa 100644 --- a/codetracer-python-recorder/src/code_object.rs +++ b/codetracer-python-recorder/src/code_object.rs @@ -1,7 +1,7 @@ +use dashmap::DashMap; use once_cell::sync::OnceCell; use pyo3::prelude::*; use pyo3::types::PyCode; -use dashmap::DashMap; use std::sync::Arc; /// A wrapper around Python `code` objects providing cached access to @@ -50,44 +50,41 @@ impl CodeObjectWrapper { } pub fn filename<'py>(&'py self, py: Python<'py>) -> PyResult<&'py str> { - let value = self.cache.filename.get_or_try_init(|| -> PyResult { - let s: String = self - .as_bound(py) - .getattr("co_filename")? 
- .extract()?; - Ok(s) - })?; + let value = self + .cache + .filename + .get_or_try_init(|| -> PyResult { + let s: String = self.as_bound(py).getattr("co_filename")?.extract()?; + Ok(s) + })?; Ok(value.as_str()) } pub fn qualname<'py>(&'py self, py: Python<'py>) -> PyResult<&'py str> { - let value = self.cache.qualname.get_or_try_init(|| -> PyResult { - let s: String = self - .as_bound(py) - .getattr("co_qualname")? - .extract()?; - Ok(s) - })?; + let value = self + .cache + .qualname + .get_or_try_init(|| -> PyResult { + let s: String = self.as_bound(py).getattr("co_qualname")?.extract()?; + Ok(s) + })?; Ok(value.as_str()) } pub fn first_line(&self, py: Python<'_>) -> PyResult { - let value = *self.cache.firstlineno.get_or_try_init(|| -> PyResult { - let v: u32 = self - .as_bound(py) - .getattr("co_firstlineno")? - .extract()?; - Ok(v) - })?; + let value = *self + .cache + .firstlineno + .get_or_try_init(|| -> PyResult { + let v: u32 = self.as_bound(py).getattr("co_firstlineno")?.extract()?; + Ok(v) + })?; Ok(value) } pub fn arg_count(&self, py: Python<'_>) -> PyResult { let value = *self.cache.argcount.get_or_try_init(|| -> PyResult { - let v: u16 = self - .as_bound(py) - .getattr("co_argcount")? - .extract()?; + let v: u16 = self.as_bound(py).getattr("co_argcount")?.extract()?; Ok(v) })?; Ok(value) @@ -95,28 +92,31 @@ impl CodeObjectWrapper { pub fn flags(&self, py: Python<'_>) -> PyResult { let value = *self.cache.flags.get_or_try_init(|| -> PyResult { - let v: u32 = self - .as_bound(py) - .getattr("co_flags")? 
- .extract()?; + let v: u32 = self.as_bound(py).getattr("co_flags")?.extract()?; Ok(v) })?; Ok(value) } fn lines<'py>(&'py self, py: Python<'py>) -> PyResult<&'py [LineEntry]> { - let vec = self.cache.lines.get_or_try_init(|| -> PyResult> { - let mut entries = Vec::new(); - let iter = self.as_bound(py).call_method0("co_lines")?; - let iter = iter.try_iter()?; - for item in iter { - let (start, _end, line): (u32, u32, Option) = item?.extract()?; - if let Some(line) = line { - entries.push(LineEntry { offset: start, line }); + let vec = self + .cache + .lines + .get_or_try_init(|| -> PyResult> { + let mut entries = Vec::new(); + let iter = self.as_bound(py).call_method0("co_lines")?; + let iter = iter.try_iter()?; + for item in iter { + let (start, _end, line): (u32, u32, Option) = item?.extract()?; + if let Some(line) = line { + entries.push(LineEntry { + offset: start, + line, + }); + } } - } - Ok(entries) - })?; + Ok(entries) + })?; Ok(vec.as_slice()) } @@ -161,4 +161,3 @@ impl CodeObjectRegistry { self.map.clear(); } } - diff --git a/codetracer-python-recorder/src/lib.rs b/codetracer-python-recorder/src/lib.rs index 314a0ff..585ffdf 100644 --- a/codetracer-python-recorder/src/lib.rs +++ b/codetracer-python-recorder/src/lib.rs @@ -1,17 +1,23 @@ +//! Runtime tracing module backed by PyO3. +//! +//! Tracer implementations must return `CallbackResult` from every callback so they can +//! signal when CPython should disable further monitoring for a location by propagating +//! the `sys.monitoring.DISABLE` sentinel. 
+ use std::fs; -use std::path::{PathBuf, Path}; +use std::path::Path; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Once; use pyo3::exceptions::PyRuntimeError; use pyo3::prelude::*; -use std::fmt; - pub mod code_object; -pub mod tracer; mod runtime_tracer; +pub mod tracer; pub use crate::code_object::{CodeObjectRegistry, CodeObjectWrapper}; -pub use crate::tracer::{install_tracer, uninstall_tracer, EventSet, Tracer}; +pub use crate::tracer::{ + install_tracer, uninstall_tracer, CallbackOutcome, CallbackResult, EventSet, Tracer, +}; /// Global flag tracking whether tracing is active. static ACTIVE: AtomicBool = AtomicBool::new(false); @@ -25,20 +31,14 @@ fn init_rust_logging_with_default(default_filter: &str) { let env = env_logger::Env::default().default_filter_or(default_filter); // Use a compact format with timestamps and targets to aid debugging. let mut builder = env_logger::Builder::from_env(env); - builder - .format_timestamp_micros() - .format_target(true); + builder.format_timestamp_micros().format_target(true); let _ = builder.try_init(); }); } /// Start tracing using sys.monitoring and runtime_tracing writer. #[pyfunction] -fn start_tracing( - path: &str, - format: &str, - activation_path: Option<&str>, -) -> PyResult<()> { +fn start_tracing(path: &str, format: &str, activation_path: Option<&str>) -> PyResult<()> { // Ensure logging is ready before any tracer logs might be emitted. // Default only our crate to debug to avoid excessive verbosity from deps. init_rust_logging_with_default("codetracer_python_recorder=debug"); @@ -49,26 +49,31 @@ fn start_tracing( // Interpret `path` as a directory where trace files will be written. 
let out_dir = Path::new(path); if out_dir.exists() && !out_dir.is_dir() { - return Err(PyRuntimeError::new_err("trace path exists and is not a directory")); + return Err(PyRuntimeError::new_err( + "trace path exists and is not a directory", + )); } if !out_dir.exists() { // Best-effort create the directory tree - fs::create_dir_all(&out_dir) - .map_err(|e| PyRuntimeError::new_err(format!("failed to create trace directory: {}", e)))?; + fs::create_dir_all(&out_dir).map_err(|e| { + PyRuntimeError::new_err(format!("failed to create trace directory: {}", e)) + })?; } // Map format string to enum let fmt = match format.to_lowercase().as_str() { "json" => runtime_tracing::TraceEventsFileFormat::Json, // Use BinaryV0 for "binary" to avoid streaming writer here. - "binary" | "binaryv0" | "binary_v0" | "b0" => runtime_tracing::TraceEventsFileFormat::BinaryV0, + "binary" | "binaryv0" | "binary_v0" | "b0" => { + runtime_tracing::TraceEventsFileFormat::BinaryV0 + } //TODO AI! We need to assert! that the format is among the known values. other => { eprintln!("Unknown format '{}', defaulting to binary (v0)", other); runtime_tracing::TraceEventsFileFormat::BinaryV0 } }; - + // Build output file paths inside the directory. let (events_path, meta_path, paths_path) = match fmt { runtime_tracing::TraceEventsFileFormat::Json => ( @@ -90,17 +95,10 @@ fn start_tracing( // Program and args: keep minimal; Python-side API stores full session info if needed let sys = py.import("sys")?; let argv = sys.getattr("argv")?; - let program: String = argv - .get_item(0)? - .extract::()?; + let program: String = argv.get_item(0)?.extract::()?; //TODO: Error-handling. What to do if argv is empty? Does this ever happen? 
- let mut tracer = runtime_tracer::RuntimeTracer::new( - &program, - &[], - fmt, - activation_path, - ); + let mut tracer = runtime_tracer::RuntimeTracer::new(&program, &[], fmt, activation_path); // Start location: prefer activation path, otherwise best-effort argv[0] let start_path: &Path = activation_path.unwrap_or(Path::new(&program)); diff --git a/codetracer-python-recorder/src/runtime_tracer.rs b/codetracer-python-recorder/src/runtime_tracer.rs index 3599cf2..bceefa4 100644 --- a/codetracer-python-recorder/src/runtime_tracer.rs +++ b/codetracer-python-recorder/src/runtime_tracer.rs @@ -1,13 +1,24 @@ +use std::collections::HashSet; use std::path::{Path, PathBuf}; use pyo3::prelude::*; -use pyo3::types::{PyAny, PyList, PyTuple, PyDict}; +use pyo3::types::{PyAny, PyDict, PyList, PyMapping, PyTuple}; +use pyo3::{ffi, PyErr}; -use runtime_tracing::{Line, TraceEventsFileFormat, TraceWriter, TypeKind, ValueRecord, NONE_VALUE}; use runtime_tracing::NonStreamingTraceWriter; +use runtime_tracing::{ + Line, TraceEventsFileFormat, TraceWriter, TypeKind, ValueRecord, NONE_VALUE, +}; use crate::code_object::CodeObjectWrapper; -use crate::tracer::{events_union, EventSet, MonitoringEvents, Tracer}; +use crate::tracer::{ + events_union, CallbackOutcome, CallbackResult, EventSet, MonitoringEvents, Tracer, +}; + +extern "C" { + fn PyFrame_GetLocals(frame: *mut ffi::PyFrameObject) -> *mut ffi::PyObject; + fn PyFrame_GetGlobals(frame: *mut ffi::PyFrameObject) -> *mut ffi::PyObject; +} // Logging is handled via the `log` crate macros (e.g., log::debug!). 
@@ -25,6 +36,18 @@ pub struct RuntimeTracer { // Whether we've already completed a one-shot activation window activation_done: bool, started: bool, + ignored_code_ids: HashSet, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +enum ShouldTrace { + Trace, + SkipAndDisable, +} + +fn is_real_filename(filename: &str) -> bool { + let trimmed = filename.trim(); + !(trimmed.starts_with('<') && trimmed.ends_with('>')) } impl RuntimeTracer { @@ -46,14 +69,24 @@ impl RuntimeTracer { activation_code_id: None, activation_done: false, started, + ignored_code_ids: HashSet::new(), } } /// Configure output files and write initial metadata records. - pub fn begin(&mut self, meta_path: &Path, paths_path: &Path, events_path: &Path, start_path: &Path, start_line: u32) -> PyResult<()> { - TraceWriter::begin_writing_trace_metadata(&mut self.writer, meta_path).map_err(to_py_err)?; + pub fn begin( + &mut self, + meta_path: &Path, + paths_path: &Path, + events_path: &Path, + start_path: &Path, + start_line: u32, + ) -> PyResult<()> { + TraceWriter::begin_writing_trace_metadata(&mut self.writer, meta_path) + .map_err(to_py_err)?; TraceWriter::begin_writing_trace_paths(&mut self.writer, paths_path).map_err(to_py_err)?; - TraceWriter::begin_writing_trace_events(&mut self.writer, events_path).map_err(to_py_err)?; + TraceWriter::begin_writing_trace_events(&mut self.writer, events_path) + .map_err(to_py_err)?; TraceWriter::start(&mut self.writer, start_path, Line(start_line as i64)); Ok(()) } @@ -61,7 +94,9 @@ impl RuntimeTracer { /// Return true when tracing is active; may become true on first event /// from the activation file if configured. 
fn ensure_started<'py>(&mut self, py: Python<'py>, code: &CodeObjectWrapper) { - if self.started || self.activation_done { return; } + if self.started || self.activation_done { + return; + } if let Some(activation) = &self.activation_path { if let Ok(filename) = code.filename(py) { let f = Path::new(filename); @@ -71,7 +106,10 @@ impl RuntimeTracer { if f == activation { self.started = true; self.activation_code_id = Some(code.id()); - log::debug!("[RuntimeTracer] activated on enter: {}", activation.display()); + log::debug!( + "[RuntimeTracer] activated on enter: {}", + activation.display() + ); } } } @@ -107,7 +145,10 @@ impl RuntimeTracer { // not allowed. if let Ok(s) = v.extract::() { let ty = TraceWriter::ensure_type_id(&mut self.writer, TypeKind::String, "String"); - return ValueRecord::String { text: s, type_id: ty }; + return ValueRecord::String { + text: s, + type_id: ty, + }; } // Python tuple -> ValueRecord::Tuple with recursively-encoded elements @@ -118,7 +159,10 @@ impl RuntimeTracer { elements.push(self.encode_value(_py, &item)); } let ty = TraceWriter::ensure_type_id(&mut self.writer, TypeKind::Tuple, "Tuple"); - return ValueRecord::Tuple { elements, type_id: ty }; + return ValueRecord::Tuple { + elements, + type_id: ty, + }; } // Python list -> ValueRecord::Sequence with recursively-encoded elements @@ -128,7 +172,11 @@ impl RuntimeTracer { elements.push(self.encode_value(_py, &item)); } let ty = TraceWriter::ensure_type_id(&mut self.writer, TypeKind::Seq, "List"); - return ValueRecord::Sequence { elements, is_slice: false, type_id: ty }; + return ValueRecord::Sequence { + elements, + is_slice: false, + type_id: ty, + }; } // Python dict -> represent as a Sequence of (key, value) Tuples. 
@@ -146,34 +194,75 @@ impl RuntimeTracer { let key_obj = t.get_item(0).unwrap(); let val_obj = t.get_item(1).unwrap(); let key_rec = if let Ok(s) = key_obj.extract::() { - ValueRecord::String { text: s, type_id: str_ty } + ValueRecord::String { + text: s, + type_id: str_ty, + } } else { self.encode_value(_py, &key_obj) }; let val_rec = self.encode_value(_py, &val_obj); - let pair_rec = ValueRecord::Tuple { elements: vec![key_rec, val_rec], type_id: tuple_ty }; + let pair_rec = ValueRecord::Tuple { + elements: vec![key_rec, val_rec], + type_id: tuple_ty, + }; elements.push(pair_rec); } } } - return ValueRecord::Sequence { elements, is_slice: false, type_id: seq_ty }; + return ValueRecord::Sequence { + elements, + is_slice: false, + type_id: seq_ty, + }; } // Fallback to Raw string representation let ty = TraceWriter::ensure_type_id(&mut self.writer, TypeKind::Raw, "Object"); match v.str() { - Ok(s) => ValueRecord::Raw { r: s.to_string_lossy().into_owned(), type_id: ty }, - Err(_) => ValueRecord::Error { msg: "".to_string(), type_id: ty }, + Ok(s) => ValueRecord::Raw { + r: s.to_string_lossy().into_owned(), + type_id: ty, + }, + Err(_) => ValueRecord::Error { + msg: "".to_string(), + type_id: ty, + }, } } - fn ensure_function_id(&mut self, py: Python<'_>, code: &CodeObjectWrapper) -> PyResult { + fn ensure_function_id( + &mut self, + py: Python<'_>, + code: &CodeObjectWrapper, + ) -> PyResult { //TODO AI! current runtime_tracer logic expects that `name` is unique and is used as a key for the function. //This is wrong. 
We need to write a test that exposes this issue let name = code.qualname(py)?; let filename = code.filename(py)?; let first_line = code.first_line(py)?; - Ok(TraceWriter::ensure_function_id(&mut self.writer, name, Path::new(filename), Line(first_line as i64))) + Ok(TraceWriter::ensure_function_id( + &mut self.writer, + name, + Path::new(filename), + Line(first_line as i64), + )) + } + + fn should_trace_code(&mut self, py: Python<'_>, code: &CodeObjectWrapper) -> ShouldTrace { + let code_id = code.id(); + if self.ignored_code_ids.contains(&code_id) { + return ShouldTrace::SkipAndDisable; + } + let filename = code + .filename(py) + .expect("RuntimeTracer::should_trace_code failed to resolve filename"); + if is_real_filename(filename) { + ShouldTrace::Trace + } else { + self.ignored_code_ids.insert(code_id); + ShouldTrace::SkipAndDisable + } } } @@ -187,43 +276,65 @@ impl Tracer for RuntimeTracer { events_union(&[events.PY_START, events.LINE, events.PY_RETURN]) } - fn on_py_start(&mut self, py: Python<'_>, code: &CodeObjectWrapper, _offset: i32) -> PyResult<()> { - // Activate lazily if configured; ignore until then + fn on_py_start( + &mut self, + py: Python<'_>, + code: &CodeObjectWrapper, + _offset: i32, + ) -> CallbackResult { self.ensure_started(py, code); - if !self.started { return Ok(()); } - // Trace event entry + if matches!( + self.should_trace_code(py, code), + ShouldTrace::SkipAndDisable + ) { + return Ok(CallbackOutcome::DisableLocation); + } + if !self.started { + return Ok(CallbackOutcome::Continue); + } + match (code.filename(py), code.qualname(py)) { (Ok(fname), Ok(qname)) => { log::debug!("[RuntimeTracer] on_py_start: {} ({})", qname, fname) } _ => log::debug!("[RuntimeTracer] on_py_start"), } + if let Ok(fid) = self.ensure_function_id(py, code) { - // Attempt to capture function arguments from the current frame. - // Fail fast on any error per source-code rules. 
let mut args: Vec = Vec::new(); let frame_and_args = (|| -> PyResult<()> { - // Current Python frame where the function just started executing - let sys = py.import("sys")?; - let frame = sys.getattr("_getframe")?.call1((0,))?; - let locals = frame.getattr("f_locals")?; - - // Argument names come from co_varnames in the order defined by CPython: - // [positional (pos-only + pos-or-kw)] [+ varargs] [+ kw-only] [+ kwargs] - // In CPython 3.8+ semantics, `co_argcount` is the TOTAL number of positional - // parameters (including positional-only and pos-or-keyword). Use it directly - // for the positional slice; `co_posonlyargcount` is only needed if we want to - // distinguish the two groups, which we do not here. - let argcount = code.arg_count(py)? as usize; // total positional (pos-only + pos-or-kw) - let posonly: usize = code - .as_bound(py) - .getattr("co_posonlyargcount")? - .extract()?; - let kwonly: usize = code - .as_bound(py) - .getattr("co_kwonlyargcount")? - .extract()?; + let frame_ptr = unsafe { ffi::PyEval_GetFrame() }; + if frame_ptr.is_null() { + return Err(pyo3::exceptions::PyRuntimeError::new_err( + "on_py_start: null frame", + )); + } + unsafe { + ffi::Py_XINCREF(frame_ptr.cast()); + } + unsafe { + if ffi::PyFrame_FastToLocalsWithError(frame_ptr) < 0 { + ffi::Py_DECREF(frame_ptr.cast()); + let err = PyErr::fetch(py); + return Err(err); + } + } + + let locals_raw = unsafe { PyFrame_GetLocals(frame_ptr) }; + if locals_raw.is_null() { + unsafe { + ffi::Py_DECREF(frame_ptr.cast()); + } + return Err(pyo3::exceptions::PyRuntimeError::new_err( + "on_py_start: PyFrame_GetLocals returned null", + )); + } + let locals = unsafe { Bound::::from_owned_ptr(py, locals_raw) }; + + let argcount = code.arg_count(py)? 
as usize; + let _posonly: usize = code.as_bound(py).getattr("co_posonlyargcount")?.extract()?; + let kwonly: usize = code.as_bound(py).getattr("co_kwonlyargcount")?.extract()?; let flags = code.flags(py)?; const CO_VARARGS: u32 = 0x04; const CO_VARKEYWORDS: u32 = 0x08; @@ -231,9 +342,7 @@ impl Tracer for RuntimeTracer { let varnames_obj = code.as_bound(py).getattr("co_varnames")?; let varnames: Vec = varnames_obj.extract()?; - // 1) Positional parameters (pos-only + pos-or-kw) let mut idx = 0usize; - // `argcount` already includes positional-only parameters let take_n = std::cmp::min(argcount, varnames.len()); for name in varnames.iter().take(take_n) { match locals.get_item(name) { @@ -241,12 +350,13 @@ impl Tracer for RuntimeTracer { let vrec = self.encode_value(py, &val); args.push(TraceWriter::arg(&mut self.writer, name, vrec)); } - Err(_) => {} + Err(e) => { + panic!("Error {:?}", e) + } } idx += 1; } - // 2) Varargs (*args) if (flags & CO_VARARGS) != 0 && idx < varnames.len() { let name = &varnames[idx]; if let Ok(val) = locals.get_item(name) { @@ -256,7 +366,6 @@ impl Tracer for RuntimeTracer { idx += 1; } - // 3) Keyword-only parameters let kwonly_take = std::cmp::min(kwonly, varnames.len().saturating_sub(idx)); for name in varnames.iter().skip(idx).take(kwonly_take) { match locals.get_item(name) { @@ -264,12 +373,13 @@ impl Tracer for RuntimeTracer { let vrec = self.encode_value(py, &val); args.push(TraceWriter::arg(&mut self.writer, name, vrec)); } - Err(_) => {} + Err(e) => { + panic!("Error {:?}", e) + } } } idx = idx.saturating_add(kwonly_take); - // 4) Kwargs (**kwargs) if (flags & CO_VARKEYWORDS) != 0 && idx < varnames.len() { let name = &varnames[idx]; if let Ok(val) = locals.get_item(name) { @@ -277,36 +387,148 @@ impl Tracer for RuntimeTracer { args.push(TraceWriter::arg(&mut self.writer, name, vrec)); } } + unsafe { + ffi::Py_DECREF(frame_ptr.cast()); + } Ok(()) })(); + if let Err(e) = frame_and_args { - // Raise a clear error; do not silently 
continue with empty args. - let rete =Err(pyo3::exceptions::PyRuntimeError::new_err(format!( - "on_py_start: failed to capture args: {}", - e - ))); - log::debug!("error {:?}", rete); - return rete; + let message = format!("on_py_start: failed to capture args: {}", e); + log::error!("{message}"); + return Err(pyo3::exceptions::PyRuntimeError::new_err(message)); } TraceWriter::register_call(&mut self.writer, fid, args); } - Ok(()) + + Ok(CallbackOutcome::Continue) } - fn on_line(&mut self, py: Python<'_>, code: &CodeObjectWrapper, lineno: u32) { - // Activate lazily if configured; ignore until then + fn on_line(&mut self, py: Python<'_>, code: &CodeObjectWrapper, lineno: u32) -> CallbackResult { self.ensure_started(py, code); - if !self.started { return; } - // Trace event entry + if matches!( + self.should_trace_code(py, code), + ShouldTrace::SkipAndDisable + ) { + return Ok(CallbackOutcome::DisableLocation); + } + if !self.started { + return Ok(CallbackOutcome::Continue); + } + if let Ok(fname) = code.filename(py) { log::debug!("[RuntimeTracer] on_line: {}:{}", fname, lineno); } else { log::debug!("[RuntimeTracer] on_line: :{}", lineno); } + + let mut frame_ptr = unsafe { ffi::PyEval_GetFrame() }; + if frame_ptr.is_null() { + panic!("PyEval_GetFrame returned null frame"); + } + unsafe { + ffi::Py_XINCREF(frame_ptr.cast()); + } + let target_code_ptr = code.as_bound(py).as_ptr(); + loop { + if frame_ptr.is_null() { + break; + } + let frame_code_ptr = unsafe { ffi::PyFrame_GetCode(frame_ptr) }; + if frame_code_ptr.is_null() { + unsafe { + ffi::Py_DECREF(frame_ptr.cast()); + } + panic!("PyFrame_GetCode returned null"); + } + let frame_code: Py = + unsafe { Py::from_owned_ptr(py, frame_code_ptr as *mut ffi::PyObject) }; + if frame_code.as_ptr() == target_code_ptr { + break; + } + let back = unsafe { ffi::PyFrame_GetBack(frame_ptr) }; + unsafe { + ffi::Py_DECREF(frame_ptr.cast()); + } + frame_ptr = back; + } + if frame_ptr.is_null() { + panic!("Failed to locate frame 
for code object {}", code.id()); + } + + // Synchronise fast locals so PyFrame_GetLocals sees current values. + unsafe { + if ffi::PyFrame_FastToLocalsWithError(frame_ptr) < 0 { + ffi::Py_DECREF(frame_ptr.cast()); + let err = PyErr::fetch(py); + panic!("Failed to sync frame locals: {err}"); + } + } + if let Ok(filename) = code.filename(py) { TraceWriter::register_step(&mut self.writer, Path::new(filename), Line(lineno as i64)); } + + // Obtain concrete dict objects for iteration. + let locals_raw = unsafe { PyFrame_GetLocals(frame_ptr) }; + if locals_raw.is_null() { + unsafe { + ffi::Py_DECREF(frame_ptr.cast()); + } + panic!("PyFrame_GetLocals returned null"); + } + let globals_raw = unsafe { PyFrame_GetGlobals(frame_ptr) }; + if globals_raw.is_null() { + unsafe { + ffi::Py_DECREF(frame_ptr.cast()); + } + panic!("PyFrame_GetGlobals returned null"); + } + let locals_is_globals = locals_raw == globals_raw; + let locals_any = unsafe { Bound::::from_owned_ptr(py, locals_raw) }; + let globals_any = unsafe { Bound::::from_owned_ptr(py, globals_raw) }; + let locals_mapping = locals_any + .downcast::() + .expect("Frame locals was not a mapping"); + let globals_mapping = globals_any + .downcast::() + .expect("Frame globals was not a mapping"); + let locals_dict = PyDict::new(py); + locals_dict + .update(&locals_mapping) + .expect("Failed to materialize locals dict"); + let globals_dict = PyDict::new(py); + globals_dict + .update(&globals_mapping) + .expect("Failed to materialize globals dict"); + + let mut recorded: HashSet = HashSet::new(); + + for (key, value) in locals_dict.iter() { + let name: String = key.extract().expect("Local name was not a string"); + let encoded = self.encode_value(py, &value); + TraceWriter::register_variable_with_full_value(&mut self.writer, &name, encoded); + recorded.insert(name); + } + + if !locals_is_globals { + for (key, value) in globals_dict.iter() { + let name: String = key.extract().expect("Global name was not a string"); + if name == 
"__builtins__" || recorded.contains(&name) { + continue; + } + let encoded = self.encode_value(py, &value); + TraceWriter::register_variable_with_full_value(&mut self.writer, &name, encoded); + recorded.insert(name); + } + } + + unsafe { + ffi::Py_DECREF(frame_ptr.cast()); + } + + Ok(CallbackOutcome::Continue) } fn on_py_return( @@ -315,18 +537,30 @@ impl Tracer for RuntimeTracer { code: &CodeObjectWrapper, _offset: i32, retval: &Bound<'_, PyAny>, - ) { - // Activate lazily if configured; ignore until then + ) -> CallbackResult { self.ensure_started(py, code); - if !self.started { return; } - // Trace event entry + if matches!( + self.should_trace_code(py, code), + ShouldTrace::SkipAndDisable + ) { + return Ok(CallbackOutcome::DisableLocation); + } + if !self.started { + return Ok(CallbackOutcome::Continue); + } + match (code.filename(py), code.qualname(py)) { - (Ok(fname), Ok(qname)) => log::debug!("[RuntimeTracer] on_py_return: {} ({})", qname, fname), + (Ok(fname), Ok(qname)) => { + log::debug!("[RuntimeTracer] on_py_return: {} ({})", qname, fname) + } _ => log::debug!("[RuntimeTracer] on_py_return"), } - // Determine whether this is the activation owner's return - let is_activation_return = self.activation_code_id.map(|id| id == code.id()).unwrap_or(false); - + + let is_activation_return = self + .activation_code_id + .map(|id| id == code.id()) + .unwrap_or(false); + let val = self.encode_value(py, retval); TraceWriter::register_return(&mut self.writer, val); if is_activation_return { @@ -334,6 +568,8 @@ impl Tracer for RuntimeTracer { self.activation_done = true; log::debug!("[RuntimeTracer] deactivated on activation return"); } + + Ok(CallbackOutcome::Continue) } fn flush(&mut self, _py: Python<'_>) -> PyResult<()> { @@ -348,6 +584,7 @@ impl Tracer for RuntimeTracer { // Streaming writer: no partial flush to avoid closing the stream. 
} } + self.ignored_code_ids.clear(); Ok(()) } @@ -357,6 +594,709 @@ impl Tracer for RuntimeTracer { TraceWriter::finish_writing_trace_metadata(&mut self.writer).map_err(to_py_err)?; TraceWriter::finish_writing_trace_paths(&mut self.writer).map_err(to_py_err)?; TraceWriter::finish_writing_trace_events(&mut self.writer).map_err(to_py_err)?; + self.ignored_code_ids.clear(); + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::tracer::CallbackOutcome; + use pyo3::types::{PyCode, PyModule}; + use pyo3::wrap_pyfunction; + use runtime_tracing::{FullValueRecord, TraceLowLevelEvent, ValueRecord}; + use std::cell::Cell; + use std::collections::BTreeMap; + use std::ffi::CString; + + thread_local! { + static ACTIVE_TRACER: Cell<*mut RuntimeTracer> = Cell::new(std::ptr::null_mut()); + static LAST_OUTCOME: Cell> = Cell::new(None); + } + + struct ScopedTracer; + + impl ScopedTracer { + fn new(tracer: &mut RuntimeTracer) -> Self { + let ptr = tracer as *mut _; + ACTIVE_TRACER.with(|cell| cell.set(ptr)); + ScopedTracer + } + } + + impl Drop for ScopedTracer { + fn drop(&mut self) { + ACTIVE_TRACER.with(|cell| cell.set(std::ptr::null_mut())); + } + } + + fn last_outcome() -> Option { + LAST_OUTCOME.with(|cell| cell.get()) + } + + #[test] + fn detects_real_filenames() { + assert!(is_real_filename("example.py")); + assert!(is_real_filename(" /tmp/module.py ")); + assert!(is_real_filename("src/.py")); + assert!(!is_real_filename("")); + assert!(!is_real_filename(" ")); + assert!(!is_real_filename("")); + } + + #[test] + fn skips_synthetic_filename_events() { + Python::with_gil(|py| { + let mut tracer = RuntimeTracer::new("test.py", &[], TraceEventsFileFormat::Json, None); + ensure_test_module(py); + let script = format!("{PRELUDE}\nsnapshot()\n"); + { + let _guard = ScopedTracer::new(&mut tracer); + LAST_OUTCOME.with(|cell| cell.set(None)); + let script_c = CString::new(script).expect("script contains nul byte"); + py.run(script_c.as_c_str(), None, None) + 
.expect("execute synthetic script"); + } + assert!( + tracer.writer.events.is_empty(), + "expected no events for synthetic filename" + ); + assert_eq!(last_outcome(), Some(CallbackOutcome::DisableLocation)); + + let compile_fn = py + .import("builtins") + .expect("import builtins") + .getattr("compile") + .expect("fetch compile"); + let binding = compile_fn + .call1(("pass", "", "exec")) + .expect("compile code object"); + let code_obj = binding.downcast::().expect("downcast code object"); + let wrapper = CodeObjectWrapper::new(py, &code_obj); + assert_eq!( + tracer.should_trace_code(py, &wrapper), + ShouldTrace::SkipAndDisable + ); + }); + } + + #[test] + fn traces_real_file_events() { + let snapshots = run_traced_script("snapshot()\n"); + assert!( + !snapshots.is_empty(), + "expected snapshots for real file execution" + ); + assert_eq!(last_outcome(), Some(CallbackOutcome::Continue)); + } + + #[test] + fn callbacks_do_not_import_sys_monitoring() { + let body = r#" +import builtins +_orig_import = builtins.__import__ + +def guard(name, *args, **kwargs): + if name == "sys.monitoring": + raise RuntimeError("callback imported sys.monitoring") + return _orig_import(name, *args, **kwargs) + +builtins.__import__ = guard +try: + snapshot() +finally: + builtins.__import__ = _orig_import +"#; + let snapshots = run_traced_script(body); + assert!( + !snapshots.is_empty(), + "expected snapshots when import guard active" + ); + assert_eq!(last_outcome(), Some(CallbackOutcome::Continue)); + } + + #[pyfunction] + fn capture_line(py: Python<'_>, code: Bound<'_, PyCode>, lineno: u32) -> PyResult<()> { + ACTIVE_TRACER.with(|cell| -> PyResult<()> { + let ptr = cell.get(); + if ptr.is_null() { + panic!("No active RuntimeTracer for capture_line"); + } + unsafe { + let tracer = &mut *ptr; + let wrapper = CodeObjectWrapper::new(py, &code); + match tracer.on_line(py, &wrapper, lineno) { + Ok(outcome) => { + LAST_OUTCOME.with(|cell| cell.set(Some(outcome))); + Ok(()) + } + Err(err) => 
Err(err), + } + } + })?; Ok(()) } + + const PRELUDE: &str = r#" +import inspect +from test_tracer import capture_line + +def snapshot(line=None): + frame = inspect.currentframe().f_back + lineno = frame.f_lineno if line is None else line + capture_line(frame.f_code, lineno) + +def snap(value): + frame = inspect.currentframe().f_back + capture_line(frame.f_code, frame.f_lineno) + return value +"#; + + #[derive(Debug, Clone, PartialEq)] + enum SimpleValue { + None, + Bool(bool), + Int(i64), + String(String), + Tuple(Vec), + Sequence(Vec), + Raw(String), + } + + impl SimpleValue { + fn from_value(value: &ValueRecord) -> Self { + match value { + ValueRecord::None { .. } => SimpleValue::None, + ValueRecord::Bool { b, .. } => SimpleValue::Bool(*b), + ValueRecord::Int { i, .. } => SimpleValue::Int(*i), + ValueRecord::String { text, .. } => SimpleValue::String(text.clone()), + ValueRecord::Tuple { elements, .. } => { + SimpleValue::Tuple(elements.iter().map(SimpleValue::from_value).collect()) + } + ValueRecord::Sequence { elements, .. } => { + SimpleValue::Sequence(elements.iter().map(SimpleValue::from_value).collect()) + } + ValueRecord::Raw { r, .. } => SimpleValue::Raw(r.clone()), + ValueRecord::Error { msg, .. 
} => SimpleValue::Raw(msg.clone()), + other => SimpleValue::Raw(format!("{other:?}")), + } + } + } + + #[derive(Debug)] + struct Snapshot { + line: i64, + vars: BTreeMap, + } + + fn collect_snapshots(events: &[TraceLowLevelEvent]) -> Vec { + let mut names: Vec = Vec::new(); + let mut snapshots: Vec = Vec::new(); + let mut current: Option = None; + for event in events { + match event { + TraceLowLevelEvent::VariableName(name) => names.push(name.clone()), + TraceLowLevelEvent::Step(step) => { + if let Some(snapshot) = current.take() { + snapshots.push(snapshot); + } + current = Some(Snapshot { + line: step.line.0, + vars: BTreeMap::new(), + }); + } + TraceLowLevelEvent::Value(FullValueRecord { variable_id, value }) => { + if let Some(snapshot) = current.as_mut() { + let index = variable_id.0; + let name = names + .get(index) + .cloned() + .unwrap_or_else(|| panic!("Missing variable name for id {}", index)); + snapshot.vars.insert(name, SimpleValue::from_value(value)); + } + } + _ => {} + } + } + if let Some(snapshot) = current.take() { + snapshots.push(snapshot); + } + snapshots + } + + fn ensure_test_module(py: Python<'_>) { + let module = PyModule::new(py, "test_tracer").expect("create module"); + module + .add_function(wrap_pyfunction!(capture_line, &module).expect("wrap capture_line")) + .expect("add function"); + py.import("sys") + .expect("import sys") + .getattr("modules") + .expect("modules attr") + .set_item("test_tracer", module) + .expect("insert module"); + } + + fn run_traced_script(body: &str) -> Vec { + Python::with_gil(|py| { + let mut tracer = RuntimeTracer::new("test.py", &[], TraceEventsFileFormat::Json, None); + ensure_test_module(py); + let tmp = tempfile::tempdir().expect("create temp dir"); + let script_path = tmp.path().join("script.py"); + let script = format!("{PRELUDE}\n{body}"); + std::fs::write(&script_path, &script).expect("write script"); + { + let _guard = ScopedTracer::new(&mut tracer); + LAST_OUTCOME.with(|cell| cell.set(None)); + 
let run_code = format!( + "import runpy\nrunpy.run_path(r\"{}\")", + script_path.display() + ); + let run_code_c = CString::new(run_code).expect("script contains nul byte"); + py.run(run_code_c.as_c_str(), None, None) + .expect("execute test script"); + } + collect_snapshots(&tracer.writer.events) + }) + } + + fn assert_var(snapshot: &Snapshot, name: &str, expected: SimpleValue) { + let actual = snapshot + .vars + .get(name) + .unwrap_or_else(|| panic!("{name} missing at line {}", snapshot.line)); + assert_eq!( + actual, &expected, + "Unexpected value for {name} at line {}", + snapshot.line + ); + } + + fn find_snapshot_with_vars<'a>(snapshots: &'a [Snapshot], names: &[&str]) -> &'a Snapshot { + snapshots + .iter() + .find(|snap| names.iter().all(|n| snap.vars.contains_key(*n))) + .unwrap_or_else(|| panic!("No snapshot containing variables {:?}", names)) + } + + fn assert_no_variable(snapshots: &[Snapshot], name: &str) { + if snapshots.iter().any(|snap| snap.vars.contains_key(name)) { + panic!("Variable {name} unexpectedly captured"); + } + } + + #[test] + fn captures_simple_function_locals() { + let snapshots = run_traced_script( + r#" +def simple_function(x): + snapshot() + a = 1 + snapshot() + b = a + x + snapshot() + return a, b + +simple_function(5) +"#, + ); + + assert_var(&snapshots[0], "x", SimpleValue::Int(5)); + assert!(!snapshots[0].vars.contains_key("a")); + assert_var(&snapshots[1], "a", SimpleValue::Int(1)); + assert_var(&snapshots[2], "b", SimpleValue::Int(6)); + } + + #[test] + fn captures_closure_variables() { + let snapshots = run_traced_script( + r#" +def outer_func(x): + snapshot() + y = 1 + snapshot() + def inner_func(z): + nonlocal y + snapshot() + w = x + y + z + snapshot() + y = w + snapshot() + return w + total = inner_func(5) + snapshot() + return y, total + +result = outer_func(2) +"#, + ); + + let inner_entry = find_snapshot_with_vars(&snapshots, &["x", "y", "z"]); + assert_var(inner_entry, "x", SimpleValue::Int(2)); + 
assert_var(inner_entry, "y", SimpleValue::Int(1)); + + let w_snapshot = find_snapshot_with_vars(&snapshots, &["w", "x", "y", "z"]); + assert_var(w_snapshot, "w", SimpleValue::Int(8)); + + let outer_after = find_snapshot_with_vars(&snapshots, &["total", "y"]); + assert_var(outer_after, "total", SimpleValue::Int(8)); + assert_var(outer_after, "y", SimpleValue::Int(8)); + } + + #[test] + fn captures_globals() { + let snapshots = run_traced_script( + r#" +GLOBAL_VAL = 10 +counter = 0 +snapshot() + +def global_test(): + snapshot() + local_copy = GLOBAL_VAL + snapshot() + global counter + counter += 1 + snapshot() + return local_copy, counter + +before = counter +snapshot() +result = global_test() +snapshot() +after = counter +snapshot() +"#, + ); + + let access_global = find_snapshot_with_vars(&snapshots, &["local_copy", "GLOBAL_VAL"]); + assert_var(access_global, "GLOBAL_VAL", SimpleValue::Int(10)); + assert_var(access_global, "local_copy", SimpleValue::Int(10)); + + let last_counter = snapshots + .iter() + .rev() + .find(|snap| snap.vars.contains_key("counter")) + .expect("Expected at least one counter snapshot"); + assert_var(last_counter, "counter", SimpleValue::Int(1)); + } + + #[test] + fn captures_class_scope() { + let snapshots = run_traced_script( + r#" +CONSTANT = 42 +snapshot() + +class MetaCounter(type): + count = 0 + snapshot() + def __init__(cls, name, bases, attrs): + snapshot() + MetaCounter.count += 1 + super().__init__(name, bases, attrs) + +class Sample(metaclass=MetaCounter): + snapshot() + a = 10 + snapshot() + b = a + 5 + snapshot() + print(a, b, CONSTANT) + snapshot() + def method(self): + snapshot() + return self.a + self.b + +instance = Sample() +snapshot() +instances = MetaCounter.count +snapshot() +_ = instance.method() +snapshot() +"#, + ); + + let meta_init = find_snapshot_with_vars(&snapshots, &["cls", "name", "attrs"]); + assert_var(meta_init, "name", SimpleValue::String("Sample".to_string())); + + let class_body = 
find_snapshot_with_vars(&snapshots, &["a", "b"]); + assert_var(class_body, "a", SimpleValue::Int(10)); + assert_var(class_body, "b", SimpleValue::Int(15)); + + let method_snapshot = find_snapshot_with_vars(&snapshots, &["self"]); + assert!(method_snapshot.vars.contains_key("self")); + } + + #[test] + fn captures_lambda_and_comprehensions() { + let snapshots = run_traced_script( + r#" +factor = 2 +snapshot() +double = lambda y: snap(y * factor) +snapshot() +lambda_value = double(5) +snapshot() +squares = [snap(n ** 2) for n in range(3)] +snapshot() +scaled_set = {snap(n * factor) for n in range(3)} +snapshot() +mapping = {n: snap(n * factor) for n in range(3)} +snapshot() +gen_exp = (snap(n * factor) for n in range(3)) +snapshot() +result_list = list(gen_exp) +snapshot() +"#, + ); + + let lambda_snapshot = find_snapshot_with_vars(&snapshots, &["y", "factor"]); + assert_var(lambda_snapshot, "y", SimpleValue::Int(5)); + assert_var(lambda_snapshot, "factor", SimpleValue::Int(2)); + + let list_comp = find_snapshot_with_vars(&snapshots, &["n", "factor"]); + assert!(matches!(list_comp.vars.get("n"), Some(SimpleValue::Int(_)))); + + let result_snapshot = find_snapshot_with_vars(&snapshots, &["result_list"]); + assert!(matches!( + result_snapshot.vars.get("result_list"), + Some(SimpleValue::Sequence(_)) + )); + } + + #[test] + fn captures_generators_and_coroutines() { + let snapshots = run_traced_script( + r#" +import asyncio +snapshot() + + +def counter_gen(n): + snapshot() + total = 0 + for i in range(n): + total += i + snapshot() + yield total + snapshot() + return total + +async def async_sum(data): + snapshot() + total = 0 + for x in data: + total += x + snapshot() + await asyncio.sleep(0) + snapshot() + return total + +gen = counter_gen(3) +gen_results = list(gen) +snapshot() +coroutine_result = asyncio.run(async_sum([1, 2, 3])) +snapshot() +"#, + ); + + let generator_step = find_snapshot_with_vars(&snapshots, &["i", "total"]); + assert!(matches!( + 
generator_step.vars.get("i"), + Some(SimpleValue::Int(_)) + )); + + let coroutine_steps: Vec<&Snapshot> = snapshots + .iter() + .filter(|snap| snap.vars.contains_key("x")) + .collect(); + assert!(!coroutine_steps.is_empty()); + let final_coroutine_step = coroutine_steps.last().unwrap(); + assert_var(final_coroutine_step, "total", SimpleValue::Int(6)); + + let coroutine_result_snapshot = find_snapshot_with_vars(&snapshots, &["coroutine_result"]); + assert!(coroutine_result_snapshot + .vars + .contains_key("coroutine_result")); + } + + #[test] + fn captures_exception_and_with_blocks() { + let snapshots = run_traced_script( + r#" +import io +__file__ = "test_script.py" + +def exception_and_with_demo(x): + snapshot() + try: + inv = 10 / x + snapshot() + except ZeroDivisionError as e: + snapshot() + error_msg = f"Error: {e}" + snapshot() + else: + snapshot() + inv += 1 + snapshot() + finally: + snapshot() + final_flag = True + snapshot() + with io.StringIO("dummy line") as f: + snapshot() + first_line = f.readline() + snapshot() + snapshot() + return locals() + +result1 = exception_and_with_demo(0) +snapshot() +result2 = exception_and_with_demo(5) +snapshot() +"#, + ); + + let except_snapshot = find_snapshot_with_vars(&snapshots, &["e", "error_msg"]); + assert!(matches!( + except_snapshot.vars.get("error_msg"), + Some(SimpleValue::String(_)) + )); + + let finally_snapshot = find_snapshot_with_vars(&snapshots, &["final_flag"]); + assert_var(finally_snapshot, "final_flag", SimpleValue::Bool(true)); + + let with_snapshot = find_snapshot_with_vars(&snapshots, &["f", "first_line"]); + assert!(with_snapshot.vars.contains_key("first_line")); + } + + #[test] + fn captures_decorators() { + let snapshots = run_traced_script( + r#" +setting = "Hello" +snapshot() + + +def my_decorator(func): + snapshot() + def wrapper(*args, **kwargs): + snapshot() + return func(*args, **kwargs) + return wrapper + +@my_decorator +def greet(name): + snapshot() + message = f"Hi, {name}" + snapshot() 
+ return message + +output = greet("World") +snapshot() +"#, + ); + + let decorator_snapshot = find_snapshot_with_vars(&snapshots, &["func", "setting"]); + assert!(decorator_snapshot.vars.contains_key("func")); + + let wrapper_snapshot = find_snapshot_with_vars(&snapshots, &["args", "kwargs", "setting"]); + assert!(wrapper_snapshot.vars.contains_key("args")); + + let greet_snapshot = find_snapshot_with_vars(&snapshots, &["name", "message"]); + assert_var( + greet_snapshot, + "name", + SimpleValue::String("World".to_string()), + ); + } + + #[test] + fn captures_dynamic_execution() { + let snapshots = run_traced_script( + r#" +expr_code = "dynamic_var = 99" +snapshot() +exec(expr_code) +snapshot() +check = dynamic_var + 1 +snapshot() + +def eval_test(): + snapshot() + value = 10 + formula = "value * 2" + snapshot() + result = eval(formula) + snapshot() + return result + +out = eval_test() +snapshot() +"#, + ); + + let exec_snapshot = find_snapshot_with_vars(&snapshots, &["dynamic_var"]); + assert_var(exec_snapshot, "dynamic_var", SimpleValue::Int(99)); + + let eval_snapshot = find_snapshot_with_vars(&snapshots, &["value", "formula"]); + assert_var(eval_snapshot, "value", SimpleValue::Int(10)); + } + + #[test] + fn captures_imports() { + let snapshots = run_traced_script( + r#" +import math +snapshot() + +def import_test(): + snapshot() + import os + snapshot() + constant = math.pi + snapshot() + cwd = os.getcwd() + snapshot() + return constant, cwd + +val, path = import_test() +snapshot() +"#, + ); + + let global_import = find_snapshot_with_vars(&snapshots, &["math"]); + assert!(matches!( + global_import.vars.get("math"), + Some(SimpleValue::Raw(_)) + )); + + let local_import = find_snapshot_with_vars(&snapshots, &["os", "constant"]); + assert!(local_import.vars.contains_key("os")); + } + + #[test] + fn builtins_not_recorded() { + let snapshots = run_traced_script( + r#" +def builtins_test(seq): + snapshot() + n = len(seq) + snapshot() + m = max(seq) + snapshot() + 
return n, m + +result = builtins_test([5, 3, 7]) +snapshot() +"#, + ); + + let len_snapshot = find_snapshot_with_vars(&snapshots, &["n"]); + assert_var(len_snapshot, "n", SimpleValue::Int(3)); + assert_no_variable(&snapshots, "len"); + } } diff --git a/codetracer-python-recorder/src/tracer.rs b/codetracer-python-recorder/src/tracer.rs index 51aef83..f3c3080 100644 --- a/codetracer-python-recorder/src/tracer.rs +++ b/codetracer-python-recorder/src/tracer.rs @@ -1,11 +1,11 @@ -use std::any::Any; -use std::sync::{Mutex, OnceLock}; +use crate::code_object::{CodeObjectRegistry, CodeObjectWrapper}; use pyo3::{ exceptions::PyRuntimeError, prelude::*, types::{PyAny, PyCFunction, PyCode, PyModule}, }; -use crate::code_object::{CodeObjectWrapper, CodeObjectRegistry}; +use std::any::Any; +use std::sync::{Mutex, OnceLock}; const MONITORING_TOOL_NAME: &str = "codetracer"; @@ -47,6 +47,19 @@ pub struct EventSet(pub i32); pub const NO_EVENTS: EventSet = EventSet(0); +/// Outcome returned by tracer callbacks to control CPython monitoring. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum CallbackOutcome { + /// Continue receiving events for the current location. + Continue, + /// Disable future events for the current location by returning + /// `sys.monitoring.DISABLE`. + DisableLocation, +} + +/// Result type shared by tracer callbacks. +pub type CallbackResult = PyResult; + impl EventSet { pub const fn empty() -> Self { NO_EVENTS @@ -136,16 +149,25 @@ pub fn free_tool_id(py: Python<'_>, tool: &ToolId) -> PyResult<()> { Ok(()) } - /// Trait implemented by tracing backends. /// /// Each method corresponds to an event from `sys.monitoring`. Default /// implementations allow implementers to only handle the events they care /// about. +/// +/// Every callback returns a `CallbackResult` so implementations can propagate +/// Python exceptions or request that CPython disables future events for a +/// location by yielding the `CallbackOutcome::DisableLocation` sentinel. 
pub trait Tracer: Send + Any { /// Downcast support for implementations that need to be accessed /// behind a `Box` (e.g., for flushing/finishing). - fn as_any(&mut self) -> &mut dyn Any where Self: 'static, Self: Sized { self } + fn as_any(&mut self) -> &mut dyn Any + where + Self: 'static, + Self: Sized, + { + self + } /// Return the set of events the tracer wants to receive. fn interest(&self, _events: &MonitoringEvents) -> EventSet { @@ -160,14 +182,29 @@ pub trait Tracer: Send + Any { _offset: i32, _callable: &Bound<'_, PyAny>, _arg0: Option<&Bound<'_, PyAny>>, - ) { + ) -> CallbackResult { + Ok(CallbackOutcome::Continue) } /// Called on line execution. - fn on_line(&mut self, _py: Python<'_>, _code: &CodeObjectWrapper, _lineno: u32) {} + fn on_line( + &mut self, + _py: Python<'_>, + _code: &CodeObjectWrapper, + _lineno: u32, + ) -> CallbackResult { + Ok(CallbackOutcome::Continue) + } /// Called when an instruction is about to be executed (by offset). - fn on_instruction(&mut self, _py: Python<'_>, _code: &CodeObjectWrapper, _offset: i32) {} + fn on_instruction( + &mut self, + _py: Python<'_>, + _code: &CodeObjectWrapper, + _offset: i32, + ) -> CallbackResult { + Ok(CallbackOutcome::Continue) + } /// Called when a jump in the control flow graph is made. fn on_jump( @@ -176,7 +213,8 @@ pub trait Tracer: Send + Any { _code: &CodeObjectWrapper, _offset: i32, _destination_offset: i32, - ) { + ) -> CallbackResult { + Ok(CallbackOutcome::Continue) } /// Called when a conditional branch is considered. @@ -186,7 +224,8 @@ pub trait Tracer: Send + Any { _code: &CodeObjectWrapper, _offset: i32, _destination_offset: i32, - ) { + ) -> CallbackResult { + Ok(CallbackOutcome::Continue) } /// Called at start of a Python function (frame on stack). @@ -194,10 +233,24 @@ pub trait Tracer: Send + Any { /// Implementations should fail fast on irrecoverable conditions /// (e.g., inability to access the current frame/locals) by /// returning an error. 
- fn on_py_start(&mut self, _py: Python<'_>, _code: &CodeObjectWrapper, _offset: i32) -> PyResult<()> { Ok(()) } + fn on_py_start( + &mut self, + _py: Python<'_>, + _code: &CodeObjectWrapper, + _offset: i32, + ) -> CallbackResult { + Ok(CallbackOutcome::Continue) + } /// Called on resumption of a generator/coroutine (not via throw()). - fn on_py_resume(&mut self, _py: Python<'_>, _code: &CodeObjectWrapper, _offset: i32) {} + fn on_py_resume( + &mut self, + _py: Python<'_>, + _code: &CodeObjectWrapper, + _offset: i32, + ) -> CallbackResult { + Ok(CallbackOutcome::Continue) + } /// Called immediately before a Python function returns. fn on_py_return( @@ -206,7 +259,8 @@ pub trait Tracer: Send + Any { _code: &CodeObjectWrapper, _offset: i32, _retval: &Bound<'_, PyAny>, - ) { + ) -> CallbackResult { + Ok(CallbackOutcome::Continue) } /// Called immediately before a Python function yields. @@ -216,7 +270,8 @@ pub trait Tracer: Send + Any { _code: &CodeObjectWrapper, _offset: i32, _retval: &Bound<'_, PyAny>, - ) { + ) -> CallbackResult { + Ok(CallbackOutcome::Continue) } /// Called when a Python function is resumed by throw(). @@ -226,7 +281,8 @@ pub trait Tracer: Send + Any { _code: &CodeObjectWrapper, _offset: i32, _exception: &Bound<'_, PyAny>, - ) { + ) -> CallbackResult { + Ok(CallbackOutcome::Continue) } /// Called when exiting a Python function during exception unwinding. @@ -236,7 +292,8 @@ pub trait Tracer: Send + Any { _code: &CodeObjectWrapper, _offset: i32, _exception: &Bound<'_, PyAny>, - ) { + ) -> CallbackResult { + Ok(CallbackOutcome::Continue) } /// Called when an exception is raised (excluding STOP_ITERATION). @@ -246,7 +303,8 @@ pub trait Tracer: Send + Any { _code: &CodeObjectWrapper, _offset: i32, _exception: &Bound<'_, PyAny>, - ) { + ) -> CallbackResult { + Ok(CallbackOutcome::Continue) } /// Called when an exception is re-raised. 
@@ -256,7 +314,8 @@ pub trait Tracer: Send + Any { _code: &CodeObjectWrapper, _offset: i32, _exception: &Bound<'_, PyAny>, - ) { + ) -> CallbackResult { + Ok(CallbackOutcome::Continue) } /// Called when an exception is handled. @@ -266,7 +325,8 @@ pub trait Tracer: Send + Any { _code: &CodeObjectWrapper, _offset: i32, _exception: &Bound<'_, PyAny>, - ) { + ) -> CallbackResult { + Ok(CallbackOutcome::Continue) } /// Called when an artificial StopIteration is raised. @@ -291,7 +351,8 @@ pub trait Tracer: Send + Any { _offset: i32, _callable: &Bound<'_, PyAny>, _arg0: Option<&Bound<'_, PyAny>>, - ) { + ) -> CallbackResult { + Ok(CallbackOutcome::Continue) } /// Called when an exception is raised from any non-Python callable. @@ -302,14 +363,19 @@ pub trait Tracer: Send + Any { _offset: i32, _callable: &Bound<'_, PyAny>, _arg0: Option<&Bound<'_, PyAny>>, - ) { + ) -> CallbackResult { + Ok(CallbackOutcome::Continue) } /// Flush any buffered state to storage. Default is a no-op. - fn flush(&mut self, _py: Python<'_>) -> PyResult<()> { Ok(()) } + fn flush(&mut self, _py: Python<'_>) -> PyResult<()> { + Ok(()) + } /// Finish and close any underlying writers. Default is a no-op. - fn finish(&mut self, _py: Python<'_>) -> PyResult<()> { Ok(()) } + fn finish(&mut self, _py: Python<'_>) -> PyResult<()> { + Ok(()) + } } struct Global { @@ -317,10 +383,20 @@ struct Global { tracer: Box, mask: EventSet, tool: ToolId, + disable_sentinel: Py, } static GLOBAL: Mutex> = Mutex::new(None); +impl Global { + fn handle_callback(&self, py: Python<'_>, result: CallbackResult) -> PyResult> { + match result? { + CallbackOutcome::Continue => Ok(py.None()), + CallbackOutcome::DisableLocation => Ok(self.disable_sentinel.clone_ref(py)), + } + } +} + /// Install a tracer and hook it into Python's `sys.monitoring`. 
pub fn install_tracer(py: Python<'_>, tracer: Box) -> PyResult<()> { let mut guard = GLOBAL.lock().unwrap(); @@ -330,6 +406,8 @@ pub fn install_tracer(py: Python<'_>, tracer: Box) -> PyResult<()> { let tool = acquire_tool_id(py)?; let events = monitoring_events(py)?; + let monitoring = py.import("sys")?.getattr("monitoring")?; + let disable_sentinel = monitoring.getattr("DISABLE")?.unbind(); let module = PyModule::new(py, "_codetracer_callbacks")?; @@ -412,6 +490,7 @@ pub fn install_tracer(py: Python<'_>, tracer: Box) -> PyResult<()> { tracer, mask, tool, + disable_sentinel, }); Ok(()) } @@ -466,7 +545,7 @@ pub fn uninstall_tracer(py: Python<'_>) -> PyResult<()> { if global.mask.contains(&events.EXCEPTION_HANDLED) { register_callback(py, &global.tool, &events.EXCEPTION_HANDLED, None)?; } - // See comment in tracer trait + // See comment in tracer trait // if global.mask.contains(&events.STOP_ITERATION) { // register_callback(py, &global.tool, &events.STOP_ITERATION, None)?; // } @@ -499,30 +578,41 @@ fn callback_call( offset: i32, callable: Bound<'_, PyAny>, arg0: Option>, -) -> PyResult<()> { +) -> PyResult> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { let wrapper = global.registry.get_or_insert(py, &code); - global.tracer.on_call(py, &wrapper, offset, &callable, arg0.as_ref()); + let result = global + .tracer + .on_call(py, &wrapper, offset, &callable, arg0.as_ref()); + return global.handle_callback(py, result); } - Ok(()) + Ok(py.None()) } #[pyfunction] -fn callback_line(py: Python<'_>, code: Bound<'_, PyCode>, lineno: u32) -> PyResult<()> { +fn callback_line(py: Python<'_>, code: Bound<'_, PyCode>, lineno: u32) -> PyResult> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { let wrapper = global.registry.get_or_insert(py, &code); - global.tracer.on_line(py, &wrapper, lineno); + let result = global.tracer.on_line(py, &wrapper, lineno); + return global.handle_callback(py, result); } - Ok(()) + Ok(py.None()) } #[pyfunction] -fn 
callback_instruction(py: Python<'_>, code: Bound<'_, PyCode>, instruction_offset: i32) -> PyResult<()> { +fn callback_instruction( + py: Python<'_>, + code: Bound<'_, PyCode>, + instruction_offset: i32, +) -> PyResult> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { let wrapper = global.registry.get_or_insert(py, &code); - global.tracer.on_instruction(py, &wrapper, instruction_offset); + let result = global + .tracer + .on_instruction(py, &wrapper, instruction_offset); + return global.handle_callback(py, result); } - Ok(()) + Ok(py.None()) } #[pyfunction] @@ -531,14 +621,15 @@ fn callback_jump( code: Bound<'_, PyCode>, instruction_offset: i32, destination_offset: i32, -) -> PyResult<()> { +) -> PyResult> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { let wrapper = global.registry.get_or_insert(py, &code); - global + let result = global .tracer .on_jump(py, &wrapper, instruction_offset, destination_offset); + return global.handle_callback(py, result); } - Ok(()) + Ok(py.None()) } #[pyfunction] @@ -547,42 +638,50 @@ fn callback_branch( code: Bound<'_, PyCode>, instruction_offset: i32, destination_offset: i32, -) -> PyResult<()> { +) -> PyResult> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { let wrapper = global.registry.get_or_insert(py, &code); - global + let result = global .tracer .on_branch(py, &wrapper, instruction_offset, destination_offset); + return global.handle_callback(py, result); } - Ok(()) + Ok(py.None()) } #[pyfunction] -fn callback_py_start(py: Python<'_>, code: Bound<'_, PyCode>, instruction_offset: i32) -> PyResult<()> { +fn callback_py_start( + py: Python<'_>, + code: Bound<'_, PyCode>, + instruction_offset: i32, +) -> PyResult> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { let wrapper = global.registry.get_or_insert(py, &code); - match global.tracer.on_py_start(py, &wrapper, instruction_offset) { - Ok(()) => Ok(()), + let result = global.tracer.on_py_start(py, &wrapper, instruction_offset); + return 
match result { + Ok(outcome) => global.handle_callback(py, Ok(outcome)), Err(err) => { - // Disable further monitoring immediately on first callback error. - // Soft-stop within this lock to avoid deadlocking on GLOBAL. let _ = set_events(py, &global.tool, NO_EVENTS); log::error!("Event monitoring turned off due to exception. No new events will be recorded! {}", err); Err(err) } - } - } else { - Ok(()) + }; } + Ok(py.None()) } #[pyfunction] -fn callback_py_resume(py: Python<'_>, code: Bound<'_, PyCode>, instruction_offset: i32) -> PyResult<()> { +fn callback_py_resume( + py: Python<'_>, + code: Bound<'_, PyCode>, + instruction_offset: i32, +) -> PyResult> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { let wrapper = global.registry.get_or_insert(py, &code); - global.tracer.on_py_resume(py, &wrapper, instruction_offset); + let result = global.tracer.on_py_resume(py, &wrapper, instruction_offset); + return global.handle_callback(py, result); } - Ok(()) + Ok(py.None()) } #[pyfunction] @@ -591,12 +690,15 @@ fn callback_py_return( code: Bound<'_, PyCode>, instruction_offset: i32, retval: Bound<'_, PyAny>, -) -> PyResult<()> { +) -> PyResult> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { let wrapper = global.registry.get_or_insert(py, &code); - global.tracer.on_py_return(py, &wrapper, instruction_offset, &retval); + let result = global + .tracer + .on_py_return(py, &wrapper, instruction_offset, &retval); + return global.handle_callback(py, result); } - Ok(()) + Ok(py.None()) } #[pyfunction] @@ -605,12 +707,15 @@ fn callback_py_yield( code: Bound<'_, PyCode>, instruction_offset: i32, retval: Bound<'_, PyAny>, -) -> PyResult<()> { +) -> PyResult> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { let wrapper = global.registry.get_or_insert(py, &code); - global.tracer.on_py_yield(py, &wrapper, instruction_offset, &retval); + let result = global + .tracer + .on_py_yield(py, &wrapper, instruction_offset, &retval); + return 
global.handle_callback(py, result); } - Ok(()) + Ok(py.None()) } #[pyfunction] @@ -619,12 +724,15 @@ fn callback_py_throw( code: Bound<'_, PyCode>, instruction_offset: i32, exception: Bound<'_, PyAny>, -) -> PyResult<()> { +) -> PyResult> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { let wrapper = global.registry.get_or_insert(py, &code); - global.tracer.on_py_throw(py, &wrapper, instruction_offset, &exception); + let result = global + .tracer + .on_py_throw(py, &wrapper, instruction_offset, &exception); + return global.handle_callback(py, result); } - Ok(()) + Ok(py.None()) } #[pyfunction] @@ -633,12 +741,15 @@ fn callback_py_unwind( code: Bound<'_, PyCode>, instruction_offset: i32, exception: Bound<'_, PyAny>, -) -> PyResult<()> { +) -> PyResult> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { let wrapper = global.registry.get_or_insert(py, &code); - global.tracer.on_py_unwind(py, &wrapper, instruction_offset, &exception); + let result = global + .tracer + .on_py_unwind(py, &wrapper, instruction_offset, &exception); + return global.handle_callback(py, result); } - Ok(()) + Ok(py.None()) } #[pyfunction] @@ -647,12 +758,15 @@ fn callback_raise( code: Bound<'_, PyCode>, instruction_offset: i32, exception: Bound<'_, PyAny>, -) -> PyResult<()> { +) -> PyResult> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { let wrapper = global.registry.get_or_insert(py, &code); - global.tracer.on_raise(py, &wrapper, instruction_offset, &exception); + let result = global + .tracer + .on_raise(py, &wrapper, instruction_offset, &exception); + return global.handle_callback(py, result); } - Ok(()) + Ok(py.None()) } #[pyfunction] @@ -661,12 +775,15 @@ fn callback_reraise( code: Bound<'_, PyCode>, instruction_offset: i32, exception: Bound<'_, PyAny>, -) -> PyResult<()> { +) -> PyResult> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { let wrapper = global.registry.get_or_insert(py, &code); - global.tracer.on_reraise(py, &wrapper, instruction_offset, 
&exception); + let result = global + .tracer + .on_reraise(py, &wrapper, instruction_offset, &exception); + return global.handle_callback(py, result); } - Ok(()) + Ok(py.None()) } #[pyfunction] @@ -675,14 +792,16 @@ fn callback_exception_handled( code: Bound<'_, PyCode>, instruction_offset: i32, exception: Bound<'_, PyAny>, -) -> PyResult<()> { +) -> PyResult> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { let wrapper = global.registry.get_or_insert(py, &code); - global - .tracer - .on_exception_handled(py, &wrapper, instruction_offset, &exception); + let result = + global + .tracer + .on_exception_handled(py, &wrapper, instruction_offset, &exception); + return global.handle_callback(py, result); } - Ok(()) + Ok(py.None()) } // See comment in Tracer trait @@ -708,14 +827,15 @@ fn callback_c_return( offset: i32, callable: Bound<'_, PyAny>, arg0: Option>, -) -> PyResult<()> { +) -> PyResult> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { let wrapper = global.registry.get_or_insert(py, &code); - global + let result = global .tracer .on_c_return(py, &wrapper, offset, &callable, arg0.as_ref()); + return global.handle_callback(py, result); } - Ok(()) + Ok(py.None()) } #[pyfunction] @@ -725,12 +845,13 @@ fn callback_c_raise( offset: i32, callable: Bound<'_, PyAny>, arg0: Option>, -) -> PyResult<()> { +) -> PyResult> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { let wrapper = global.registry.get_or_insert(py, &code); - global + let result = global .tracer .on_c_raise(py, &wrapper, offset, &callable, arg0.as_ref()); + return global.handle_callback(py, result); } - Ok(()) + Ok(py.None()) } diff --git a/codetracer-python-recorder/tests/code_object_wrapper.rs b/codetracer-python-recorder/tests/code_object_wrapper.rs index 888f8d3..dc6dc5f 100644 --- a/codetracer-python-recorder/tests/code_object_wrapper.rs +++ b/codetracer-python-recorder/tests/code_object_wrapper.rs @@ -1,4 +1,4 @@ -use 
codetracer_python_recorder::code_object::{CodeObjectWrapper, CodeObjectRegistry}; +use codetracer_python_recorder::code_object::{CodeObjectRegistry, CodeObjectWrapper}; use pyo3::prelude::*; use pyo3::types::{PyCode, PyModule}; use std::ffi::CString; @@ -9,13 +9,10 @@ fn wrapper_basic_attributes() { let src = CString::new("def f(x):\n return x + 1\n").unwrap(); let filename = CString::new("").unwrap(); let module = CString::new("m").unwrap(); - let m = PyModule::from_code(py, src.as_c_str(), filename.as_c_str(), module.as_c_str()).unwrap(); - let func = m.getattr("f").unwrap(); - let code: Bound<'_, PyCode> = func - .getattr("__code__") - .unwrap() - .downcast_into() + let m = PyModule::from_code(py, src.as_c_str(), filename.as_c_str(), module.as_c_str()) .unwrap(); + let func = m.getattr("f").unwrap(); + let code: Bound<'_, PyCode> = func.getattr("__code__").unwrap().downcast_into().unwrap(); let wrapper = CodeObjectWrapper::new(py, &code); assert_eq!(wrapper.arg_count(py).unwrap(), 1); assert_eq!(wrapper.filename(py).unwrap(), ""); @@ -30,13 +27,10 @@ fn wrapper_line_for_offset() { let src = CString::new("def g():\n a = 1\n b = 2\n return a + b\n").unwrap(); let filename = CString::new("").unwrap(); let module = CString::new("m2").unwrap(); - let m = PyModule::from_code(py, src.as_c_str(), filename.as_c_str(), module.as_c_str()).unwrap(); - let func = m.getattr("g").unwrap(); - let code: Bound<'_, PyCode> = func - .getattr("__code__") - .unwrap() - .downcast_into() + let m = PyModule::from_code(py, src.as_c_str(), filename.as_c_str(), module.as_c_str()) .unwrap(); + let func = m.getattr("g").unwrap(); + let code: Bound<'_, PyCode> = func.getattr("__code__").unwrap().downcast_into().unwrap(); let wrapper = CodeObjectWrapper::new(py, &code); let lines = code.call_method0("co_lines").unwrap(); let iter = lines.try_iter().unwrap(); @@ -58,9 +52,8 @@ fn registry_reuses_wrappers() { let src = CString::new("def h():\n return 0\n").unwrap(); let filename = 
CString::new("").unwrap(); let module = CString::new("m3").unwrap(); - let m = - PyModule::from_code(py, src.as_c_str(), filename.as_c_str(), module.as_c_str()) - .unwrap(); + let m = PyModule::from_code(py, src.as_c_str(), filename.as_c_str(), module.as_c_str()) + .unwrap(); let func = m.getattr("h").unwrap(); let code: Bound<'_, PyCode> = func .getattr("__code__") diff --git a/codetracer-python-recorder/tests/print_tracer.rs b/codetracer-python-recorder/tests/print_tracer.rs index e1e6cd8..a36a690 100644 --- a/codetracer-python-recorder/tests/print_tracer.rs +++ b/codetracer-python-recorder/tests/print_tracer.rs @@ -1,5 +1,9 @@ -use codetracer_python_recorder::{install_tracer, uninstall_tracer, EventSet, Tracer, CodeObjectWrapper}; -use codetracer_python_recorder::tracer::{MonitoringEvents, events_union}; +use codetracer_python_recorder::tracer::{ + events_union, CallbackOutcome, CallbackResult, MonitoringEvents, +}; +use codetracer_python_recorder::{ + install_tracer, uninstall_tracer, CodeObjectWrapper, EventSet, Tracer, +}; use pyo3::prelude::*; use std::ffi::CString; use std::sync::atomic::{AtomicUsize, Ordering}; @@ -20,8 +24,9 @@ impl Tracer for PrintTracer { _offset: i32, _callable: &Bound<'_, PyAny>, _arg0: Option<&Bound<'_, PyAny>>, - ) { + ) -> CallbackResult { CALL_COUNT.fetch_add(1, Ordering::SeqCst); + Ok(CallbackOutcome::Continue) } } @@ -35,7 +40,11 @@ fn tracer_prints_on_call() { py.run(code.as_c_str(), None, None).unwrap(); uninstall_tracer(py).unwrap(); let count = CALL_COUNT.load(Ordering::SeqCst); - assert!(count >= 1, "expected at least one CALL event, got {}", count); + assert!( + count >= 1, + "expected at least one CALL event, got {}", + count + ); }); } @@ -81,94 +90,180 @@ impl Tracer for CountingTracer { ]) } - fn on_line(&mut self, _py: Python<'_>, _code: &CodeObjectWrapper, lineno: u32) { + fn on_line( + &mut self, + _py: Python<'_>, + _code: &CodeObjectWrapper, + lineno: u32, + ) -> CallbackResult { LINE_COUNT.fetch_add(1, 
Ordering::SeqCst); println!("LINE at {}", lineno); + Ok(CallbackOutcome::Continue) } - fn on_instruction(&mut self, py: Python<'_>, code: &CodeObjectWrapper, offset: i32) { + fn on_instruction( + &mut self, + py: Python<'_>, + code: &CodeObjectWrapper, + offset: i32, + ) -> CallbackResult { INSTRUCTION_COUNT.fetch_add(1, Ordering::SeqCst); if let Ok(Some(line)) = code.line_for_offset(py, offset as u32) { println!("INSTRUCTION at {}", line); } + Ok(CallbackOutcome::Continue) } - fn on_jump(&mut self, py: Python<'_>, code: &CodeObjectWrapper, offset: i32, _dest: i32) { + fn on_jump( + &mut self, + py: Python<'_>, + code: &CodeObjectWrapper, + offset: i32, + _dest: i32, + ) -> CallbackResult { JUMP_COUNT.fetch_add(1, Ordering::SeqCst); if let Ok(Some(line)) = code.line_for_offset(py, offset as u32) { println!("JUMP at {}", line); } + Ok(CallbackOutcome::Continue) } - fn on_branch(&mut self, py: Python<'_>, code: &CodeObjectWrapper, offset: i32, _dest: i32) { + fn on_branch( + &mut self, + py: Python<'_>, + code: &CodeObjectWrapper, + offset: i32, + _dest: i32, + ) -> CallbackResult { BRANCH_COUNT.fetch_add(1, Ordering::SeqCst); if let Ok(Some(line)) = code.line_for_offset(py, offset as u32) { println!("BRANCH at {}", line); } + Ok(CallbackOutcome::Continue) } - fn on_py_start(&mut self, py: Python<'_>, code: &CodeObjectWrapper, offset: i32) -> PyResult<()> { + fn on_py_start( + &mut self, + py: Python<'_>, + code: &CodeObjectWrapper, + offset: i32, + ) -> CallbackResult { PY_START_COUNT.fetch_add(1, Ordering::SeqCst); if let Ok(Some(line)) = code.line_for_offset(py, offset as u32) { println!("PY_START at {}", line); } - Ok(()) + Ok(CallbackOutcome::Continue) } - fn on_py_resume(&mut self, py: Python<'_>, code: &CodeObjectWrapper, offset: i32) { + fn on_py_resume( + &mut self, + py: Python<'_>, + code: &CodeObjectWrapper, + offset: i32, + ) -> CallbackResult { PY_RESUME_COUNT.fetch_add(1, Ordering::SeqCst); if let Ok(Some(line)) = code.line_for_offset(py, offset as 
u32) { println!("PY_RESUME at {}", line); } + Ok(CallbackOutcome::Continue) } - fn on_py_return(&mut self, py: Python<'_>, code: &CodeObjectWrapper, offset: i32, _retval: &Bound<'_, PyAny>) { + fn on_py_return( + &mut self, + py: Python<'_>, + code: &CodeObjectWrapper, + offset: i32, + _retval: &Bound<'_, PyAny>, + ) -> CallbackResult { PY_RETURN_COUNT.fetch_add(1, Ordering::SeqCst); if let Ok(Some(line)) = code.line_for_offset(py, offset as u32) { println!("PY_RETURN at {}", line); } + Ok(CallbackOutcome::Continue) } - fn on_py_yield(&mut self, py: Python<'_>, code: &CodeObjectWrapper, offset: i32, _retval: &Bound<'_, PyAny>) { + fn on_py_yield( + &mut self, + py: Python<'_>, + code: &CodeObjectWrapper, + offset: i32, + _retval: &Bound<'_, PyAny>, + ) -> CallbackResult { PY_YIELD_COUNT.fetch_add(1, Ordering::SeqCst); if let Ok(Some(line)) = code.line_for_offset(py, offset as u32) { println!("PY_YIELD at {}", line); } + Ok(CallbackOutcome::Continue) } - fn on_py_throw(&mut self, py: Python<'_>, code: &CodeObjectWrapper, offset: i32, _exc: &Bound<'_, PyAny>) { + fn on_py_throw( + &mut self, + py: Python<'_>, + code: &CodeObjectWrapper, + offset: i32, + _exc: &Bound<'_, PyAny>, + ) -> CallbackResult { PY_THROW_COUNT.fetch_add(1, Ordering::SeqCst); if let Ok(Some(line)) = code.line_for_offset(py, offset as u32) { println!("PY_THROW at {}", line); } + Ok(CallbackOutcome::Continue) } - fn on_py_unwind(&mut self, py: Python<'_>, code: &CodeObjectWrapper, offset: i32, _exc: &Bound<'_, PyAny>) { + fn on_py_unwind( + &mut self, + py: Python<'_>, + code: &CodeObjectWrapper, + offset: i32, + _exc: &Bound<'_, PyAny>, + ) -> CallbackResult { PY_UNWIND_COUNT.fetch_add(1, Ordering::SeqCst); if let Ok(Some(line)) = code.line_for_offset(py, offset as u32) { println!("PY_UNWIND at {}", line); } + Ok(CallbackOutcome::Continue) } - fn on_raise(&mut self, py: Python<'_>, code: &CodeObjectWrapper, offset: i32, _exc: &Bound<'_, PyAny>) { + fn on_raise( + &mut self, + py: Python<'_>, + 
code: &CodeObjectWrapper, + offset: i32, + _exc: &Bound<'_, PyAny>, + ) -> CallbackResult { RAISE_COUNT.fetch_add(1, Ordering::SeqCst); if let Ok(Some(line)) = code.line_for_offset(py, offset as u32) { println!("RAISE at {}", line); } + Ok(CallbackOutcome::Continue) } - fn on_reraise(&mut self, py: Python<'_>, code: &CodeObjectWrapper, offset: i32, _exc: &Bound<'_, PyAny>) { + fn on_reraise( + &mut self, + py: Python<'_>, + code: &CodeObjectWrapper, + offset: i32, + _exc: &Bound<'_, PyAny>, + ) -> CallbackResult { RERAISE_COUNT.fetch_add(1, Ordering::SeqCst); if let Ok(Some(line)) = code.line_for_offset(py, offset as u32) { println!("RERAISE at {}", line); } + Ok(CallbackOutcome::Continue) } - fn on_exception_handled(&mut self, py: Python<'_>, code: &CodeObjectWrapper, offset: i32, _exc: &Bound<'_, PyAny>) { + fn on_exception_handled( + &mut self, + py: Python<'_>, + code: &CodeObjectWrapper, + offset: i32, + _exc: &Bound<'_, PyAny>, + ) -> CallbackResult { EXCEPTION_HANDLED_COUNT.fetch_add(1, Ordering::SeqCst); if let Ok(Some(line)) = code.line_for_offset(py, offset as u32) { println!("EXCEPTION_HANDLED at {}", line); } + Ok(CallbackOutcome::Continue) } // fn on_stop_iteration( @@ -184,18 +279,34 @@ impl Tracer for CountingTracer { // } // } - fn on_c_return(&mut self, py: Python<'_>, code: &CodeObjectWrapper, offset: i32, _call: &Bound<'_, PyAny>, _arg0: Option<&Bound<'_, PyAny>>) { + fn on_c_return( + &mut self, + py: Python<'_>, + code: &CodeObjectWrapper, + offset: i32, + _call: &Bound<'_, PyAny>, + _arg0: Option<&Bound<'_, PyAny>>, + ) -> CallbackResult { C_RETURN_COUNT.fetch_add(1, Ordering::SeqCst); if let Ok(Some(line)) = code.line_for_offset(py, offset as u32) { println!("C_RETURN at {}", line); } + Ok(CallbackOutcome::Continue) } - fn on_c_raise(&mut self, py: Python<'_>, code: &CodeObjectWrapper, offset: i32, _call: &Bound<'_, PyAny>, _arg0: Option<&Bound<'_, PyAny>>) { + fn on_c_raise( + &mut self, + py: Python<'_>, + code: &CodeObjectWrapper, + 
offset: i32, + _call: &Bound<'_, PyAny>, + _arg0: Option<&Bound<'_, PyAny>>, + ) -> CallbackResult { C_RAISE_COUNT.fetch_add(1, Ordering::SeqCst); if let Ok(Some(line)) = code.line_for_offset(py, offset as u32) { println!("C_RAISE at {}", line); } + Ok(CallbackOutcome::Continue) } } @@ -222,7 +333,8 @@ fn tracer_handles_all_events() { e.print(py); panic!("Install Tracer failed"); } - let code = CString::new(r#" + let code = CString::new( + r#" def test_all(): x = 0 if x == 0: @@ -280,28 +392,90 @@ def only_stop_iter(): yield for _ in only_stop_iter(): pass -"#).expect("CString::new failed"); +"#, + ) + .expect("CString::new failed"); if let Err(e) = py.run(code.as_c_str(), None, None) { e.print(py); uninstall_tracer(py).ok(); panic!("Python raised an exception"); } uninstall_tracer(py).unwrap(); - assert!(LINE_COUNT.load(Ordering::SeqCst) >= 1, "expected at least one LINE event, got {}", LINE_COUNT.load(Ordering::SeqCst)); - assert!(INSTRUCTION_COUNT.load(Ordering::SeqCst) >= 1, "expected at least one INSTRUCTION event, got {}", INSTRUCTION_COUNT.load(Ordering::SeqCst)); - assert!(JUMP_COUNT.load(Ordering::SeqCst) >= 1, "expected at least one JUMP event, got {}", JUMP_COUNT.load(Ordering::SeqCst)); - assert!(BRANCH_COUNT.load(Ordering::SeqCst) >= 1, "expected at least one BRANCH event, got {}", BRANCH_COUNT.load(Ordering::SeqCst)); - assert!(PY_START_COUNT.load(Ordering::SeqCst) >= 1, "expected at least one PY_START event, got {}", PY_START_COUNT.load(Ordering::SeqCst)); - assert!(PY_RESUME_COUNT.load(Ordering::SeqCst) >= 1, "expected at least one PY_RESUME event, got {}", PY_RESUME_COUNT.load(Ordering::SeqCst)); - assert!(PY_RETURN_COUNT.load(Ordering::SeqCst) >= 1, "expected at least one PY_RETURN event, got {}", PY_RETURN_COUNT.load(Ordering::SeqCst)); - assert!(PY_YIELD_COUNT.load(Ordering::SeqCst) >= 1, "expected at least one PY_YIELD event, got {}", PY_YIELD_COUNT.load(Ordering::SeqCst)); - assert!(PY_THROW_COUNT.load(Ordering::SeqCst) >= 1, "expected at 
least one PY_THROW event, got {}", PY_THROW_COUNT.load(Ordering::SeqCst)); - assert!(PY_UNWIND_COUNT.load(Ordering::SeqCst) >= 1, "expected at least one PY_UNWIND event, got {}", PY_UNWIND_COUNT.load(Ordering::SeqCst)); - assert!(RAISE_COUNT.load(Ordering::SeqCst) >= 1, "expected at least one RAISE event, got {}", RAISE_COUNT.load(Ordering::SeqCst)); - assert!(RERAISE_COUNT.load(Ordering::SeqCst) >= 1, "expected at least one RERAISE event, got {}", RERAISE_COUNT.load(Ordering::SeqCst)); - assert!(EXCEPTION_HANDLED_COUNT.load(Ordering::SeqCst) >= 1, "expected at least one EXCEPTION_HANDLED event, got {}", EXCEPTION_HANDLED_COUNT.load(Ordering::SeqCst)); + assert!( + LINE_COUNT.load(Ordering::SeqCst) >= 1, + "expected at least one LINE event, got {}", + LINE_COUNT.load(Ordering::SeqCst) + ); + assert!( + INSTRUCTION_COUNT.load(Ordering::SeqCst) >= 1, + "expected at least one INSTRUCTION event, got {}", + INSTRUCTION_COUNT.load(Ordering::SeqCst) + ); + assert!( + JUMP_COUNT.load(Ordering::SeqCst) >= 1, + "expected at least one JUMP event, got {}", + JUMP_COUNT.load(Ordering::SeqCst) + ); + assert!( + BRANCH_COUNT.load(Ordering::SeqCst) >= 1, + "expected at least one BRANCH event, got {}", + BRANCH_COUNT.load(Ordering::SeqCst) + ); + assert!( + PY_START_COUNT.load(Ordering::SeqCst) >= 1, + "expected at least one PY_START event, got {}", + PY_START_COUNT.load(Ordering::SeqCst) + ); + assert!( + PY_RESUME_COUNT.load(Ordering::SeqCst) >= 1, + "expected at least one PY_RESUME event, got {}", + PY_RESUME_COUNT.load(Ordering::SeqCst) + ); + assert!( + PY_RETURN_COUNT.load(Ordering::SeqCst) >= 1, + "expected at least one PY_RETURN event, got {}", + PY_RETURN_COUNT.load(Ordering::SeqCst) + ); + assert!( + PY_YIELD_COUNT.load(Ordering::SeqCst) >= 1, + "expected at least one PY_YIELD event, got {}", + PY_YIELD_COUNT.load(Ordering::SeqCst) + ); + assert!( + PY_THROW_COUNT.load(Ordering::SeqCst) >= 1, + "expected at least one PY_THROW event, got {}", + 
PY_THROW_COUNT.load(Ordering::SeqCst) + ); + assert!( + PY_UNWIND_COUNT.load(Ordering::SeqCst) >= 1, + "expected at least one PY_UNWIND event, got {}", + PY_UNWIND_COUNT.load(Ordering::SeqCst) + ); + assert!( + RAISE_COUNT.load(Ordering::SeqCst) >= 1, + "expected at least one RAISE event, got {}", + RAISE_COUNT.load(Ordering::SeqCst) + ); + assert!( + RERAISE_COUNT.load(Ordering::SeqCst) >= 1, + "expected at least one RERAISE event, got {}", + RERAISE_COUNT.load(Ordering::SeqCst) + ); + assert!( + EXCEPTION_HANDLED_COUNT.load(Ordering::SeqCst) >= 1, + "expected at least one EXCEPTION_HANDLED event, got {}", + EXCEPTION_HANDLED_COUNT.load(Ordering::SeqCst) + ); // assert!(STOP_ITERATION_COUNT.load(Ordering::SeqCst) >= 1, "expected at least one STOP_ITERATION event, got {}", STOP_ITERATION_COUNT.load(Ordering::SeqCst)); //Issue - assert!(C_RETURN_COUNT.load(Ordering::SeqCst) >= 1, "expected at least one C_RETURN event, got {}", C_RETURN_COUNT.load(Ordering::SeqCst)); - assert!(C_RAISE_COUNT.load(Ordering::SeqCst) >= 1, "expected at least one C_RAISE event, got {}", C_RAISE_COUNT.load(Ordering::SeqCst)); + assert!( + C_RETURN_COUNT.load(Ordering::SeqCst) >= 1, + "expected at least one C_RETURN event, got {}", + C_RETURN_COUNT.load(Ordering::SeqCst) + ); + assert!( + C_RAISE_COUNT.load(Ordering::SeqCst) >= 1, + "expected at least one C_RAISE event, got {}", + C_RAISE_COUNT.load(Ordering::SeqCst) + ); }); } diff --git a/codetracer-python-recorder/tests/test_fail_fast_on_py_start.py b/codetracer-python-recorder/tests/test_fail_fast_on_py_start.py deleted file mode 100644 index 8af3109..0000000 --- a/codetracer-python-recorder/tests/test_fail_fast_on_py_start.py +++ /dev/null @@ -1,50 +0,0 @@ -import runpy -import sys -from pathlib import Path - -import pytest - - -def test_fail_fast_when_frame_access_fails(tmp_path: Path): - # Import the built extension module - import codetracer_python_recorder.codetracer_python_recorder as cpr - - # Prepare a simple program that 
triggers a Python function call - prog = tmp_path / "prog.py" - prog.write_text( - """ -def f(): - return 1 - -f() -""" - ) - - # Monkeypatch sys._getframe to simulate a failure when capturing args - original_getframe = getattr(sys, "_getframe") - - def boom(*_args, **_kwargs): # pragma: no cover - intentionally fails - raise RuntimeError("boom: _getframe disabled") - - sys._getframe = boom # type: ignore[attr-defined] - - try: - # Start tracing; activate only for our program path so stray imports don't trigger - cpr.start_tracing(str(tmp_path), "json", activation_path=str(prog)) - - with pytest.raises(RuntimeError) as excinfo: - runpy.run_path(str(prog), run_name="__main__") - - # Ensure the error surfaced clearly and didn’t get swallowed - assert "_getframe" in str(excinfo.value) or "boom" in str(excinfo.value) - - # After the first failure, tracing must be disabled so - # subsequent Python function calls do not trigger the same error. - # Re-running the same program path should no longer raise. - ns = runpy.run_path(str(prog), run_name="__main__") - assert isinstance(ns, dict) - finally: - # Restore state - sys._getframe = original_getframe # type: ignore[attr-defined] - cpr.stop_tracing() - diff --git a/design-docs/only-real-filenames.md b/design-docs/only-real-filenames.md new file mode 100644 index 0000000..75d3b72 --- /dev/null +++ b/design-docs/only-real-filenames.md @@ -0,0 +1,135 @@ +In Python monitoring sometimes the co_filename of a code object +doesn't point to a real file, but something else. Those filenames look +like `<...>`. + +Lines from those files cannot be traced. For this reason, we should +skip them for all monitoring events. + +sys.monitoring provides the capability to turn off monitoring for +specific lines by having the callback return a special value +`sys.monitoring.DISABLE`. We want to use this functionality to disable +monitoring of those lines and improve performance. + +The following changes need to be made: + +1. 
Extend the `Tracer` trait so every callback can signal back a + `sys.monitoring` action (continue or disable). Update all existing + implementations and tests to use the new return type. +2. Add reusable logic that decides whether a given code object refers + to a real on-disk file and cache the decision per `co_filename` / + code id. +3. Invoke the new filtering logic from every `RuntimeTracer` callback + before any expensive work. When a code object should be ignored, + skip our bookkeeping and return the disable sentinel to CPython so + further events from that location stop firing. + +Note: We cannot import `sys.monitoring` inside the hot callbacks, +because in some embedded runtimes importing during tracing is either +prohibited or will deadlock. We must therefore cache the +`sys.monitoring.DISABLE` sentinel ahead of time while we are still in a +safe context (e.g., during tracer installation). + +We need to make sure that our test suite has comprehensive tests that +prove the new filtering/disable behaviour and cover regressions on the +public tracer API. + +# Technical design solutions + +## Tracer callback return values + +- Introduce a new enum `CallbackOutcome` in `src/tracer.rs` with two + variants: `Continue` (default) and `DisableLocation`. +- Define a `type CallbackResult = PyResult<CallbackOutcome>` so every + trait method can surface Python errors and signal whether the + location must be disabled. `Continue` replaces the current implicit + unit return. +- Update the `Tracer` trait so all callbacks return `CallbackResult`. + Default implementations continue to return `Ok(CallbackOutcome::Continue)` + so existing tracers only need minimal changes. +- The PyO3 callback shims (`callback_line`, `callback_py_start`, etc.) + will translate `CallbackOutcome::DisableLocation` into the cached + Python sentinel and otherwise return `None`. 
This keeps the Python + side compliant with `sys.monitoring` semantics + (see https://docs.python.org/3/library/sys.monitoring.html#sys.monitoring.DISABLE). + +## Accessing `sys.monitoring.DISABLE` + +- During `install_tracer`, after we obtain `monitoring_events`, load + `sys.monitoring.DISABLE` once and store it in the global tracer state + (`Global` struct) as a `Py<PyAny>`. Because `Py<PyAny>` is `Send` + + `Sync`, it can be safely cached behind the global mutex and reused + inside callbacks without re-importing modules. +- Provide a helper on `Global` (e.g., `fn disable_sentinel<'py>(&self, + py: Python<'py>) -> Bound<'py, PyAny>`) that returns the bound object + when we need to hand the sentinel back to Python. +- Make sure `uninstall_tracer` drops the sentinel alongside other + state so a new install can reload it cleanly. + +## `RuntimeTracer` filename filtering + +- Add a dedicated method `fn should_trace_code(&mut self, + py: Python<'_>, code: &CodeObjectWrapper) -> ShouldTrace` returning a + new internal enum `{ Trace, SkipAndDisable }`. + - A file is considered “real” when `co_filename` does not match the + `<...>` pattern. For now we treat any filename that begins with `<` + and ends with `>` (after trimming whitespace) as synthetic. This + covers `<string>`, `<stdin>`, `<frozen importlib._bootstrap>`, etc. + - Cache negative decisions in a `HashSet<usize>` keyed by the code + object id so subsequent events avoid repeating the string checks. + The set is cleared on `flush()`/`finish()` if we reset state. +- Each public callback (`on_py_start`, `on_line`, `on_py_return`) will + call `should_trace_code` first. When the decision is `SkipAndDisable` + we: + - Return `CallbackOutcome::DisableLocation` immediately so CPython + stops sending events for that location. + - Avoid calling any of the expensive frame/value capture paths. +- When the decision allows tracing, we continue with the existing + behaviour. 
The activation-path logic runs before the filtering so a + deactivated tracer still ignores events regardless of filename. + +## Backwards compatibility and ergonomics + +- `RuntimeTracer` becomes the only tracer that returns + `DisableLocation`; other tracers keep returning `Continue`. +- Update the test helper tracers under `tests/` to use the new return + type but still assert on event counts; their filenames will remain + real so behaviour does not change. +- Document the change in the crate-level docs (`src/lib.rs`) to warn + downstream implementors that callbacks now return `CallbackResult`. + +# Test suite + +- Rust unit test for the pure filename predicate (e.g., + `<string>`, `<stdin>`, `script.py`) to prevent + regressions in the heuristic. +- Runtime tracer integration test that registers a `RuntimeTracer`, + executes code with a `<string>` filename, and asserts that: + - No events are written to the trace writer. + - The corresponding callbacks return the disable sentinel (inspect + via a lightweight shim or mock writer). +- Complementary test that runs a real file (use `tempfile` to emit a + small script) and ensures events are still recorded. +- Regression tests for the updated trait: adjust `tests/print_tracer.rs` + counting tracer to assert it still receives events and that the + return value defaults to `Continue`. +- Add a smoke test checking we do not attempt to import + `sys.monitoring` inside callbacks by patching the module import hook + during a run. + +# Implementation Plan + +1. Introduce `CallbackOutcome`/`CallbackResult` in `src/tracer.rs` and + update every trait method signature plus the PyO3 callback shims. + Store the `sys.monitoring.DISABLE` sentinel in the `Global` state. +2. Propagate signature updates through existing tracers and tests, + ensuring they all return `CallbackOutcome::Continue`. +3. Extend `RuntimeTracer` with the filename filtering method, cached + skip set, and early-return logic that emits `DisableLocation` when + appropriate. +4. 
Update the runtime tracer callbacks (`on_py_start`, `on_line`, + `on_py_return`, and any other events we wire up later) to invoke the + filtering method first. +5. Expand the test suite with the new unit/integration coverage and + adjust existing tests to the trait changes. +6. Perform a final pass to document the new behaviour in public docs + and ensure formatting/lints pass. diff --git a/design-docs/value-capture.md b/design-docs/value-capture.md new file mode 100644 index 0000000..4523806 --- /dev/null +++ b/design-docs/value-capture.md @@ -0,0 +1,353 @@ +Implement full variable capture in codetracer-python-recorder. Add a +comprehensive test suite. Here is the spec for the task and the tests: + +# Python Tracing Recorder: Capturing All Visible Variables at Each Line + +## Overview of Python Variable Scopes + +In CPython, the accessible variables at a given execution point consist of: + +* Local variables of the current function or code block (including parameters). + +* Closure (nonlocal) variables that come from enclosing functions (if any). + +* Global variables defined at the module level (the current module’s namespace). + +(Built-ins are also always accessible if not shadowed, but they are usually not included in “visible variables” snapshots for tracing.) + +Each executing frame in CPython carries these variables in its namespace. To capture a snapshot of all variables accessible at a line, we need to inspect the frame’s environment, combining locals, nonlocals, and globals. This must work for any code construct (functions, methods, comprehensions, class bodies, etc.) under CPython 3.12 and 3.13. + +## Using the CPython C API (via PyO3) to Get Variables + +1. **Access the current frame**: The sys.monitoring API’s line event callback does not directly provide a frame object. We can obtain the current PyFrameObject via the C API. 
Using PyO3’s FFI, you can call: +* `PyEval_GetFrame()` - return current thread state's frame, NULL if no frame is executing +* `PyThreadState_GetFrame(PyThreadState *tstate)` - return a given thread state's frame, NULL if no frame is currently executing. +This yields the top-of-stack frame – if your callback is a C function, that should be the frame of the user code. (If your callback is a Python function, you may need frame.f_back to get the user code’s frame.) + +2. **Get all local and closure variables**: Once you have the `PyFrameObject *frame`, retrieve the frame’s local variables mapping. In Python 3.12+, `frame.f_locals` is a proxy that reflects both local variables and any closure (cell/free) variables with their current values. In C, you can use `PyFrame_GetLocals(frame)`. + +3. **Get global variables**: The frame’s globals are in `frame.f_globals`. You can obtain this dictionary via `PyFrame_GetGlobals(frame)`. This is the module’s global namespace. + +4. Encode them in the trace state. You can use the function `encode_value` to encode each one of those variables in a format suitable for recording and then record them using the capabilities provided by the `runtime_tracing` crate. + +## Important Details and Edge Cases + +* **Closure (free) variables**: In modern CPython, closure variables are handled seamlessly via the frame’s locals proxy. You do not need to separately fetch function.__closure__ or outer frame variables – the frame’s local mapping already includes free vars. The PEP for frame proxies explicitly states that each access to `frame.f_locals` yields a mapping of local and closure variable names to their current values. This ensures that in a nested function, variables from an enclosing scope (nonlocals) appear in the inner frame’s locals mapping (bound to the value in the closure cell). + +* **Comprehensions and generators**: In Python 3, list comprehensions, generator expressions, and the like are implemented as separate function frames. 
The above approach still works since those have their own frames (with any needed closure variables included similarly). Just grab that frame’s locals and globals as usual. + +* **Class bodies and module level**: A class body or module top-level code is executed in an unoptimized frame where `locals == globals` (module) or a new class namespace dict. You need to make sure that you don't record variables twice! Here's a sketch of how to do this: +```rust +use pyo3::prelude::*; +use pyo3::ffi; +use std::ptr; + +pub unsafe fn locals_is_globals_ffi(_py: Python<'_>, frame: *mut ffi::PyFrameObject) -> PyResult<bool> { + // Ensure f_locals exists and is synced with fast-locals + if ffi::PyFrame_FastToLocalsWithError(frame) < 0 { + return Err(PyErr::fetch(_py)); + } + let f_locals = (*frame).f_locals; + let f_globals = (*frame).f_globals; + Ok(!f_locals.is_null() && ptr::eq(f_locals, f_globals)) +} +``` + +* **Builtins**: Typically, built-in names (from frame.f_builtins) are implicitly accessible if not shadowed, but they are usually not included in a variables snapshot. You should ignore the builtins. + +* **Name resolution order**: If needed, CPython 3.12 introduced PyFrame_GetVar(frame, name) which will retrieve a variable by name as the interpreter would – checking locals (including cells), then globals, then builtins. This could be used to fetch specific variables on demand. However, for capturing all variables, it’s more efficient to pull the mappings as described above rather than querying names one by one. + + +## Putting It Together + +In your Rust/PyO3 tracing recorder, for each line event you can do something like: + +* Get the current frame (`frame_obj`). + +* Get the locals proxy via `PyFrame_GetLocals`. Iterate over each object, construct its representation via `encode_value` and then add it to the trace. 
+ +* If locals != globals, get the globals dict (`globals_dict = PyFrame_GetGlobals(frame_obj)`) and process it just like the locals + + +By using these facilities via PyO3, you can reliably capture all visible variables at each line of execution in your tracing recorder. + +## References + +Python C-API – Frame Objects: functions to access frame attributes (locals, globals, etc.). + +PEP 667 – Frame locals proxy (Python 3.13): frame.f_locals now reflects local + cell + free variables’ values. + +PEP 558 – Defined semantics for locals(): introduced Py + + +# Comprehensive Test Suite for Python Tracing Recorder + +This test suite is designed to verify that a tracing recorder (using sys.monitoring and frame inspection) correctly captures all variables visible at each executable line of Python code. Each test covers a distinct scope or visibility scenario in Python. The tracer should record every variable that is in scope at that line, ensuring no visible name is missed. We include functions, closures, globals, class scopes, comprehensions, generators, exception blocks, and more, to guarantee full coverage of Python's LEGB (Local, Enclosing, Global, Built-in) name resolution rules. + +Each test case below provides a brief description of what it covers, followed by a code snippet (Python script) that exercises that behavior. No actual tracing logic is included – we only show the source code whose execution should be monitored. The expectation is that at runtime, the tracer’s LINE event will fire on each line and the recorder will capture all variables accessible in that scope at that moment. + +## 1. Simple Function: Parameters and Locals + +**Scope**: This test focuses on a simple function with a parameter and local variables. It verifies that the recorder sees function parameters and any locals on each line inside the function. On entering the function, the parameter should be visible; as lines execute, newly assigned local variables become visible too. 
This ensures that basic function scope is handled. + +```py +def simple_function(x): + a = 1 # Parameter x is visible; local a is being defined + b = a + x # Locals a, b and parameter x are visible (b defined this line) + return a, b # Locals a, b and x still visible at return + + +# Test the function +result = simple_function(5) +``` + +_Expected_: The tracer should capture x (parameter) and then a and b as they become defined in simple_function. + +## 2. Nested Functions and Closure Variables (nonlocal) + +**Scope**: This test covers nested functions, where an inner function uses a closure variable from its outer function. We verify that variables in the enclosing (nonlocal) scope are visible inside the inner function, and that the nonlocal statement allows the inner function to modify the outer variable. Both the outer function’s locals and the inner function’s locals (plus closed-over variables) should be captured appropriately. + +```py +def outer_func(x): + y = 1 + def inner_func(z): + nonlocal y # Declare y from outer_func as nonlocal + w = x + y + z # x (outer param), y (outer var), z (inner param), w (inner local) + y = w # Modify outer variable y + return w + total = inner_func(5) # Calls inner_func, which updates y + return y, total # y is updated in outer scope +result = outer_func(2) +``` + +_Expected_: Inside `inner_func`, the tracer should capture x, y (from outer scope), z, and w at each line. In `outer_func`, it should capture x, y, and later the returned total. This ensures enclosing scope variables are handled (nonlocal variables are accessible to nested functions). + +## 3. Global and Module-Level Variables + +**Scope**: This test validates visibility of module-level (global) variables. It defines globals and uses them inside a function, including modifying a global with the global statement. We ensure that at each line, global names are captured when in scope (either at the module level or when referenced inside a function). 
+ +```py +GLOBAL_VAL = 10 +counter = 0 + +def global_test(): + local_copy = GLOBAL_VAL # Access a global variable + global counter + counter += 1 # Modify a global variable + return local_copy, counter + +# Use the function and check global effects +before = counter +result = global_test() +after = counter +``` + +_Expected_: The tracer should capture *GLOBAL_VAL* and counter as globals on relevant lines. At the module level, GLOBAL_VAL, counter, before, after, etc. are in the global namespace. Inside global_test(), it should capture local_copy and see GLOBAL_VAL as a global. The global counter declaration ensures counter is treated as global in that function and its updated value remains in the module scope. + +## 4. Class Definition Scope and Metaclass + +**Scope:** This test targets class definition bodies, including the effect of a metaclass. When a class body executes, it has a local namespace that becomes the class’s attribute dictionary. We verify that variables assigned in the class body are captured, and that references to those variables or to globals are handled. Additionally, we include a metaclass to ensure that class creation via a metaclass is also traced. + +```python +CONSTANT = 42 + +class MetaCounter(type): + count = 0 + def __init__(cls, name, bases, attrs): + MetaCounter.count += 1 # cls, name, bases, attrs visible; MetaCounter.count updated + super().__init__(name, bases, attrs) + +class Sample(metaclass=MetaCounter): + a = 10 + b = a + 5 # uses class attribute a + print(a, b, CONSTANT) # can access class attrs a, b and global CONSTANT + def method(self): + return self.a + self.b + +# After class definition, metaclass count should have incremented +instances = MetaCounter.count +``` + +**Expected:** Within `MetaCounter`, the tracer should capture class-level attributes like `count` as well as method parameters (`cls`, `name`, `bases`, `attrs`) during class creation. 
In `Sample`’s body, it should capture `a` once defined, then `b` and `a` on the next line, and even allow access to `CONSTANT` (a global) during class body execution. After definition, `Sample.a` and `Sample.b` exist as class attributes (not directly as globals outside the class). The tracer should handle the class scope like a local namespace for that block. + +## 5. Lambdas and Comprehensions (List, Set, Dict, Generator) + +**Scope:** This combined test covers lambda expressions and various comprehensions, each of which introduces an inner scope. We ensure the tracer captures variables inside these expressions, including any outer variables they close over and the loop variables within comprehensions. Notably, in Python 3, the loop variable in a comprehension is local to the comprehension and not visible outside. + +Lambda: Tests an inline lambda function with its own parameter and expression. + +List Comprehension: Uses a loop variable internally and an external variable. + +Set & Dict Comprehensions: Similar scope behavior with their own loop variables. + +Generator Expression: A generator comprehension that lazily produces values. + +```python +factor = 2 +double = lambda y: y * factor # 'y' is local parameter, 'factor' is captured from outer scope + +squares = [n**2 for n in range(3)] # 'n' is local to comprehension, not visible after +scaled_set = {n * factor for n in range(3)} # set comprehension capturing outer 'factor' +mapping = {n: n*factor for n in range(3)} # dict comprehension with local n +gen_exp = (n * factor for n in range(3)) # generator expression (lazy evaluated) +result_list = list(gen_exp) # force generator to evaluate +``` + +**Expected:** Inside the lambda, `y` (parameter) and `factor` (enclosing variable) are visible to the tracer. In each comprehension, the loop variable (e.g., `n`) and any outer variables (`factor`) should be captured during the comprehension's execution. 
After the comprehension, the loop variable is no longer defined (e.g., `n` is not accessible outside the list comprehension). The generator expression has a similar scope to a comprehension; its variables should be captured when it's iterated. All these ensure the recorder handles anonymous function scopes and comprehension internals. + +## 6. Generators and Coroutines (async/await) + +**Scope:** This test covers a generator function and an async coroutine function. Generators use yield to produce values and suspend execution, while async coroutines use await. We ensure that local variables persist across yields/awaits and remain visible when execution resumes (on each line hit). This verifies that the tracer captures the state in suspended functions. + +```python +def counter_gen(n): + total = 0 + for i in range(n): + total += i + yield total # At yield: i and total are visible and persisted across resumes + return total + +import asyncio +async def async_sum(data): + total = 0 + for x in data: + total += x + await asyncio.sleep(0) # At await: x and total persist in coroutine + return total + +# Run the generator +gen = counter_gen(3) +gen_results = list(gen) # exhaust the generator + +# Run the async coroutine +coroutine_result = asyncio.run(async_sum([1, 2, 3])) +``` + +**Expected:** In `counter_gen`, at each yield line the tracer should capture `i` and `total` (and after resumption, those values are still available). In `async_sum`, at the await line, `x` and `total` are captured and remain after the await. The tracer must handle the resumption of these functions (triggered by `PY_RESUME` events) and still see previously defined locals. This test ensures generator state and coroutine state do not lose any variables between pauses. + +## 7. Try/Except/Finally and With Statement + +**Scope:** This test combines exception handling blocks and context manager usage. 
It verifies that the tracer captures variables introduced in a try/except flow (including the exception variable, which has a limited scope) as well as in a with statement context manager. We specifically ensure the exception alias is only visible inside the except block, and that variables from try, else, and finally blocks, as well as the with target, are all accounted for. + +```python +def exception_and_with_demo(x): + try: + inv = 10 / x # In try: 'inv' defined if no error + except ZeroDivisionError as e: + error_msg = f"Error: {e}" # In except: 'e' (exception) and 'error_msg' are visible + else: + inv += 1 # In else: 'inv' still visible here + finally: + final_flag = True # In finally: 'final_flag' visible (e is out of scope here) + + with open(__file__, 'r') as f: + first_line = f.readline() # Inside with: 'f' (file handle) and 'first_line' visible + return locals() # return all locals for inspection + +# Execute with a case that triggers exception and one that does not +result1 = exception_and_with_demo(0) # triggers ZeroDivisionError +result2 = exception_and_with_demo(5) # normal execution +``` + +**Expected:** In the except block, the tracer should capture the exception object name (`e`) and any locals like `error_msg`, but after the block `e` goes out of scope (no longer in `locals()`). The else block runs when no exception, and the tracer sees `inv` there. The finally block executes in both cases, with `final_flag` visible. During the with block, the tracer captures the context manager’s target (`f`) and any inner variables (`first_line`). This test ensures all branches of try/except/else/finally and the scope entering/exiting a with are handled. + +## 8. Decorators and Function Wrappers + +**Scope:** This test involves function decorators, which themselves often use closures. We have a decorator that closes over a free variable and wraps a function. 
The goal is to ensure that when the decorated function is defined and called, the tracer captures variables both in the decorator’s scope and in the wrapped function’s scope. This covers the scenario of variables visible during decoration and invocation. + +```python +setting = "Hello" + +def my_decorator(func): + def wrapper(*args, **kwargs): + # Inside wrapper: 'args', 'kwargs', and 'setting' from outer scope are visible + print("Decorator wrapping with setting:", setting) + return func(*args, **kwargs) + return wrapper + +@my_decorator +def greet(name): + message = f"Hi, {name}" # Inside greet: 'name' and 'message' are locals + return message + +# Call the decorated function +output = greet("World") +``` + +**Expected:** When defining `greet`, the decorator `my_decorator` is applied. The tracer should capture that process: inside `my_decorator`, the `func` parameter and the outer variable `setting` are visible. Within `wrapper`, on each call, `args`, `kwargs`, and the closed-over `setting` are visible to the tracer. Inside `greet`, normal function locals apply (`name`, `message`). This test ensures decorated functions don’t hide any variables from the tracer (it must trace through the decorator and the function execution). + +## 9. Dynamic Execution (eval and exec) + +**Scope:** This test checks dynamic creation and access of variables using `eval()` and `exec()`. The recorder should capture variables introduced by an exec at the moment they become available, as well as usage of variables via eval strings. We ensure that even dynamically created names or accessed names are seen by the tracer just like normal variables. 
+ +```python +expr_code = "dynamic_var = 99" +exec(expr_code) # Executes code, defining a new variable dynamically +check = dynamic_var + 1 # Uses the dynamically created variable + +def eval_test(): + value = 10 + formula = "value * 2" + result = eval(formula) # 'value' (local) is accessed dynamically via eval + return result +out = eval_test() +``` + +**Expected:** At the `exec(expr_code)` line, the tracer should capture that `dynamic_var` gets created in the global scope. On the next line, `dynamic_var` is visible and used. Inside `eval_test()`, when `eval(formula)` is executed, the tracer should see the local `value` (and `formula`) in that frame, confirming that eval could access `value`. All dynamically introduced or accessed names should be recorded as they appear. + +## 10. Import Statements and Visibility + +**Scope:** This test covers the effect of import statements on variable visibility. Importing modules or names introduces new variables (module objects or imported names) into the local or global namespace. We test both a global import and a local (within-function) import to ensure the tracer captures these names when they become available. + +```python +import math # Import at module level introduces 'math' in globals + +def import_test(): + import os # Import inside function introduces 'os' as a local name + constant = math.pi # Can use global import inside function + cwd = os.getcwd() # Uses the locally imported module + return constant, cwd + +val, path = import_test() +``` + +**Expected:** After the top-level import `math`, the tracer should list `math` as a new global variable. Inside `import_test()`, after the `import os` line, `os` should appear as a local variable in that function’s scope. The usage of `math.pi` shows that globals remain accessible in the function, and the use of `os.getcwd()` confirms `os` is in the local namespace. 
This test ensures imported names are captured at the appropriate scope (global or local) when they are introduced. + +## 11. Built-in Scope (Builtins) + +**Scope:** This test highlights built-in names, which are always available via Python’s built-in scope (e.g., `len`, `print`, `ValueError`). The tracer is not required to explicitly list all built-ins at each line (as that would be overwhelming), but we include this case to note that built-in functions or constants are accessible in any scope. We ensure usage of a built-in is traced like any other variable access, although the recorder +may choose not to list the entire built-in namespace. + +```python +def builtins_test(seq): + n = len(seq) # 'len' is a built-in function + m = max(seq) # 'max' is another built-in + return n, m + +result = builtins_test([5, 3, 7]) +``` + +**Expected:** In the `builtins_test` function, calls to `len` and `max` are made. The tracer should see `seq`, `n`, and `m` as local variables, and while `len`/`max` are resolved from the built-in scope, the recorder may not list them as they are implicitly available (built-ins are found after the global scope in name resolution). The important point is that using built-ins does not introduce new names in the user-defined scopes. This test is mostly a note that the built-in scope exists and built-in names are always accessible (the tracer could capture them, but it's typically unnecessary to record every built-in name). + +--- + +**Conclusion:** The above tests collectively cover all major visibility scenarios in Python. By running a tracing recorder with these snippets, one can verify that at every executable line, the recorder correctly identifies all variables that are in scope (function locals, closure variables, globals, class locals, comprehension temporaries, exception variables, etc.). This comprehensive coverage ensures the tracing tool is robust against Python’s various scoping rules and constructs. 
+ +# General Rules + +* This spec is for the `/codetracer-python-recorder` project and NOT for `/codetracer-pure-python-recorder`. +* Code and tests should be added to `/codetracer-python-recorder/src/runtime_tracer.rs`. +* Performance is important. Avoid using Python modules and functions and prefer PyO3 methods, including the FFI API. +* If you want to run Python, do it like so: `uv run python`. This will set up the right venv. Similarly, for running tests use `uv run pytest`. +* After every code change you need to run `just dev` to make sure that you are testing the new code. Otherwise some tests might run against the old code. + +* Avoid defensive programming: when encountering edge cases which are + not explicitly mentioned in the specification, the default behaviour + should be to crash (using `panic!`). We will only handle them after + we receive a report from a user which confirms that the edge case + does happen in real life. +* Do not make any code changes to unrelated parts of the code. The only callback that should change behaviour is `on_line`. +* If the code has already implemented part of the specification described here, find out what is missing and implement that. +* If a test fails repeatedly after three attempts to fix the code, STOP. Let a human handle it. DON'T DELETE TESTS!!! +* When writing tests, be careful with concurrency. If two tests run at the same time using the same Python interpreter (or same Rust process?) they will both try to register callbacks via `sys.monitoring` and could deadlock. 
+* If you want to test Rust code without using just, use `cargo nextest`, not `cargo test` diff --git a/examples/threading.py b/examples/threading_example.py similarity index 100% rename from examples/threading.py rename to examples/threading_example.py diff --git a/examples/value_capture_all.py b/examples/value_capture_all.py new file mode 100644 index 0000000..31ea084 --- /dev/null +++ b/examples/value_capture_all.py @@ -0,0 +1,231 @@ +"""Single example exercising many variable-visibility scenarios for value capture. + +Covers: +- Simple function locals/params +- Nested functions, nonlocal (closures) +- Globals read/write +- Class body scope and metaclass +- Lambdas and all comprehensions +- Generators and async coroutines +- try/except/else/finally and with +- Decorators and wrappers +- eval/exec dynamic names +- Imports at module and function scope +- Builtins usage +""" + +from __future__ import annotations + +#import asyncio +import math + +# 1. Simple function: params and locals +def simple_function(x: int): + a = 1 + b = a + x + return a, b + +# Globals +GLOBAL_VAL = 10 +counter = 0 +setting = "Hello" +CONSTANT = 42 + + +# 8. Decorator and wrapper (captures free var `setting`) +def my_decorator(func): + def wrapper(*args, **kwargs): + # variables visible here: args, kwargs, setting, func (closed over) + return func(*args, **kwargs) + return wrapper + + +@my_decorator +def greet(name: str) -> str: + message = f"Hi, {name}" + return message + + + + +# 2. Nested functions and nonlocal +def outer_func(x: int): + y = 1 + + def inner_func(z: int): + nonlocal y + w = x + y + z + y = w + return w + + total = inner_func(5) + return y, total + + +# 3. Globals +def global_test(): + local_copy = GLOBAL_VAL + global counter + counter += 1 + return local_copy, counter + + +# 4. 
Class scope and metaclass +class MetaCounter(type): + count = 0 + + def __init__(cls, name, bases, attrs): + MetaCounter.count += 1 + super().__init__(name, bases, attrs) + + +class Sample(metaclass=MetaCounter): + a = 10 + b = a + 5 + c = a + b + CONSTANT + + def method(self): + return self.a + self.b + + +# 6. Generators and async coroutines +def counter_gen(n: int): + total = 0 + for i in range(n): + total += i + yield total + return total + + +# async def async_sum(data: list[int]) -> int: +# total = 0 +# for x in data: +# total += x +# await asyncio.sleep(0) +# return total + + +# async def agen(n: int): +# for i in range(n): +# yield i + 1 + + +# 7. try/except/finally and with +def exception_and_with_demo(x: int): + try: + inv = 10 / x + except ZeroDivisionError as e: + error_msg = f"Error: {e}" + else: + inv += 1 + finally: + final_flag = True + + with open(__file__, "r") as f: + first_line = f.readline() + return locals() + + +# 9. eval/exec +def eval_test(): + value = 10 + formula = "value * 2" + result = eval(formula) + return result + + +# 10. Imports and visibility +def import_test(): + import os + constant = math.pi + cwd = os.getcwd() + return constant, cwd + + +# 11. Builtins +def builtins_test(seq): + n = len(seq) + m = max(seq) + return n, m + + +def main() -> None: + 1 + res1 = simple_function(5) + + # 2 + res2 = outer_func(2) + + # 3 + before = counter + _local_copy, _ctr = global_test() + after = counter + + # 5. Lambdas and comprehensions + factor = 2 + double = lambda y: y * factor # noqa: E731 + squares = [n ** 2 for n in range(3)] + scaled_set = {n * factor for n in range(3)} + mapping = {n: n * factor for n in range(3)} + gen_exp = (n * factor for n in range(3)) + result_list = list(gen_exp) + + # 6. 
Generators and async coroutines + gen = counter_gen(3) + gen_results = list(gen) + # coroutine_result = asyncio.run(async_sum([1, 2, 3])) + + # async def consume() -> int: + # acc = 0 + # async for x in agen(3): + # acc += x + # return acc + + # async_acc = asyncio.run(consume()) + + # 7. try/except/finally and with + r1 = exception_and_with_demo(0) + r2 = exception_and_with_demo(5) + has_e = "error_msg" in r1 + has_inv = "inv" in r2 + has_final_flag = r1.get("final_flag", False) and r2.get("final_flag", False) + + # 8. Decorator and wrapper + output = greet("World") + + # 9. eval/exec + expr_code = "dynamic_var = 99" + exec(expr_code, globals()) + dynamic_var = globals()["dynamic_var"] + check = dynamic_var + 1 + out = eval_test() + + # 10. import visibility + constant, cwd = import_test() + + # 11. builtins + built_n, built_m = builtins_test([5, 3, 7]) + + #Aggregate a compact, deterministic summary + print( + "ok", + res1[0] + res1[1], # simple_function sum + sum(res2), # outer_func sum + after - before, # global counter increment + MetaCounter.count, # metaclass incremented classes + sum(squares), len(scaled_set), len(mapping), sum(result_list), + sum(gen_results), # generator totals + #coroutine_result, async_acc, # async results + has_e, has_inv, has_final_flag, # exception/with signals + len(output), # decorator + greet result length + dynamic_var, check, out, # eval/exec values + f"{constant:.3f}", # math.pi to 3 decimals + bool(len(cwd)), # cwd non-empty is True + built_n, built_m, # builtins result + double(7), # lambda capture + Sample.c, # class body computed constant + ) + + +if __name__ == "__main__": + main() diff --git a/flake.nix b/flake.nix index 3c462e8..e818bb8 100644 --- a/flake.nix +++ b/flake.nix @@ -31,6 +31,8 @@ rustc rustfmt clippy + rust-analyzer + cargo-nextest # Build tooling for Python extensions maturin