diff --git a/.gitignore b/.gitignore index f7ca325..7fc2ae7 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,6 @@ build *~ .idea/ -.cargo/ \ No newline at end of file +.cargo/ + +**/*.egg-info/ diff --git a/codetracer-python-recorder/codetracer_python_recorder/__init__.py b/codetracer-python-recorder/codetracer_python_recorder/__init__.py index 600e890..b22a5a4 100644 --- a/codetracer-python-recorder/codetracer_python_recorder/__init__.py +++ b/codetracer-python-recorder/codetracer_python_recorder/__init__.py @@ -9,5 +9,8 @@ from . import api as _api from .api import * # re-export public API symbols +from .auto_start import auto_start_from_env + +auto_start_from_env() __all__ = _api.__all__ diff --git a/codetracer-python-recorder/codetracer_python_recorder/api.py b/codetracer-python-recorder/codetracer_python_recorder/api.py index c8fca0b..d42d98c 100644 --- a/codetracer-python-recorder/codetracer_python_recorder/api.py +++ b/codetracer-python-recorder/codetracer_python_recorder/api.py @@ -1,142 +1,12 @@ -"""High-level tracing API built on a Rust backend. - -This module exposes a minimal interface for starting and stopping -runtime traces. The heavy lifting is delegated to the -`codetracer_python_recorder` Rust extension which hooks -into `runtime_tracing` and `sys.monitoring`. -""" +"""High-level tracing API built on a Rust backend.""" from __future__ import annotations -import contextlib -import os -from pathlib import Path -from typing import Iterator, Optional - -from .codetracer_python_recorder import ( - flush_tracing as _flush_backend, - is_tracing as _is_tracing_backend, - start_tracing as _start_backend, - stop_tracing as _stop_backend, -) - -TRACE_BINARY: str = "binary" -TRACE_JSON: str = "json" -DEFAULT_FORMAT: str = TRACE_BINARY - -_active_session: Optional["TraceSession"] = None - - -def start( - path: os.PathLike | str, - *, - format: str = DEFAULT_FORMAT, - start_on_enter: os.PathLike | str | None = None, -) -> "TraceSession": - """Start a global trace session. - - - ``path``: Target directory where trace files will be written. - Files created: ``trace.json``/``trace.bin``, ``trace_metadata.json``, ``trace_paths.json``. - - ``format``: Either ``binary`` or ``json`` (controls events file name/format). - - ``start_on_enter``: Optional file path; when provided, tracing remains - paused until the tracer observes execution entering this file. Useful to - avoid recording interpreter and import startup noise when launching a - script via the CLI. - - The current implementation records trace data through a Rust backend. - """ - global _active_session - if _is_tracing_backend(): - raise RuntimeError("tracing already active") - - trace_path = Path(path) - _start_backend( - str(trace_path), - format, - str(Path(start_on_enter)) if start_on_enter is not None else None, - ) - session = TraceSession(path=trace_path, format=format) - _active_session = session - return session - - -def stop() -> None: - """Stop the active trace session if one is running.""" - global _active_session - if not _is_tracing_backend(): - return - _stop_backend() - _active_session = None - - -def is_tracing() -> bool: - """Return ``True`` when a trace session is active.""" - return _is_tracing_backend() - - -def flush() -> None: - """Flush buffered trace data. +from typing import Iterable - With the current placeholder implementation this is a no-op but the - function is provided to match the planned public API. - """ - if _is_tracing_backend(): - _flush_backend() +from .formats import DEFAULT_FORMAT, TRACE_BINARY, TRACE_JSON +from .session import TraceSession, flush, is_tracing, start, stop, trace - -@contextlib.contextmanager -def trace( - path: os.PathLike | str, - *, - format: str = DEFAULT_FORMAT, -) -> Iterator["TraceSession"]: - """Context manager helper for scoped tracing.""" - session = start( - path, - format=format, - ) - try: - yield session - finally: - session.stop() - - -class TraceSession: - """Handle representing a live tracing session.""" - - path: Path - format: str - - def __init__(self, path: Path, format: str) -> None: - self.path = path - self.format = format - - def stop(self) -> None: - """Stop this trace session.""" - if _active_session is self: - stop() - - def flush(self) -> None: - """Flush buffered trace data for this session.""" - flush() - - def __enter__(self) -> "TraceSession": - return self - - def __exit__(self, exc_type, exc, tb) -> None: # pragma: no cover - thin wrapper - self.stop() - - -def _auto_start_from_env() -> None: - path = os.getenv("CODETRACER_TRACE") - if not path: - return - fmt = os.getenv("CODETRACER_FORMAT", DEFAULT_FORMAT) - start(path, format=fmt) - - -_auto_start_from_env() - -__all__ = [ +__all__: Iterable[str] = ( "TraceSession", "DEFAULT_FORMAT", "TRACE_BINARY", @@ -146,4 +16,4 @@ def _auto_start_from_env() -> None: "is_tracing", "trace", "flush", -] +) diff --git a/codetracer-python-recorder/codetracer_python_recorder/auto_start.py b/codetracer-python-recorder/codetracer_python_recorder/auto_start.py new file mode 100644 index 0000000..8918cc7 --- /dev/null +++ b/codetracer-python-recorder/codetracer_python_recorder/auto_start.py @@ -0,0 +1,40 @@ +"""Environment-driven trace auto-start helper.""" +from __future__ import annotations + +import logging +import os +from typing import Iterable + +from .formats import DEFAULT_FORMAT + +ENV_TRACE_PATH = "CODETRACER_TRACE" +ENV_TRACE_FORMAT = "CODETRACER_FORMAT" + +log = logging.getLogger(__name__) + + +def auto_start_from_env() -> None: + """Start tracing automatically when the relevant environment variables are set.""" + path = os.getenv(ENV_TRACE_PATH) + if not path: + return + + # Delay import to avoid boot-time circular dependencies. + from . import session + + if session.is_tracing(): + log.debug("codetracer auto-start skipped: tracing already active") + return + + fmt = os.getenv(ENV_TRACE_FORMAT, DEFAULT_FORMAT) + log.debug( + "codetracer auto-start triggered", extra={"trace_path": path, "format": fmt} + ) + session.start(path, format=fmt) + + +__all__: Iterable[str] = ( + "ENV_TRACE_FORMAT", + "ENV_TRACE_PATH", + "auto_start_from_env", +) diff --git a/codetracer-python-recorder/codetracer_python_recorder/formats.py b/codetracer-python-recorder/codetracer_python_recorder/formats.py new file mode 100644 index 0000000..4f3a4ef --- /dev/null +++ b/codetracer-python-recorder/codetracer_python_recorder/formats.py @@ -0,0 +1,37 @@ +"""Trace format constants and helpers.""" +from __future__ import annotations + +from typing import Iterable + +TRACE_BINARY: str = "binary" +TRACE_JSON: str = "json" +DEFAULT_FORMAT: str = TRACE_BINARY +SUPPORTED_FORMATS: frozenset[str] = frozenset({TRACE_BINARY, TRACE_JSON}) + + +def normalize_format(value: str | None) -> str: + """Normalise user-provided strings to the format names recognised by the backend. + + The runtime currently accepts ``"binary"`` (plus legacy aliases handled + on the Rust side) and ``"json"``. Unknown formats fall back to the + lower-cased input so the backend can decide how to react; callers can + choose to guard against unsupported values by checking ``SUPPORTED_FORMATS``. + """ + if value is None: + return DEFAULT_FORMAT + return value.lower() + + +def is_supported(value: str) -> bool: + """Return ``True`` if *value* is one of the officially supported formats.""" + return value.lower() in SUPPORTED_FORMATS + + +__all__: Iterable[str] = ( + "DEFAULT_FORMAT", + "TRACE_BINARY", + "TRACE_JSON", + "SUPPORTED_FORMATS", + "is_supported", + "normalize_format", +) diff --git a/codetracer-python-recorder/codetracer_python_recorder/session.py b/codetracer-python-recorder/codetracer_python_recorder/session.py new file mode 100644 index 0000000..5014a6d --- /dev/null +++ b/codetracer-python-recorder/codetracer_python_recorder/session.py @@ -0,0 +1,130 @@ +"""Tracing session management helpers.""" +from __future__ import annotations + +import contextlib +from pathlib import Path +from typing import Iterator, Optional + +from .codetracer_python_recorder import ( + flush_tracing as _flush_backend, + is_tracing as _is_tracing_backend, + start_tracing as _start_backend, + stop_tracing as _stop_backend, +) +from .formats import DEFAULT_FORMAT, SUPPORTED_FORMATS, is_supported, normalize_format + +_active_session: Optional["TraceSession"] = None + + +class TraceSession: + """Handle representing a live tracing session.""" + + path: Path + format: str + + def __init__(self, path: Path, format: str) -> None: + self.path = path + self.format = format + + def stop(self) -> None: + """Stop this trace session.""" + if _active_session is self: + stop() + + def flush(self) -> None: + """Flush buffered trace data for this session.""" + flush() + + def __enter__(self) -> "TraceSession": + return self + + def __exit__(self, exc_type, exc, tb) -> None: # pragma: no cover - thin wrapper + self.stop() + + +def start( + path: str | Path, + *, + format: str = DEFAULT_FORMAT, + start_on_enter: str | Path | None = None, +) -> TraceSession: + """Start a new global trace session.""" + global _active_session + if _is_tracing_backend(): + raise RuntimeError("tracing already active") + + trace_path = _validate_trace_path(Path(path)) + normalized_format = _coerce_format(format) + activation_path = _normalize_activation_path(start_on_enter) + + _start_backend(str(trace_path), normalized_format, activation_path) + session = TraceSession(path=trace_path, format=normalized_format) + _active_session = session + return session + + +def stop() -> None: + """Stop the active trace session if one is running.""" + global _active_session + if not _is_tracing_backend(): + return + _stop_backend() + _active_session = None + + +def is_tracing() -> bool: + """Return ``True`` when a trace session is active.""" + return _is_tracing_backend() + + +def flush() -> None: + """Flush buffered trace data.""" + if _is_tracing_backend(): + _flush_backend() + + +@contextlib.contextmanager +def trace( + path: str | Path, + *, + format: str = DEFAULT_FORMAT, +) -> Iterator[TraceSession]: + """Context manager helper for scoped tracing.""" + session = start(path, format=format) + try: + yield session + finally: + session.stop() + + +def _coerce_format(value: str) -> str: + normalized = normalize_format(value) + if not is_supported(normalized): + supported = ", ".join(sorted(SUPPORTED_FORMATS)) + raise ValueError( + f"unsupported trace format '{value}'. Expected one of: {supported}" + ) + return normalized + + +def _validate_trace_path(path: Path) -> Path: + path = path.expanduser() + if path.exists() and not path.is_dir(): + raise ValueError("trace path exists and is not a directory") + return path + + +def _normalize_activation_path(value: str | Path | None) -> str | None: + if value is None: + return None + return str(Path(value).expanduser()) + + +__all__ = ( + "TraceSession", + "flush", + "is_tracing", + "start", + "stop", + "trace", +) diff --git a/codetracer-python-recorder/src/code_object.rs b/codetracer-python-recorder/src/code_object.rs index d1dfbfa..edfba44 100644 --- a/codetracer-python-recorder/src/code_object.rs +++ b/codetracer-python-recorder/src/code_object.rs @@ -1,3 +1,5 @@ +//! Shared code-object caching utilities for sys.monitoring callbacks. + use dashmap::DashMap; use once_cell::sync::OnceCell; use pyo3::prelude::*; @@ -148,7 +150,8 @@ impl CodeObjectRegistry { self.map .entry(id) .or_insert_with(|| Arc::new(CodeObjectWrapper::new(py, code))) - .clone() //AI? Why do we need to clone here? + // Clone the `Arc` so each caller receives its own reference-counted handle. + .clone() } /// Remove the wrapper for a given code id, if present. diff --git a/codetracer-python-recorder/src/lib.rs b/codetracer-python-recorder/src/lib.rs index 585ffdf..0359648 100644 --- a/codetracer-python-recorder/src/lib.rs +++ b/codetracer-python-recorder/src/lib.rs @@ -4,143 +4,28 @@ //! signal when CPython should disable further monitoring for a location by propagating //! the `sys.monitoring.DISABLE` sentinel. -use std::fs; -use std::path::Path; -use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::Once; - -use pyo3::exceptions::PyRuntimeError; -use pyo3::prelude::*; pub mod code_object; -mod runtime_tracer; -pub mod tracer; +mod logging; +pub mod monitoring; +mod runtime; +mod session; + pub use crate::code_object::{CodeObjectRegistry, CodeObjectWrapper}; -pub use crate::tracer::{ - install_tracer, uninstall_tracer, CallbackOutcome, CallbackResult, EventSet, Tracer, +pub use crate::monitoring as tracer; +pub use crate::monitoring::{ + flush_installed_tracer, install_tracer, uninstall_tracer, CallbackOutcome, CallbackResult, + EventSet, Tracer, }; +pub use crate::session::{flush_tracing, is_tracing, start_tracing, stop_tracing}; -/// Global flag tracking whether tracing is active. -static ACTIVE: AtomicBool = AtomicBool::new(false); - -// Initialize Rust logging once per process. Defaults to debug for this crate -// unless overridden by RUST_LOG. This helps surface debug! output during dev. -static INIT_LOGGER: Once = Once::new(); - -fn init_rust_logging_with_default(default_filter: &str) { - INIT_LOGGER.call_once(|| { - let env = env_logger::Env::default().default_filter_or(default_filter); - // Use a compact format with timestamps and targets to aid debugging. - let mut builder = env_logger::Builder::from_env(env); - builder.format_timestamp_micros().format_target(true); - let _ = builder.try_init(); - }); -} - -/// Start tracing using sys.monitoring and runtime_tracing writer. -#[pyfunction] -fn start_tracing(path: &str, format: &str, activation_path: Option<&str>) -> PyResult<()> { - // Ensure logging is ready before any tracer logs might be emitted. - // Default only our crate to debug to avoid excessive verbosity from deps. - init_rust_logging_with_default("codetracer_python_recorder=debug"); - if ACTIVE.load(Ordering::SeqCst) { - return Err(PyRuntimeError::new_err("tracing already active")); - } - - // Interpret `path` as a directory where trace files will be written. - let out_dir = Path::new(path); - if out_dir.exists() && !out_dir.is_dir() { - return Err(PyRuntimeError::new_err( - "trace path exists and is not a directory", - )); - } - if !out_dir.exists() { - // Best-effort create the directory tree - fs::create_dir_all(&out_dir).map_err(|e| { - PyRuntimeError::new_err(format!("failed to create trace directory: {}", e)) - })?; - } - - // Map format string to enum - let fmt = match format.to_lowercase().as_str() { - "json" => runtime_tracing::TraceEventsFileFormat::Json, - // Use BinaryV0 for "binary" to avoid streaming writer here. - "binary" | "binaryv0" | "binary_v0" | "b0" => { - runtime_tracing::TraceEventsFileFormat::BinaryV0 - } - //TODO AI! We need to assert! that the format is among the known values. - other => { - eprintln!("Unknown format '{}', defaulting to binary (v0)", other); - runtime_tracing::TraceEventsFileFormat::BinaryV0 - } - }; - - // Build output file paths inside the directory. - let (events_path, meta_path, paths_path) = match fmt { - runtime_tracing::TraceEventsFileFormat::Json => ( - out_dir.join("trace.json"), - out_dir.join("trace_metadata.json"), - out_dir.join("trace_paths.json"), - ), - _ => ( - out_dir.join("trace.bin"), - out_dir.join("trace_metadata.json"), - out_dir.join("trace_paths.json"), - ), - }; - - // Activation path: when set, tracing starts only after entering it. - let activation_path = activation_path.map(|s| Path::new(s)); - - Python::with_gil(|py| { - // Program and args: keep minimal; Python-side API stores full session info if needed - let sys = py.import("sys")?; - let argv = sys.getattr("argv")?; - let program: String = argv.get_item(0)?.extract::()?; - //TODO: Error-handling. What to do if argv is empty? Does this ever happen? - - let mut tracer = runtime_tracer::RuntimeTracer::new(&program, &[], fmt, activation_path); - - // Start location: prefer activation path, otherwise best-effort argv[0] - let start_path: &Path = activation_path.unwrap_or(Path::new(&program)); - log::debug!("{}", start_path.display()); - tracer.begin(&meta_path, &paths_path, &events_path, start_path, 1)?; - - // Install callbacks - install_tracer(py, Box::new(tracer))?; - ACTIVE.store(true, Ordering::SeqCst); - Ok(()) - }) -} - -/// Stop tracing by resetting the global flag. -#[pyfunction] -fn stop_tracing() -> PyResult<()> { - Python::with_gil(|py| { - // Uninstall triggers finish() on tracer implementation. - uninstall_tracer(py)?; - ACTIVE.store(false, Ordering::SeqCst); - Ok(()) - }) -} - -/// Query whether tracing is currently active. -#[pyfunction] -fn is_tracing() -> PyResult { - Ok(ACTIVE.load(Ordering::SeqCst)) -} - -/// Flush buffered trace data (best-effort, non-streaming formats only). -#[pyfunction] -fn flush_tracing() -> PyResult<()> { - Python::with_gil(|py| crate::tracer::flush_installed_tracer(py)) -} +use pyo3::prelude::*; /// Python module definition. #[pymodule] fn codetracer_python_recorder(_py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { // Initialize logging on import so users see logs without extra setup. // Respect RUST_LOG if present; otherwise default to debug for this crate. - init_rust_logging_with_default("codetracer_python_recorder=debug"); + logging::init_rust_logging_with_default("codetracer_python_recorder=debug"); m.add_function(wrap_pyfunction!(start_tracing, m)?)?; m.add_function(wrap_pyfunction!(stop_tracing, m)?)?; m.add_function(wrap_pyfunction!(is_tracing, m)?)?; diff --git a/codetracer-python-recorder/src/logging.rs b/codetracer-python-recorder/src/logging.rs new file mode 100644 index 0000000..72c1095 --- /dev/null +++ b/codetracer-python-recorder/src/logging.rs @@ -0,0 +1,19 @@ +//! Process-wide logging helpers shared by the PyO3 entry points and tests. + +use std::sync::Once; + +/// Initialise the process-wide Rust logger with a default filter. +/// +/// The logger is only set up once per process. Callers can override the filter +/// by setting the `RUST_LOG` environment variable before the first invocation. +pub fn init_rust_logging_with_default(default_filter: &str) { + static INIT_LOGGER: Once = Once::new(); + + INIT_LOGGER.call_once(|| { + let env = env_logger::Env::default().default_filter_or(default_filter); + // Use a compact format with timestamps and targets to aid debugging. + let mut builder = env_logger::Builder::from_env(env); + builder.format_timestamp_micros().format_target(true); + let _ = builder.try_init(); + }); +} diff --git a/codetracer-python-recorder/src/monitoring/mod.rs b/codetracer-python-recorder/src/monitoring/mod.rs new file mode 100644 index 0000000..29b5107 --- /dev/null +++ b/codetracer-python-recorder/src/monitoring/mod.rs @@ -0,0 +1,166 @@ +//! Helpers around CPython's `sys.monitoring` API. + +use pyo3::prelude::*; +use pyo3::types::PyCFunction; +use std::sync::OnceLock; + +mod tracer; + +pub use tracer::{flush_installed_tracer, install_tracer, uninstall_tracer, Tracer}; + +const MONITORING_TOOL_NAME: &str = "codetracer"; + +/// Identifier for a monitoring event bit mask. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[repr(transparent)] +pub struct EventId(pub i32); + +#[allow(non_snake_case)] +/// Structured access to CPython's `sys.monitoring.events` values. +#[derive(Clone, Copy, Debug)] +pub struct MonitoringEvents { + pub BRANCH: EventId, + pub CALL: EventId, + pub C_RAISE: EventId, + pub C_RETURN: EventId, + pub EXCEPTION_HANDLED: EventId, + pub INSTRUCTION: EventId, + pub JUMP: EventId, + pub LINE: EventId, + pub PY_RESUME: EventId, + pub PY_RETURN: EventId, + pub PY_START: EventId, + pub PY_THROW: EventId, + pub PY_UNWIND: EventId, + pub PY_YIELD: EventId, + pub RAISE: EventId, + pub RERAISE: EventId, + //pub STOP_ITERATION: EventId, //See comment in Tracer trait +} + +/// Wrapper returned by `sys.monitoring.use_tool_id`. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct ToolId { + pub id: u8, +} + +pub type CallbackFn<'py> = Bound<'py, PyCFunction>; + +/// Bit-set describing which events are enabled for a tool. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct EventSet(pub i32); + +/// Convenience constant representing an empty event mask. +pub const NO_EVENTS: EventSet = EventSet(0); + +/// Outcome returned by tracer callbacks to control CPython monitoring. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum CallbackOutcome { + /// Continue receiving events for the current location. + Continue, + /// Disable future events for the current location by returning + /// `sys.monitoring.DISABLE`. + DisableLocation, +} + +/// Result type shared by tracer callbacks. +pub type CallbackResult = PyResult; + +static MONITORING_EVENTS: OnceLock = OnceLock::new(); + +impl EventSet { + /// Create an empty event mask. + pub const fn empty() -> Self { + NO_EVENTS + } + + /// Return true when the set includes the provided event identifier. + pub fn contains(&self, ev: &EventId) -> bool { + (self.0 & ev.0) != 0 + } +} + +/// Acquire a monitoring tool id for Codetracer. +pub fn acquire_tool_id(py: Python<'_>) -> PyResult { + let monitoring = py.import("sys")?.getattr("monitoring")?; + const FALLBACK_ID: u8 = 5; + monitoring.call_method1("use_tool_id", (FALLBACK_ID, MONITORING_TOOL_NAME))?; + Ok(ToolId { id: FALLBACK_ID }) +} + +/// Load monitoring event identifiers from CPython. +pub fn load_monitoring_events(py: Python<'_>) -> PyResult { + let monitoring = py.import("sys")?.getattr("monitoring")?; + let events = monitoring.getattr("events")?; + Ok(MonitoringEvents { + BRANCH: EventId(events.getattr("BRANCH")?.extract()?), + CALL: EventId(events.getattr("CALL")?.extract()?), + C_RAISE: EventId(events.getattr("C_RAISE")?.extract()?), + C_RETURN: EventId(events.getattr("C_RETURN")?.extract()?), + EXCEPTION_HANDLED: EventId(events.getattr("EXCEPTION_HANDLED")?.extract()?), + INSTRUCTION: EventId(events.getattr("INSTRUCTION")?.extract()?), + JUMP: EventId(events.getattr("JUMP")?.extract()?), + LINE: EventId(events.getattr("LINE")?.extract()?), + PY_RESUME: EventId(events.getattr("PY_RESUME")?.extract()?), + PY_RETURN: EventId(events.getattr("PY_RETURN")?.extract()?), + PY_START: EventId(events.getattr("PY_START")?.extract()?), + PY_THROW: EventId(events.getattr("PY_THROW")?.extract()?), + PY_UNWIND: EventId(events.getattr("PY_UNWIND")?.extract()?), + PY_YIELD: EventId(events.getattr("PY_YIELD")?.extract()?), + RAISE: EventId(events.getattr("RAISE")?.extract()?), + RERAISE: EventId(events.getattr("RERAISE")?.extract()?), + //STOP_ITERATION: EventId(events.getattr("STOP_ITERATION")?.extract()?), //See comment in Tracer trait + }) +} + +/// Cache and return the monitoring event structure for the current interpreter. +pub fn monitoring_events(py: Python<'_>) -> PyResult<&'static MonitoringEvents> { + if let Some(ev) = MONITORING_EVENTS.get() { + return Ok(ev); + } + let ev = load_monitoring_events(py)?; + let _ = MONITORING_EVENTS.set(ev); + Ok(MONITORING_EVENTS.get().unwrap()) +} + +/// Register or unregister a single callback for the provided event. +pub fn register_callback( + py: Python<'_>, + tool: &ToolId, + event: &EventId, + cb: Option<&CallbackFn<'_>>, +) -> PyResult<()> { + let monitoring = py.import("sys")?.getattr("monitoring")?; + match cb { + Some(cb) => { + monitoring.call_method("register_callback", (tool.id, event.0, cb), None)?; + } + None => { + monitoring.call_method("register_callback", (tool.id, event.0, py.None()), None)?; + } + } + Ok(()) +} + +/// Combine multiple event ids into a single bit mask. +pub fn events_union(ids: &[EventId]) -> EventSet { + let mut bits = 0i32; + for id in ids { + bits |= id.0; + } + EventSet(bits) +} + +/// Enable events for the given tool id. +pub fn set_events(py: Python<'_>, tool: &ToolId, set: EventSet) -> PyResult<()> { + let monitoring = py.import("sys")?.getattr("monitoring")?; + monitoring.call_method1("set_events", (tool.id, set.0))?; + Ok(()) +} + +/// Release a previously acquired monitoring tool id. +pub fn free_tool_id(py: Python<'_>, tool: &ToolId) -> PyResult<()> { + let monitoring = py.import("sys")?.getattr("monitoring")?; + monitoring.call_method1("free_tool_id", (tool.id,))?; + Ok(()) +} diff --git a/codetracer-python-recorder/src/tracer.rs b/codetracer-python-recorder/src/monitoring/tracer.rs similarity index 81% rename from codetracer-python-recorder/src/tracer.rs rename to codetracer-python-recorder/src/monitoring/tracer.rs index f3c3080..bc7512f 100644 --- a/codetracer-python-recorder/src/tracer.rs +++ b/codetracer-python-recorder/src/monitoring/tracer.rs @@ -1,153 +1,19 @@ +//! Tracer trait and sys.monitoring callback plumbing. + +use std::any::Any; +use std::sync::Mutex; + use crate::code_object::{CodeObjectRegistry, CodeObjectWrapper}; use pyo3::{ exceptions::PyRuntimeError, prelude::*, - types::{PyAny, PyCFunction, PyCode, PyModule}, + types::{PyAny, PyCode, PyModule}, }; -use std::any::Any; -use std::sync::{Mutex, OnceLock}; - -const MONITORING_TOOL_NAME: &str = "codetracer"; - -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -#[repr(transparent)] -pub struct EventId(pub i32); - -#[allow(non_snake_case)] -#[derive(Clone, Copy, Debug)] -pub struct MonitoringEvents { - pub BRANCH: EventId, - pub CALL: EventId, - pub C_RAISE: EventId, - pub C_RETURN: EventId, - pub EXCEPTION_HANDLED: EventId, - pub INSTRUCTION: EventId, - pub JUMP: EventId, - pub LINE: EventId, - pub PY_RESUME: EventId, - pub PY_RETURN: EventId, - pub PY_START: EventId, - pub PY_THROW: EventId, - pub PY_UNWIND: EventId, - pub PY_YIELD: EventId, - pub RAISE: EventId, - pub RERAISE: EventId, - //pub STOP_ITERATION: EventId, //See comment in Tracer trait -} - -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub struct ToolId { - pub id: u8, -} - -pub type CallbackFn<'py> = Bound<'py, PyCFunction>; - -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub struct EventSet(pub i32); - -pub const NO_EVENTS: EventSet = EventSet(0); - -/// Outcome returned by tracer callbacks to control CPython monitoring. -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum CallbackOutcome { - /// Continue receiving events for the current location. - Continue, - /// Disable future events for the current location by returning - /// `sys.monitoring.DISABLE`. - DisableLocation, -} - -/// Result type shared by tracer callbacks. -pub type CallbackResult = PyResult; - -impl EventSet { - pub const fn empty() -> Self { - NO_EVENTS - } - pub fn contains(&self, ev: &EventId) -> bool { - (self.0 & ev.0) != 0 - } -} - -pub fn acquire_tool_id(py: Python<'_>) -> PyResult { - let monitoring = py.import("sys")?.getattr("monitoring")?; - const FALLBACK_ID: u8 = 5; - monitoring.call_method1("use_tool_id", (FALLBACK_ID, MONITORING_TOOL_NAME))?; - Ok(ToolId { id: FALLBACK_ID }) -} - -pub fn load_monitoring_events(py: Python<'_>) -> PyResult { - let monitoring = py.import("sys")?.getattr("monitoring")?; - let events = monitoring.getattr("events")?; - Ok(MonitoringEvents { - BRANCH: EventId(events.getattr("BRANCH")?.extract()?), - CALL: EventId(events.getattr("CALL")?.extract()?), - C_RAISE: EventId(events.getattr("C_RAISE")?.extract()?), - C_RETURN: EventId(events.getattr("C_RETURN")?.extract()?), - EXCEPTION_HANDLED: EventId(events.getattr("EXCEPTION_HANDLED")?.extract()?), - INSTRUCTION: EventId(events.getattr("INSTRUCTION")?.extract()?), - JUMP: EventId(events.getattr("JUMP")?.extract()?), - LINE: EventId(events.getattr("LINE")?.extract()?), - PY_RESUME: EventId(events.getattr("PY_RESUME")?.extract()?), - PY_RETURN: EventId(events.getattr("PY_RETURN")?.extract()?), - PY_START: EventId(events.getattr("PY_START")?.extract()?), - PY_THROW: EventId(events.getattr("PY_THROW")?.extract()?), - PY_UNWIND: EventId(events.getattr("PY_UNWIND")?.extract()?), - PY_YIELD: EventId(events.getattr("PY_YIELD")?.extract()?), - RAISE: EventId(events.getattr("RAISE")?.extract()?), - RERAISE: EventId(events.getattr("RERAISE")?.extract()?), - //STOP_ITERATION: EventId(events.getattr("STOP_ITERATION")?.extract()?), //See comment in Tracer trait - }) -} -static MONITORING_EVENTS: OnceLock = OnceLock::new(); - -pub fn monitoring_events(py: Python<'_>) -> PyResult<&'static MonitoringEvents> { - if let Some(ev) = MONITORING_EVENTS.get() { - return Ok(ev); - } - let ev = load_monitoring_events(py)?; - let _ = MONITORING_EVENTS.set(ev); - Ok(MONITORING_EVENTS.get().unwrap()) -} - -pub fn register_callback( - py: Python<'_>, - tool: &ToolId, - event: &EventId, - cb: Option<&CallbackFn<'_>>, -) -> PyResult<()> { - let monitoring = py.import("sys")?.getattr("monitoring")?; - match cb { - Some(cb) => { - monitoring.call_method("register_callback", (tool.id, event.0, cb), None)?; - } - None => { - monitoring.call_method("register_callback", (tool.id, event.0, py.None()), None)?; - } - } - Ok(()) -} - -pub fn events_union(ids: &[EventId]) -> EventSet { - let mut bits = 0i32; - for id in ids { - bits |= id.0; - } - EventSet(bits) -} - -pub fn set_events(py: Python<'_>, tool: &ToolId, set: EventSet) -> PyResult<()> { - let monitoring = py.import("sys")?.getattr("monitoring")?; - monitoring.call_method1("set_events", (tool.id, set.0))?; - Ok(()) -} - -pub fn free_tool_id(py: Python<'_>, tool: &ToolId) -> PyResult<()> { - let monitoring = py.import("sys")?.getattr("monitoring")?; - monitoring.call_method1("free_tool_id", (tool.id,))?; - Ok(()) -} +use super::{ + acquire_tool_id, free_tool_id, monitoring_events, register_callback, set_events, + CallbackOutcome, CallbackResult, EventSet, MonitoringEvents, ToolId, NO_EVENTS, +}; /// Trait implemented by tracing backends. /// @@ -545,7 +411,6 @@ pub fn uninstall_tracer(py: Python<'_>) -> PyResult<()> { if global.mask.contains(&events.EXCEPTION_HANDLED) { register_callback(py, &global.tool, &events.EXCEPTION_HANDLED, None)?; } - // See comment in tracer trait // if global.mask.contains(&events.STOP_ITERATION) { // register_callback(py, &global.tool, &events.STOP_ITERATION, None)?; // } @@ -556,7 +421,6 @@ pub fn uninstall_tracer(py: Python<'_>) -> PyResult<()> { register_callback(py, &global.tool, &events.C_RAISE, None)?; } - global.registry.clear(); set_events(py, &global.tool, NO_EVENTS)?; free_tool_id(py, &global.tool)?; } @@ -658,14 +522,7 @@ fn callback_py_start( if let Some(global) = GLOBAL.lock().unwrap().as_mut() { let wrapper = global.registry.get_or_insert(py, &code); let result = global.tracer.on_py_start(py, &wrapper, instruction_offset); - return match result { - Ok(outcome) => global.handle_callback(py, Ok(outcome)), - Err(err) => { - let _ = set_events(py, &global.tool, NO_EVENTS); - log::error!("Event monitoring turned off due to exception. No new events will be recorded! {}", err); - Err(err) - } - }; + return global.handle_callback(py, result); } Ok(py.None()) } diff --git a/codetracer-python-recorder/src/runtime/activation.rs b/codetracer-python-recorder/src/runtime/activation.rs new file mode 100644 index 0000000..43ac88e --- /dev/null +++ b/codetracer-python-recorder/src/runtime/activation.rs @@ -0,0 +1,93 @@ +//! Activation gating for the runtime tracer. + +use std::path::{Path, PathBuf}; + +use pyo3::Python; + +use crate::code_object::CodeObjectWrapper; + +/// Tracks activation gating for the runtime tracer. When configured with an +/// activation path, tracing remains paused until code from that file starts +/// executing. Once the activation window completes, tracing is disabled for the +/// remainder of the session. +#[derive(Debug)] +pub struct ActivationController { + activation_path: Option, + activation_code_id: Option, + activation_done: bool, + started: bool, +} + +impl ActivationController { + pub fn new(activation_path: Option<&Path>) -> Self { + let activation_path = activation_path + .map(|p| std::path::absolute(p).expect("activation_path should resolve")); + let started = activation_path.is_none(); + Self { + activation_path, + activation_code_id: None, + activation_done: false, + started, + } + } + + pub fn is_active(&self) -> bool { + self.started + } + + /// Ensure activation state reflects the current event and report whether + /// tracing should continue processing it. + pub fn should_process_event(&mut self, py: Python<'_>, code: &CodeObjectWrapper) -> bool { + self.ensure_started(py, code); + self.is_active() + } + + /// Return the canonical start path for writer initialisation. + pub fn start_path<'a>(&'a self, fallback: &'a Path) -> &'a Path { + self.activation_path.as_deref().unwrap_or(fallback) + } + + /// Attempt to transition into the active state. When the code object + /// corresponds to the activation path, tracing becomes active and remembers + /// the triggering code id so it can stop on return. + pub fn ensure_started(&mut self, py: Python<'_>, code: &CodeObjectWrapper) { + if self.started || self.activation_done { + return; + } + if let Some(activation) = &self.activation_path { + if let Ok(filename) = code.filename(py) { + let file = Path::new(filename); + // `CodeObjectWrapper::filename` is expected to return an absolute + // path. If this assumption turns out to be wrong we will revisit + // the comparison logic. Canonicalisation is deliberately avoided + // here to limit syscalls on hot paths. + if file == activation { + self.started = true; + self.activation_code_id = Some(code.id()); + log::debug!( + "[RuntimeTracer] activated on enter: {}", + activation.display() + ); + } + } + } + } + + /// Handle return events and turn off tracing when the activation function + /// exits. Returns `true` when tracing was deactivated by this call. + pub fn handle_return_event(&mut self, code_id: usize) -> bool { + if self.activation_code_id == Some(code_id) { + self.started = false; + self.activation_done = true; + return true; + } + false + } +} + +impl ActivationController { + #[allow(dead_code)] + pub fn handle_return(&mut self, code_id: usize) -> bool { + self.handle_return_event(code_id) + } +} diff --git a/codetracer-python-recorder/src/runtime/frame_inspector.rs b/codetracer-python-recorder/src/runtime/frame_inspector.rs new file mode 100644 index 0000000..b359b92 --- /dev/null +++ b/codetracer-python-recorder/src/runtime/frame_inspector.rs @@ -0,0 +1,158 @@ +//! Safe helpers around CPython frame inspection for tracing callbacks. + +use std::ptr; + +use pyo3::exceptions::PyRuntimeError; +use pyo3::prelude::*; +use pyo3::types::{PyAny, PyDict, PyMapping}; +use pyo3::{ffi, Py, PyErr}; + +use crate::code_object::CodeObjectWrapper; + +extern "C" { + fn PyFrame_GetLocals(frame: *mut ffi::PyFrameObject) -> *mut ffi::PyObject; + fn PyFrame_GetGlobals(frame: *mut ffi::PyFrameObject) -> *mut ffi::PyObject; +} + +/// Snapshot of the current frame including materialised locals and globals. +#[derive(Debug)] +pub struct FrameSnapshot<'py> { + frame_ptr: *mut ffi::PyFrameObject, + locals: Bound<'py, PyDict>, + globals: Option>, + locals_is_globals: bool, +} + +impl<'py> FrameSnapshot<'py> { + /// Borrow the snapshot of locals for iteration. + pub fn locals(&self) -> &Bound<'py, PyDict> { + &self.locals + } + + /// Borrow the snapshot of globals when distinct from locals. + pub fn globals(&self) -> Option<&Bound<'py, PyDict>> { + self.globals.as_ref() + } + + /// Return true when the original frame referenced the same dict for + /// locals and globals. + pub fn locals_is_globals(&self) -> bool { + self.locals_is_globals + } +} + +impl<'py> Drop for FrameSnapshot<'py> { + fn drop(&mut self) { + if !self.frame_ptr.is_null() { + unsafe { + ffi::Py_DECREF(self.frame_ptr.cast()); + } + self.frame_ptr = ptr::null_mut(); + } + } +} + +/// Capture the frame for *code* and materialise its locals/globals mappings. +/// +/// Returns a RAII snapshot ensuring reference counts are decremented when the +/// snapshot leaves scope. +pub fn capture_frame<'py>( + py: Python<'py>, + code: &CodeObjectWrapper, +) -> PyResult> { + let mut frame_ptr = unsafe { ffi::PyEval_GetFrame() }; + if frame_ptr.is_null() { + return Err(PyRuntimeError::new_err( + "PyEval_GetFrame returned null frame", + )); + } + + unsafe { + ffi::Py_XINCREF(frame_ptr.cast()); + } + + let target_code_ptr = code.as_bound(py).as_ptr(); + + loop { + if frame_ptr.is_null() { + break; + } + let frame_code_ptr = unsafe { ffi::PyFrame_GetCode(frame_ptr) }; + if frame_code_ptr.is_null() { + unsafe { + ffi::Py_DECREF(frame_ptr.cast()); + } + return Err(PyRuntimeError::new_err("PyFrame_GetCode returned null")); + } + let frame_code: Py = unsafe { Py::from_owned_ptr(py, frame_code_ptr.cast()) }; + if frame_code.as_ptr() == target_code_ptr { + break; + } + let back = unsafe { ffi::PyFrame_GetBack(frame_ptr) }; + unsafe { + ffi::Py_DECREF(frame_ptr.cast()); + } + frame_ptr = back; + } + + if frame_ptr.is_null() { + return Err(PyRuntimeError::new_err( + "Failed to locate frame for code object", + )); + } + + unsafe { + if ffi::PyFrame_FastToLocalsWithError(frame_ptr) < 0 { + ffi::Py_DECREF(frame_ptr.cast()); + let err = PyErr::fetch(py); + return Err(err); + } + } + + let locals_raw = unsafe { PyFrame_GetLocals(frame_ptr) }; + if locals_raw.is_null() { + unsafe { + ffi::Py_DECREF(frame_ptr.cast()); + } + return Err(PyRuntimeError::new_err("PyFrame_GetLocals returned null")); + } + let locals_any = unsafe { Bound::::from_owned_ptr(py, locals_raw.cast()) }; + let locals_mapping = locals_any + .downcast::() + .map_err(|_| PyRuntimeError::new_err("Frame locals was not a mapping"))?; + + let globals_raw = unsafe { PyFrame_GetGlobals(frame_ptr) }; + if globals_raw.is_null() { + unsafe { + ffi::Py_DECREF(frame_ptr.cast()); + } + return Err(PyRuntimeError::new_err("PyFrame_GetGlobals returned null")); + } + let globals_any = unsafe { Bound::::from_owned_ptr(py, globals_raw.cast()) }; + let globals_mapping = globals_any + .downcast::() + .map_err(|_| PyRuntimeError::new_err("Frame globals was not a mapping"))?; + + let locals_is_globals = locals_raw == globals_raw; + + let locals_dict = PyDict::new(py); + locals_dict + .update(&locals_mapping) + .expect("Failed to materialize locals dict"); + + let globals_dict = if locals_is_globals { + None + } else { + let dict = PyDict::new(py); + dict.update(&globals_mapping) + .expect("Failed to materialize globals dict"); + Some(dict) + }; + + Ok(FrameSnapshot { + frame_ptr, + locals: locals_dict, + globals: globals_dict, + locals_is_globals, + }) +} diff --git a/codetracer-python-recorder/src/runtime/logging.rs b/codetracer-python-recorder/src/runtime/logging.rs new file mode 100644 index 0000000..6fea9df --- /dev/null +++ b/codetracer-python-recorder/src/runtime/logging.rs @@ -0,0 +1,29 @@ +//! Logging helpers for runtime tracer callbacks. + +use pyo3::Python; + +use crate::code_object::CodeObjectWrapper; + +/// Emit a debug log entry for tracer callbacks, enriching the message with +/// filename, qualified name, and optional line information when available. +pub fn log_event(py: Python<'_>, code: &CodeObjectWrapper, event: &str, lineno: Option) { + if let Some(line) = lineno { + match code.filename(py) { + Ok(filename) => log::debug!("[RuntimeTracer] {event}: {filename}:{line}"), + Err(_) => log::debug!("[RuntimeTracer] {event}: :{line}"), + } + return; + } + + match (code.filename(py), code.qualname(py)) { + (Ok(filename), Ok(qualname)) => { + log::debug!("[RuntimeTracer] {event}: {qualname} ({filename})"); + } + (Ok(filename), Err(_)) => { + log::debug!("[RuntimeTracer] {event}: ({filename})"); + } + _ => { + log::debug!("[RuntimeTracer] {event}"); + } + } +} diff --git a/codetracer-python-recorder/src/runtime_tracer.rs b/codetracer-python-recorder/src/runtime/mod.rs similarity index 59% rename from codetracer-python-recorder/src/runtime_tracer.rs rename to codetracer-python-recorder/src/runtime/mod.rs index bceefa4..ccf0e60 100644 --- a/codetracer-python-recorder/src/runtime_tracer.rs +++ b/codetracer-python-recorder/src/runtime/mod.rs @@ -1,25 +1,33 @@ -use std::collections::HashSet; +//! Runtime tracer facade translating sys.monitoring callbacks into `runtime_tracing` records. + +mod activation; +mod frame_inspector; +mod logging; +mod output_paths; +mod value_capture; +mod value_encoder; + +pub use output_paths::TraceOutputPaths; + +use activation::ActivationController; +use frame_inspector::capture_frame; +use logging::log_event; +use value_capture::{capture_call_arguments, record_return_value, record_visible_scope}; + +use std::collections::{hash_map::Entry, HashMap, HashSet}; use std::path::{Path, PathBuf}; use pyo3::prelude::*; -use pyo3::types::{PyAny, PyDict, PyList, PyMapping, PyTuple}; -use pyo3::{ffi, PyErr}; +use pyo3::types::PyAny; use runtime_tracing::NonStreamingTraceWriter; -use runtime_tracing::{ - Line, TraceEventsFileFormat, TraceWriter, TypeKind, ValueRecord, NONE_VALUE, -}; +use runtime_tracing::{Line, TraceEventsFileFormat, TraceWriter}; use crate::code_object::CodeObjectWrapper; -use crate::tracer::{ +use crate::monitoring::{ events_union, CallbackOutcome, CallbackResult, EventSet, MonitoringEvents, Tracer, }; -extern "C" { - fn PyFrame_GetLocals(frame: *mut ffi::PyFrameObject) -> *mut ffi::PyObject; - fn PyFrame_GetGlobals(frame: *mut ffi::PyFrameObject) -> *mut ffi::PyObject; -} - // Logging is handled via the `log` crate macros (e.g., log::debug!). /// Minimal runtime tracer that maps Python sys.monitoring events to @@ -27,16 +35,10 @@ extern "C" { pub struct RuntimeTracer { writer: NonStreamingTraceWriter, format: TraceEventsFileFormat, - // Activation control: when set, events are ignored until we see - // a code object whose filename matches this path. Once triggered, - // tracing becomes active for the remainder of the session. - activation_path: Option, - // Code object id that triggered activation, used to stop on return - activation_code_id: Option, - // Whether we've already completed a one-shot activation window - activation_done: bool, - started: bool, + activation: ActivationController, + program_path: PathBuf, ignored_code_ids: HashSet, + function_ids: HashMap, } #[derive(Clone, Copy, Debug, PartialEq, Eq)] @@ -59,176 +61,25 @@ impl RuntimeTracer { ) -> Self { let mut writer = NonStreamingTraceWriter::new(program, args); writer.set_format(format); - let activation_path = activation_path.map(|p| std::path::absolute(p).unwrap()); - // If activation path is specified, start in paused mode; otherwise start immediately. - let started = activation_path.is_none(); + let activation = ActivationController::new(activation_path); + let program_path = PathBuf::from(program); Self { writer, format, - activation_path, - activation_code_id: None, - activation_done: false, - started, + activation, + program_path, ignored_code_ids: HashSet::new(), + function_ids: HashMap::new(), } } /// Configure output files and write initial metadata records. - pub fn begin( - &mut self, - meta_path: &Path, - paths_path: &Path, - events_path: &Path, - start_path: &Path, - start_line: u32, - ) -> PyResult<()> { - TraceWriter::begin_writing_trace_metadata(&mut self.writer, meta_path) - .map_err(to_py_err)?; - TraceWriter::begin_writing_trace_paths(&mut self.writer, paths_path).map_err(to_py_err)?; - TraceWriter::begin_writing_trace_events(&mut self.writer, events_path) - .map_err(to_py_err)?; - TraceWriter::start(&mut self.writer, start_path, Line(start_line as i64)); - Ok(()) - } - - /// Return true when tracing is active; may become true on first event - /// from the activation file if configured. - fn ensure_started<'py>(&mut self, py: Python<'py>, code: &CodeObjectWrapper) { - if self.started || self.activation_done { - return; - } - if let Some(activation) = &self.activation_path { - if let Ok(filename) = code.filename(py) { - let f = Path::new(filename); - //NOTE(Tzanko): We expect that code.filename contains an absolute path. If it turns out that this is sometimes not the case - //we will investigate. For we won't do additional conversions here. - // If there are issues the fool-proof solution is to use fs::canonicalize which needs to do syscalls - if f == activation { - self.started = true; - self.activation_code_id = Some(code.id()); - log::debug!( - "[RuntimeTracer] activated on enter: {}", - activation.display() - ); - } - } - } - } - - /// Encode a Python value into a `ValueRecord` used by the trace writer. - /// - /// Canonical rules: - /// - `None` -> `NONE_VALUE` - /// - `bool` -> `Bool` - /// - `int` -> `Int` - /// - `str` -> `String` (canonical for text; do not fall back to Raw) - /// - common containers: - /// - Python `tuple` -> `Tuple` with encoded elements - /// - Python `list` -> `Sequence` with encoded elements (not a slice) - /// - any other type -> textual `Raw` via `__str__` best-effort - fn encode_value<'py>(&mut self, _py: Python<'py>, v: &Bound<'py, PyAny>) -> ValueRecord { - // None - if v.is_none() { - return NONE_VALUE; - } - // bool must be checked before int in Python - if let Ok(b) = v.extract::() { - let ty = TraceWriter::ensure_type_id(&mut self.writer, TypeKind::Bool, "Bool"); - return ValueRecord::Bool { b, type_id: ty }; - } - if let Ok(i) = v.extract::() { - let ty = TraceWriter::ensure_type_id(&mut self.writer, TypeKind::Int, "Int"); - return ValueRecord::Int { i, type_id: ty }; - } - // Strings are encoded canonically as `String` to ensure stable tests - // and downstream processing. Falling back to `Raw` for `str` is - // not allowed. - if let Ok(s) = v.extract::() { - let ty = TraceWriter::ensure_type_id(&mut self.writer, TypeKind::String, "String"); - return ValueRecord::String { - text: s, - type_id: ty, - }; - } - - // Python tuple -> ValueRecord::Tuple with recursively-encoded elements - if let Ok(t) = v.downcast::() { - let mut elements: Vec = Vec::with_capacity(t.len()); - for item in t.iter() { - // item: Bound - elements.push(self.encode_value(_py, &item)); - } - let ty = TraceWriter::ensure_type_id(&mut self.writer, TypeKind::Tuple, "Tuple"); - return ValueRecord::Tuple { - elements, - type_id: ty, - }; - } - - // Python list -> ValueRecord::Sequence with recursively-encoded elements - if let Ok(l) = v.downcast::() { - let mut elements: Vec = Vec::with_capacity(l.len()); - for item in l.iter() { - elements.push(self.encode_value(_py, &item)); - } - let ty = TraceWriter::ensure_type_id(&mut self.writer, TypeKind::Seq, "List"); - return ValueRecord::Sequence { - elements, - is_slice: false, - type_id: ty, - }; - } - - // Python dict -> represent as a Sequence of (key, value) Tuples. - // Keys are expected to be strings for kwargs; for non-str keys we - // fall back to best-effort encoding of the key. - if let Ok(d) = v.downcast::() { - let seq_ty = TraceWriter::ensure_type_id(&mut self.writer, TypeKind::Seq, "Dict"); - let tuple_ty = TraceWriter::ensure_type_id(&mut self.writer, TypeKind::Tuple, "Tuple"); - let str_ty = TraceWriter::ensure_type_id(&mut self.writer, TypeKind::String, "String"); - let mut elements: Vec = Vec::with_capacity(d.len()); - let items = d.items(); - for pair in items.iter() { - if let Ok(t) = pair.downcast::() { - if t.len() == 2 { - let key_obj = t.get_item(0).unwrap(); - let val_obj = t.get_item(1).unwrap(); - let key_rec = if let Ok(s) = key_obj.extract::() { - ValueRecord::String { - text: s, - type_id: str_ty, - } - } else { - self.encode_value(_py, &key_obj) - }; - let val_rec = self.encode_value(_py, &val_obj); - let pair_rec = ValueRecord::Tuple { - elements: vec![key_rec, val_rec], - type_id: tuple_ty, - }; - elements.push(pair_rec); - } - } - } - return ValueRecord::Sequence { - elements, - is_slice: false, - type_id: seq_ty, - }; - } - - // Fallback to Raw string representation - let ty = TraceWriter::ensure_type_id(&mut self.writer, TypeKind::Raw, "Object"); - match v.str() { - Ok(s) => ValueRecord::Raw { - r: s.to_string_lossy().into_owned(), - type_id: ty, - }, - Err(_) => ValueRecord::Error { - msg: "".to_string(), - type_id: ty, - }, - } + pub fn begin(&mut self, outputs: &TraceOutputPaths, start_line: u32) -> PyResult<()> { + let start_path = self.activation.start_path(&self.program_path); + log::debug!("{}", start_path.display()); + outputs + .configure_writer(&mut self.writer, start_path, start_line) + .map_err(to_py_err) } fn ensure_function_id( @@ -236,17 +87,21 @@ impl RuntimeTracer { py: Python<'_>, code: &CodeObjectWrapper, ) -> PyResult { - //TODO AI! current runtime_tracer logic expects that `name` is unique and is used as a key for the function. - //This is wrong. We need to write a test that exposes this issue - let name = code.qualname(py)?; - let filename = code.filename(py)?; - let first_line = code.first_line(py)?; - Ok(TraceWriter::ensure_function_id( - &mut self.writer, - name, - Path::new(filename), - Line(first_line as i64), - )) + match self.function_ids.entry(code.id()) { + Entry::Occupied(entry) => Ok(*entry.get()), + Entry::Vacant(slot) => { + let name = code.qualname(py)?; + let filename = code.filename(py)?; + let first_line = code.first_line(py)?; + let function_id = TraceWriter::ensure_function_id( + &mut self.writer, + name, + Path::new(filename), + Line(first_line as i64), + ); + Ok(*slot.insert(function_id)) + } + } } fn should_trace_code(&mut self, py: Python<'_>, code: &CodeObjectWrapper) -> ShouldTrace { @@ -282,251 +137,55 @@ impl Tracer for RuntimeTracer { code: &CodeObjectWrapper, _offset: i32, ) -> CallbackResult { - self.ensure_started(py, code); + let is_active = self.activation.should_process_event(py, code); if matches!( self.should_trace_code(py, code), ShouldTrace::SkipAndDisable ) { return Ok(CallbackOutcome::DisableLocation); } - if !self.started { + if !is_active { return Ok(CallbackOutcome::Continue); } - match (code.filename(py), code.qualname(py)) { - (Ok(fname), Ok(qname)) => { - log::debug!("[RuntimeTracer] on_py_start: {} ({})", qname, fname) - } - _ => log::debug!("[RuntimeTracer] on_py_start"), - } + log_event(py, code, "on_py_start", None); if let Ok(fid) = self.ensure_function_id(py, code) { - let mut args: Vec = Vec::new(); - let frame_and_args = (|| -> PyResult<()> { - let frame_ptr = unsafe { ffi::PyEval_GetFrame() }; - if frame_ptr.is_null() { - return Err(pyo3::exceptions::PyRuntimeError::new_err( - "on_py_start: null frame", - )); - } - unsafe { - ffi::Py_XINCREF(frame_ptr.cast()); - } - - unsafe { - if ffi::PyFrame_FastToLocalsWithError(frame_ptr) < 0 { - ffi::Py_DECREF(frame_ptr.cast()); - let err = PyErr::fetch(py); - return Err(err); - } - } - - let locals_raw = unsafe { PyFrame_GetLocals(frame_ptr) }; - if locals_raw.is_null() { - unsafe { - ffi::Py_DECREF(frame_ptr.cast()); - } - return Err(pyo3::exceptions::PyRuntimeError::new_err( - "on_py_start: PyFrame_GetLocals returned null", - )); + match capture_call_arguments(py, &mut self.writer, code) { + Ok(args) => TraceWriter::register_call(&mut self.writer, fid, args), + Err(err) => { + let message = format!("on_py_start: failed to capture args: {}", err); + log::error!("{message}"); + return Err(pyo3::exceptions::PyRuntimeError::new_err(message)); } - let locals = unsafe { Bound::::from_owned_ptr(py, locals_raw) }; - - let argcount = code.arg_count(py)? as usize; - let _posonly: usize = code.as_bound(py).getattr("co_posonlyargcount")?.extract()?; - let kwonly: usize = code.as_bound(py).getattr("co_kwonlyargcount")?.extract()?; - let flags = code.flags(py)?; - const CO_VARARGS: u32 = 0x04; - const CO_VARKEYWORDS: u32 = 0x08; - - let varnames_obj = code.as_bound(py).getattr("co_varnames")?; - let varnames: Vec = varnames_obj.extract()?; - - let mut idx = 0usize; - let take_n = std::cmp::min(argcount, varnames.len()); - for name in varnames.iter().take(take_n) { - match locals.get_item(name) { - Ok(val) => { - let vrec = self.encode_value(py, &val); - args.push(TraceWriter::arg(&mut self.writer, name, vrec)); - } - Err(e) => { - panic!("Error {:?}", e) - } - } - idx += 1; - } - - if (flags & CO_VARARGS) != 0 && idx < varnames.len() { - let name = &varnames[idx]; - if let Ok(val) = locals.get_item(name) { - let vrec = self.encode_value(py, &val); - args.push(TraceWriter::arg(&mut self.writer, name, vrec)); - } - idx += 1; - } - - let kwonly_take = std::cmp::min(kwonly, varnames.len().saturating_sub(idx)); - for name in varnames.iter().skip(idx).take(kwonly_take) { - match locals.get_item(name) { - Ok(val) => { - let vrec = self.encode_value(py, &val); - args.push(TraceWriter::arg(&mut self.writer, name, vrec)); - } - Err(e) => { - panic!("Error {:?}", e) - } - } - } - idx = idx.saturating_add(kwonly_take); - - if (flags & CO_VARKEYWORDS) != 0 && idx < varnames.len() { - let name = &varnames[idx]; - if let Ok(val) = locals.get_item(name) { - let vrec = self.encode_value(py, &val); - args.push(TraceWriter::arg(&mut self.writer, name, vrec)); - } - } - unsafe { - ffi::Py_DECREF(frame_ptr.cast()); - } - Ok(()) - })(); - - if let Err(e) = frame_and_args { - let message = format!("on_py_start: failed to capture args: {}", e); - log::error!("{message}"); - return Err(pyo3::exceptions::PyRuntimeError::new_err(message)); } - - TraceWriter::register_call(&mut self.writer, fid, args); } Ok(CallbackOutcome::Continue) } fn on_line(&mut self, py: Python<'_>, code: &CodeObjectWrapper, lineno: u32) -> CallbackResult { - self.ensure_started(py, code); + let is_active = self.activation.should_process_event(py, code); if matches!( self.should_trace_code(py, code), ShouldTrace::SkipAndDisable ) { return Ok(CallbackOutcome::DisableLocation); } - if !self.started { + if !is_active { return Ok(CallbackOutcome::Continue); } - if let Ok(fname) = code.filename(py) { - log::debug!("[RuntimeTracer] on_line: {}:{}", fname, lineno); - } else { - log::debug!("[RuntimeTracer] on_line: :{}", lineno); - } - - let mut frame_ptr = unsafe { ffi::PyEval_GetFrame() }; - if frame_ptr.is_null() { - panic!("PyEval_GetFrame returned null frame"); - } - unsafe { - ffi::Py_XINCREF(frame_ptr.cast()); - } - let target_code_ptr = code.as_bound(py).as_ptr(); - loop { - if frame_ptr.is_null() { - break; - } - let frame_code_ptr = unsafe { ffi::PyFrame_GetCode(frame_ptr) }; - if frame_code_ptr.is_null() { - unsafe { - ffi::Py_DECREF(frame_ptr.cast()); - } - panic!("PyFrame_GetCode returned null"); - } - let frame_code: Py = - unsafe { Py::from_owned_ptr(py, frame_code_ptr as *mut ffi::PyObject) }; - if frame_code.as_ptr() == target_code_ptr { - break; - } - let back = unsafe { ffi::PyFrame_GetBack(frame_ptr) }; - unsafe { - ffi::Py_DECREF(frame_ptr.cast()); - } - frame_ptr = back; - } - if frame_ptr.is_null() { - panic!("Failed to locate frame for code object {}", code.id()); - } - - // Synchronise fast locals so PyFrame_GetLocals sees current values. - unsafe { - if ffi::PyFrame_FastToLocalsWithError(frame_ptr) < 0 { - ffi::Py_DECREF(frame_ptr.cast()); - let err = PyErr::fetch(py); - panic!("Failed to sync frame locals: {err}"); - } - } + log_event(py, code, "on_line", Some(lineno)); if let Ok(filename) = code.filename(py) { TraceWriter::register_step(&mut self.writer, Path::new(filename), Line(lineno as i64)); } - // Obtain concrete dict objects for iteration. - let locals_raw = unsafe { PyFrame_GetLocals(frame_ptr) }; - if locals_raw.is_null() { - unsafe { - ffi::Py_DECREF(frame_ptr.cast()); - } - panic!("PyFrame_GetLocals returned null"); - } - let globals_raw = unsafe { PyFrame_GetGlobals(frame_ptr) }; - if globals_raw.is_null() { - unsafe { - ffi::Py_DECREF(frame_ptr.cast()); - } - panic!("PyFrame_GetGlobals returned null"); - } - let locals_is_globals = locals_raw == globals_raw; - let locals_any = unsafe { Bound::::from_owned_ptr(py, locals_raw) }; - let globals_any = unsafe { Bound::::from_owned_ptr(py, globals_raw) }; - let locals_mapping = locals_any - .downcast::() - .expect("Frame locals was not a mapping"); - let globals_mapping = globals_any - .downcast::() - .expect("Frame globals was not a mapping"); - let locals_dict = PyDict::new(py); - locals_dict - .update(&locals_mapping) - .expect("Failed to materialize locals dict"); - let globals_dict = PyDict::new(py); - globals_dict - .update(&globals_mapping) - .expect("Failed to materialize globals dict"); + let snapshot = capture_frame(py, code)?; let mut recorded: HashSet = HashSet::new(); - - for (key, value) in locals_dict.iter() { - let name: String = key.extract().expect("Local name was not a string"); - let encoded = self.encode_value(py, &value); - TraceWriter::register_variable_with_full_value(&mut self.writer, &name, encoded); - recorded.insert(name); - } - - if !locals_is_globals { - for (key, value) in globals_dict.iter() { - let name: String = key.extract().expect("Global name was not a string"); - if name == "__builtins__" || recorded.contains(&name) { - continue; - } - let encoded = self.encode_value(py, &value); - TraceWriter::register_variable_with_full_value(&mut self.writer, &name, encoded); - recorded.insert(name); - } - } - - unsafe { - ffi::Py_DECREF(frame_ptr.cast()); - } + record_visible_scope(py, &mut self.writer, &snapshot, &mut recorded); Ok(CallbackOutcome::Continue) } @@ -538,34 +197,21 @@ impl Tracer for RuntimeTracer { _offset: i32, retval: &Bound<'_, PyAny>, ) -> CallbackResult { - self.ensure_started(py, code); + let is_active = self.activation.should_process_event(py, code); if matches!( self.should_trace_code(py, code), ShouldTrace::SkipAndDisable ) { return Ok(CallbackOutcome::DisableLocation); } - if !self.started { + if !is_active { return Ok(CallbackOutcome::Continue); } - match (code.filename(py), code.qualname(py)) { - (Ok(fname), Ok(qname)) => { - log::debug!("[RuntimeTracer] on_py_return: {} ({})", qname, fname) - } - _ => log::debug!("[RuntimeTracer] on_py_return"), - } + log_event(py, code, "on_py_return", None); - let is_activation_return = self - .activation_code_id - .map(|id| id == code.id()) - .unwrap_or(false); - - let val = self.encode_value(py, retval); - TraceWriter::register_return(&mut self.writer, val); - if is_activation_return { - self.started = false; - self.activation_done = true; + record_return_value(py, &mut self.writer, retval); + if self.activation.handle_return_event(code.id()) { log::debug!("[RuntimeTracer] deactivated on activation return"); } @@ -595,6 +241,7 @@ impl Tracer for RuntimeTracer { TraceWriter::finish_writing_trace_paths(&mut self.writer).map_err(to_py_err)?; TraceWriter::finish_writing_trace_events(&mut self.writer).map_err(to_py_err)?; self.ignored_code_ids.clear(); + self.function_ids.clear(); Ok(()) } } @@ -602,8 +249,8 @@ impl Tracer for RuntimeTracer { #[cfg(test)] mod tests { use super::*; - use crate::tracer::CallbackOutcome; - use pyo3::types::{PyCode, PyModule}; + use crate::monitoring::CallbackOutcome; + use pyo3::types::{PyAny, PyCode, PyModule}; use pyo3::wrap_pyfunction; use runtime_tracing::{FullValueRecord, TraceLowLevelEvent, ValueRecord}; use std::cell::Cell; @@ -716,6 +363,61 @@ finally: assert_eq!(last_outcome(), Some(CallbackOutcome::Continue)); } + #[test] + fn records_return_values_and_deactivates_activation() { + Python::with_gil(|py| { + ensure_test_module(py); + let tmp = tempfile::tempdir().expect("create temp dir"); + let script_path = tmp.path().join("activation_script.py"); + let script = format!( + "{PRELUDE}\n\n\ +def compute():\n emit_return(\"tail\")\n return \"tail\"\n\n\ +result = compute()\n" + ); + std::fs::write(&script_path, &script).expect("write script"); + + let program = script_path.to_string_lossy().into_owned(); + let mut tracer = RuntimeTracer::new( + &program, + &[], + TraceEventsFileFormat::Json, + Some(script_path.as_path()), + ); + + { + let _guard = ScopedTracer::new(&mut tracer); + LAST_OUTCOME.with(|cell| cell.set(None)); + let run_code = format!( + "import runpy\nrunpy.run_path(r\"{}\")", + script_path.display() + ); + let run_code_c = CString::new(run_code).expect("script contains nul byte"); + py.run(run_code_c.as_c_str(), None, None) + .expect("execute test script"); + } + + let returns: Vec = tracer + .writer + .events + .iter() + .filter_map(|event| match event { + TraceLowLevelEvent::Return(record) => { + Some(SimpleValue::from_value(&record.return_value)) + } + _ => None, + }) + .collect(); + + assert!( + returns.contains(&SimpleValue::String("tail".to_string())), + "expected recorded string return, got {:?}", + returns + ); + assert_eq!(last_outcome(), Some(CallbackOutcome::Continue)); + assert!(!tracer.activation.is_active()); + }); + } + #[pyfunction] fn capture_line(py: Python<'_>, code: Bound<'_, PyCode>, lineno: u32) -> PyResult<()> { ACTIVE_TRACER.with(|cell| -> PyResult<()> { @@ -738,9 +440,35 @@ finally: Ok(()) } + #[pyfunction] + fn capture_return_event( + py: Python<'_>, + code: Bound<'_, PyCode>, + value: Bound<'_, PyAny>, + ) -> PyResult<()> { + ACTIVE_TRACER.with(|cell| -> PyResult<()> { + let ptr = cell.get(); + if ptr.is_null() { + panic!("No active RuntimeTracer for capture_return_event"); + } + unsafe { + let tracer = &mut *ptr; + let wrapper = CodeObjectWrapper::new(py, &code); + match tracer.on_py_return(py, &wrapper, 0, &value) { + Ok(outcome) => { + LAST_OUTCOME.with(|cell| cell.set(Some(outcome))); + Ok(()) + } + Err(err) => Err(err), + } + } + })?; + Ok(()) + } + const PRELUDE: &str = r#" import inspect -from test_tracer import capture_line +from test_tracer import capture_line, capture_return_event def snapshot(line=None): frame = inspect.currentframe().f_back @@ -751,6 +479,11 @@ def snap(value): frame = inspect.currentframe().f_back capture_line(frame.f_code, frame.f_lineno) return value + +def emit_return(value): + frame = inspect.currentframe().f_back + capture_return_event(frame.f_code, value) + return value "#; #[derive(Debug, Clone, PartialEq)] @@ -830,6 +563,11 @@ def snap(value): module .add_function(wrap_pyfunction!(capture_line, &module).expect("wrap capture_line")) .expect("add function"); + module + .add_function( + wrap_pyfunction!(capture_return_event, &module).expect("wrap capture_return_event"), + ) + .expect("add return capture function"); py.import("sys") .expect("import sys") .getattr("modules") diff --git a/codetracer-python-recorder/src/runtime/output_paths.rs b/codetracer-python-recorder/src/runtime/output_paths.rs new file mode 100644 index 0000000..b6b3f82 --- /dev/null +++ b/codetracer-python-recorder/src/runtime/output_paths.rs @@ -0,0 +1,60 @@ +//! File-system helpers for trace output management. + +use std::error::Error; +use std::path::{Path, PathBuf}; + +use runtime_tracing::{Line, NonStreamingTraceWriter, TraceEventsFileFormat, TraceWriter}; + +/// File layout for a trace session. Encapsulates the metadata, event, and paths +/// files that need to be initialised alongside the runtime tracer. +#[derive(Debug, Clone)] +pub struct TraceOutputPaths { + events: PathBuf, + metadata: PathBuf, + paths: PathBuf, +} + +impl TraceOutputPaths { + /// Build output paths for a given directory. The directory is expected to + /// exist before initialisation; callers should ensure it is created. + pub fn new(root: &Path, format: TraceEventsFileFormat) -> Self { + let (events_name, metadata_name, paths_name) = match format { + TraceEventsFileFormat::Json => { + ("trace.json", "trace_metadata.json", "trace_paths.json") + } + _ => ("trace.bin", "trace_metadata.json", "trace_paths.json"), + }; + Self { + events: root.join(events_name), + metadata: root.join(metadata_name), + paths: root.join(paths_name), + } + } + + pub fn events(&self) -> &Path { + &self.events + } + + pub fn metadata(&self) -> &Path { + &self.metadata + } + + pub fn paths(&self) -> &Path { + &self.paths + } + + /// Wire the trace writer to the configured output files and record the + /// initial start location. + pub fn configure_writer( + &self, + writer: &mut NonStreamingTraceWriter, + start_path: &Path, + start_line: u32, + ) -> Result<(), Box> { + TraceWriter::begin_writing_trace_metadata(writer, self.metadata())?; + TraceWriter::begin_writing_trace_paths(writer, self.paths())?; + TraceWriter::begin_writing_trace_events(writer, self.events())?; + TraceWriter::start(writer, start_path, Line(start_line as i64)); + Ok(()) + } +} diff --git a/codetracer-python-recorder/src/runtime/value_capture.rs b/codetracer-python-recorder/src/runtime/value_capture.rs new file mode 100644 index 0000000..b3d36a1 --- /dev/null +++ b/codetracer-python-recorder/src/runtime/value_capture.rs @@ -0,0 +1,130 @@ +//! Helpers for capturing call arguments and variable scope for tracing callbacks. + +use std::collections::HashSet; + +use pyo3::exceptions::PyRuntimeError; +use pyo3::prelude::*; +use pyo3::types::PyString; + +use runtime_tracing::{FullValueRecord, NonStreamingTraceWriter, TraceWriter}; + +use crate::code_object::CodeObjectWrapper; +use crate::runtime::frame_inspector::{capture_frame, FrameSnapshot}; +use crate::runtime::value_encoder::encode_value; + +/// Capture Python call arguments for the provided code object and encode them +/// using the runtime tracer writer. +pub fn capture_call_arguments<'py>( + py: Python<'py>, + writer: &mut NonStreamingTraceWriter, + code: &CodeObjectWrapper, +) -> PyResult> { + let snapshot = capture_frame(py, code)?; + let locals = snapshot.locals(); + + let code_bound = code.as_bound(py); + let argcount = code.arg_count(py)? as usize; + let _posonly: usize = code_bound.getattr("co_posonlyargcount")?.extract()?; + let kwonly: usize = code_bound.getattr("co_kwonlyargcount")?.extract()?; + let flags = code.flags(py)?; + + const CO_VARARGS: u32 = 0x04; + const CO_VARKEYWORDS: u32 = 0x08; + + let varnames: Vec = code_bound.getattr("co_varnames")?.extract()?; + + let mut args: Vec = Vec::new(); + let mut idx = 0usize; + + let positional_take = std::cmp::min(argcount, varnames.len()); + for name in varnames.iter().take(positional_take) { + let value = locals + .get_item(name)? + .ok_or_else(|| PyRuntimeError::new_err(format!("missing positional arg '{name}'")))?; + let encoded = encode_value(py, writer, &value); + args.push(TraceWriter::arg(writer, name, encoded)); + idx += 1; + } + + if (flags & CO_VARARGS) != 0 && idx < varnames.len() { + let name = &varnames[idx]; + if let Some(value) = locals.get_item(name)? { + let encoded = encode_value(py, writer, &value); + args.push(TraceWriter::arg(writer, name, encoded)); + } + idx += 1; + } + + let kwonly_take = std::cmp::min(kwonly, varnames.len().saturating_sub(idx)); + for name in varnames.iter().skip(idx).take(kwonly_take) { + let value = locals + .get_item(name)? + .ok_or_else(|| PyRuntimeError::new_err(format!("missing kw-only arg '{name}'")))?; + let encoded = encode_value(py, writer, &value); + args.push(TraceWriter::arg(writer, name, encoded)); + } + idx = idx.saturating_add(kwonly_take); + + if (flags & CO_VARKEYWORDS) != 0 && idx < varnames.len() { + let name = &varnames[idx]; + if let Some(value) = locals.get_item(name)? { + let encoded = encode_value(py, writer, &value); + args.push(TraceWriter::arg(writer, name, encoded)); + } + } + + Ok(args) +} + +/// Record all visible variables from the provided frame snapshot into the writer. +pub fn record_visible_scope( + py: Python<'_>, + writer: &mut NonStreamingTraceWriter, + snapshot: &FrameSnapshot<'_>, + recorded: &mut HashSet, +) { + for (key, value) in snapshot.locals().iter() { + let name = match key.downcast::() { + Ok(pystr) => match pystr.to_str() { + Ok(raw) => raw.to_owned(), + Err(_) => continue, + }, + Err(_) => continue, + }; + let encoded = encode_value(py, writer, &value); + TraceWriter::register_variable_with_full_value(writer, &name, encoded); + recorded.insert(name); + } + + if snapshot.locals_is_globals() { + return; + } + + if let Some(globals_dict) = snapshot.globals() { + for (key, value) in globals_dict.iter() { + let name = match key.downcast::() { + Ok(pystr) => match pystr.to_str() { + Ok(raw) => raw, + Err(_) => continue, + }, + Err(_) => continue, + }; + if name == "__builtins__" || recorded.contains(name) { + continue; + } + let encoded = encode_value(py, writer, &value); + TraceWriter::register_variable_with_full_value(writer, name, encoded); + recorded.insert(name.to_owned()); + } + } +} + +/// Encode and record a return value for the active trace. +pub fn record_return_value( + py: Python<'_>, + writer: &mut NonStreamingTraceWriter, + value: &Bound<'_, PyAny>, +) { + let encoded = encode_value(py, writer, value); + TraceWriter::register_return(writer, encoded); +} diff --git a/codetracer-python-recorder/src/runtime/value_encoder.rs b/codetracer-python-recorder/src/runtime/value_encoder.rs new file mode 100644 index 0000000..ce8875e --- /dev/null +++ b/codetracer-python-recorder/src/runtime/value_encoder.rs @@ -0,0 +1,107 @@ +//! Encode Python values into `runtime_tracing` records. + +use pyo3::prelude::*; +use pyo3::types::{PyAny, PyDict, PyList, PyTuple}; +use runtime_tracing::{NonStreamingTraceWriter, TraceWriter, TypeKind, ValueRecord, NONE_VALUE}; + +/// Convert Python values into `ValueRecord` instances understood by +/// `runtime_tracing`. Nested containers are encoded recursively and reuse the +/// tracer's type registry to ensure deterministic identifiers. +pub fn encode_value<'py>( + py: Python<'py>, + writer: &mut NonStreamingTraceWriter, + value: &Bound<'py, PyAny>, +) -> ValueRecord { + if value.is_none() { + return NONE_VALUE; + } + + if let Ok(b) = value.extract::() { + let ty = TraceWriter::ensure_type_id(writer, TypeKind::Bool, "Bool"); + return ValueRecord::Bool { b, type_id: ty }; + } + + if let Ok(i) = value.extract::() { + let ty = TraceWriter::ensure_type_id(writer, TypeKind::Int, "Int"); + return ValueRecord::Int { i, type_id: ty }; + } + + if let Ok(s) = value.extract::() { + let ty = TraceWriter::ensure_type_id(writer, TypeKind::String, "String"); + return ValueRecord::String { + text: s, + type_id: ty, + }; + } + + if let Ok(tuple) = value.downcast::() { + let mut elements = Vec::with_capacity(tuple.len()); + for item in tuple.iter() { + elements.push(encode_value(py, writer, &item)); + } + let ty = TraceWriter::ensure_type_id(writer, TypeKind::Tuple, "Tuple"); + return ValueRecord::Tuple { + elements, + type_id: ty, + }; + } + + if let Ok(list) = value.downcast::() { + let mut elements = Vec::with_capacity(list.len()); + for item in list.iter() { + elements.push(encode_value(py, writer, &item)); + } + let ty = TraceWriter::ensure_type_id(writer, TypeKind::Seq, "List"); + return ValueRecord::Sequence { + elements, + is_slice: false, + type_id: ty, + }; + } + + if let Ok(dict) = value.downcast::() { + let seq_ty = TraceWriter::ensure_type_id(writer, TypeKind::Seq, "Dict"); + let tuple_ty = TraceWriter::ensure_type_id(writer, TypeKind::Tuple, "Tuple"); + let str_ty = TraceWriter::ensure_type_id(writer, TypeKind::String, "String"); + let mut elements = Vec::with_capacity(dict.len()); + for pair in dict.items().iter() { + if let Ok(pair_tuple) = pair.downcast::() { + if pair_tuple.len() == 2 { + let key = pair_tuple.get_item(0).unwrap(); + let value = pair_tuple.get_item(1).unwrap(); + let key_record = if let Ok(text) = key.extract::() { + ValueRecord::String { + text, + type_id: str_ty, + } + } else { + encode_value(py, writer, &key) + }; + let value_record = encode_value(py, writer, &value); + let pair_record = ValueRecord::Tuple { + elements: vec![key_record, value_record], + type_id: tuple_ty, + }; + elements.push(pair_record); + } + } + } + return ValueRecord::Sequence { + elements, + is_slice: false, + type_id: seq_ty, + }; + } + + let ty = TraceWriter::ensure_type_id(writer, TypeKind::Raw, "Object"); + match value.str() { + Ok(text) => ValueRecord::Raw { + r: text.to_string_lossy().into_owned(), + type_id: ty, + }, + Err(_) => ValueRecord::Error { + msg: "".to_string(), + type_id: ty, + }, + } +} diff --git a/codetracer-python-recorder/src/session.rs b/codetracer-python-recorder/src/session.rs new file mode 100644 index 0000000..bfcfa9a --- /dev/null +++ b/codetracer-python-recorder/src/session.rs @@ -0,0 +1,77 @@ +//! PyO3 entry points for starting and managing trace sessions. + +mod bootstrap; + +use std::path::{Path, PathBuf}; +use std::sync::atomic::{AtomicBool, Ordering}; + +use pyo3::exceptions::PyRuntimeError; +use pyo3::prelude::*; + +use crate::logging::init_rust_logging_with_default; +use crate::monitoring::{flush_installed_tracer, install_tracer, uninstall_tracer}; +use crate::runtime::{RuntimeTracer, TraceOutputPaths}; +use bootstrap::TraceSessionBootstrap; + +/// Global flag tracking whether tracing is active. +static ACTIVE: AtomicBool = AtomicBool::new(false); + +/// Start tracing using sys.monitoring and runtime_tracing writer. +#[pyfunction] +pub fn start_tracing(path: &str, format: &str, activation_path: Option<&str>) -> PyResult<()> { + // Ensure logging is ready before any tracer logs might be emitted. + // Default only our crate to debug to avoid excessive verbosity from deps. + init_rust_logging_with_default("codetracer_python_recorder=debug"); + if ACTIVE.load(Ordering::SeqCst) { + return Err(PyRuntimeError::new_err("tracing already active")); + } + + let activation_path = activation_path.map(PathBuf::from); + + Python::with_gil(|py| { + let bootstrap = TraceSessionBootstrap::prepare( + py, + Path::new(path), + format, + activation_path.as_deref(), + )?; + + let outputs = TraceOutputPaths::new(bootstrap.trace_directory(), bootstrap.format()); + + let mut tracer = RuntimeTracer::new( + bootstrap.program(), + bootstrap.args(), + bootstrap.format(), + bootstrap.activation_path(), + ); + tracer.begin(&outputs, 1)?; + + // Install callbacks + install_tracer(py, Box::new(tracer))?; + ACTIVE.store(true, Ordering::SeqCst); + Ok(()) + }) +} + +/// Stop tracing by resetting the global flag. +#[pyfunction] +pub fn stop_tracing() -> PyResult<()> { + Python::with_gil(|py| { + // Uninstall triggers finish() on tracer implementation. + uninstall_tracer(py)?; + ACTIVE.store(false, Ordering::SeqCst); + Ok(()) + }) +} + +/// Query whether tracing is currently active. +#[pyfunction] +pub fn is_tracing() -> PyResult { + Ok(ACTIVE.load(Ordering::SeqCst)) +} + +/// Flush buffered trace data (best-effort, non-streaming formats only). +#[pyfunction] +pub fn flush_tracing() -> PyResult<()> { + Python::with_gil(|py| flush_installed_tracer(py)) +} diff --git a/codetracer-python-recorder/src/session/bootstrap.rs b/codetracer-python-recorder/src/session/bootstrap.rs new file mode 100644 index 0000000..ea257ba --- /dev/null +++ b/codetracer-python-recorder/src/session/bootstrap.rs @@ -0,0 +1,121 @@ +//! Helpers for preparing a tracing session before installing the runtime tracer. + +use std::fs; +use std::path::{Path, PathBuf}; + +use pyo3::exceptions::PyRuntimeError; +use pyo3::prelude::*; +use runtime_tracing::TraceEventsFileFormat; + +/// Basic metadata about the currently running Python program. +#[derive(Debug, Clone)] +pub struct ProgramMetadata { + pub program: String, + pub args: Vec, +} + +/// Collected data required to start a tracing session. +#[derive(Debug, Clone)] +pub struct TraceSessionBootstrap { + trace_directory: PathBuf, + format: TraceEventsFileFormat, + activation_path: Option, + metadata: ProgramMetadata, +} + +impl TraceSessionBootstrap { + /// Prepare a tracing session by validating the output directory, resolving the + /// requested format and capturing program metadata. + pub fn prepare( + py: Python<'_>, + trace_directory: &Path, + format: &str, + activation_path: Option<&Path>, + ) -> PyResult { + ensure_trace_directory(trace_directory)?; + let format = resolve_trace_format(format)?; + let metadata = collect_program_metadata(py)?; + Ok(Self { + trace_directory: trace_directory.to_path_buf(), + format, + activation_path: activation_path.map(|p| p.to_path_buf()), + metadata, + }) + } + + pub fn trace_directory(&self) -> &Path { + &self.trace_directory + } + + pub fn format(&self) -> TraceEventsFileFormat { + self.format + } + + pub fn activation_path(&self) -> Option<&Path> { + self.activation_path.as_deref() + } + + pub fn program(&self) -> &str { + &self.metadata.program + } + + pub fn args(&self) -> &[String] { + &self.metadata.args + } +} + +/// Ensure the requested trace directory exists and is writable. +pub fn ensure_trace_directory(path: &Path) -> PyResult<()> { + if path.exists() { + if !path.is_dir() { + return Err(PyRuntimeError::new_err( + "trace path exists and is not a directory", + )); + } + return Ok(()); + } + + fs::create_dir_all(path).map_err(|e| { + PyRuntimeError::new_err(format!( + "failed to create trace directory '{}': {e}", + path.display() + )) + }) +} + +/// Convert a user-provided format string into the runtime representation. +pub fn resolve_trace_format(value: &str) -> PyResult { + match value.to_ascii_lowercase().as_str() { + "json" => Ok(TraceEventsFileFormat::Json), + // Accept historical aliases for the binary format. + "binary" | "binaryv0" | "binary_v0" | "b0" => Ok(TraceEventsFileFormat::BinaryV0), + other => Err(PyRuntimeError::new_err(format!( + "unsupported trace format '{other}'. Expected one of: json, binary" + ))), + } +} + +/// Capture program name and arguments from `sys.argv` for metadata records. +pub fn collect_program_metadata(py: Python<'_>) -> PyResult { + let sys = py.import("sys")?; + let argv = sys.getattr("argv")?; + + let program = match argv.get_item(0) { + Ok(obj) => obj.extract::()?, + Err(_) => String::from(""), + }; + + let args = match argv.len() { + Ok(len) if len > 1 => { + let mut items = Vec::with_capacity(len.saturating_sub(1)); + for idx in 1..len { + let value: String = argv.get_item(idx)?.extract()?; + items.push(value); + } + items + } + _ => Vec::new(), + }; + + Ok(ProgramMetadata { program, args }) +} diff --git a/codetracer-python-recorder/test/test_codetracer_api.py b/codetracer-python-recorder/test/test_codetracer_api.py index 0614873..34f8e12 100644 --- a/codetracer-python-recorder/test/test_codetracer_api.py +++ b/codetracer-python-recorder/test/test_codetracer_api.py @@ -41,6 +41,20 @@ def test_environment_auto_start(self) -> None: out = subprocess.check_output([sys.executable, "-c", script], env=env) self.assertEqual(out.decode(), "True") + def test_start_rejects_unsupported_format(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + with self.assertRaises(ValueError): + codetracer.start(Path(tmpdir), format="yaml") + self.assertFalse(codetracer.is_tracing()) + + def test_start_rejects_file_path(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + file_path = Path(tmpdir) / "trace.bin" + file_path.write_text("placeholder") + with self.assertRaises(ValueError): + codetracer.start(file_path) + self.assertFalse(codetracer.is_tracing()) + if __name__ == "__main__": unittest.main() diff --git a/design-docs/adr/0001-file-level-single-responsibility.md b/design-docs/adr/0001-file-level-single-responsibility.md new file mode 100644 index 0000000..9252118 --- /dev/null +++ b/design-docs/adr/0001-file-level-single-responsibility.md @@ -0,0 +1,65 @@ +# ADR 0001: File-Level Single Responsibility Refactor + +- **Status:** Proposed +- **Date:** 2025-10-01 +- **Deciders:** Platform / Runtime Tracing Team +- **Consulted:** Python Tooling WG, Developer Experience WG + +## Context + +The codetracer Python recorder crate has evolved quickly and several source files now mix unrelated concerns: +- [`src/lib.rs`](../../codetracer-python-recorder/src/lib.rs) hosts PyO3 module wiring, global logging setup, tracing session state, and filesystem validation in one place. +- [`src/runtime_tracer.rs`](../../codetracer-python-recorder/src/runtime_tracer.rs) interleaves activation gating, writer lifecycle control, PyFrame helpers, and Python value encoding logic, making it challenging to test or extend any portion independently. +- [`src/tracer.rs`](../../codetracer-python-recorder/src/tracer.rs) combines sys.monitoring shim code with the `Tracer` trait, callback registration, and global caches. +- [`codetracer_python_recorder/api.py`](../../codetracer-python-recorder/codetracer_python_recorder/api.py) mixes format constants, backend interaction, context manager ergonomics, and environment based auto-start side effects. + +This violates the Single Responsibility Principle (SRP) at the file level, obscures ownership boundaries, and increases the risk of merge conflicts and regressions. Upcoming work on richer value capture and optional streaming writers will add more logic to these files unless we carve out cohesive modules now. + +## Decision + +We will reorganise both the Rust crate and supporting Python package so that each file covers a single cohesive topic and exposes a narrow interface. Concretely: +1. Restrict `src/lib.rs` to PyO3 module definition and `pub use` re-exports. Move logging configuration into `src/logging.rs` and tracing session lifecycle into `src/session.rs`. +2. Split the current runtime tracer into a `runtime` module directory with dedicated files for activation control, value encoding, and output file management. The façade in `runtime/mod.rs` will assemble these pieces and expose the existing `RuntimeTracer` API. +3. Introduce a `monitoring` module directory that separates sys.monitoring primitive bindings (`EventId`, `ToolId`, registration helpers) from the `Tracer` trait and callback dispatch logic. +4. Decompose the Python helper package by moving session state management into `session.py`, format constants and validation into `formats.py`, and environment auto-start into `auto_start.py`, while keeping public functions surfaced through `api.py` and `__init__.py`. + +These changes are mechanical reorganisations—no behavioural changes are expected. Public Rust and Python APIs must remain source compatible during the refactor. + +## Consequences + +- **Positive:** + - Easier onboarding for new contributors because each file advertises a single purpose. + - Improved unit testability; e.g., Python value encoding can be tested without instantiating the full tracer. + - Lower merge conflict risk: teams can edit activation logic without touching writer code. + - Clearer extension points for upcoming streaming writer and richer metadata work. +- **Negative / Risks:** + - Temporary churn in module paths may invalidate outstanding branches; mitigation is to stage work in small, reviewable PRs. + - Developers unfamiliar with Rust module hierarchies will need guidance to update `mod` declarations and `use` paths correctly. + - Python packaging changes require careful coordination to avoid circular imports when moving auto-start logic. + +## Implementation Guidelines for Junior Developers + +1. **Work Incrementally.** Aim for small PRs (≤500 LOC diff) that move one responsibility at a time. After each PR run `just test` and ensure all linters stay green. +2. **Preserve APIs.** When moving functions, re-export them from their new module so that existing callers (Rust and Python) compile without modification in the same PR. +3. **Add Focused Tests.** Whenever a helper is extracted (e.g., value encoding), add or migrate unit tests that cover its edge cases. +4. **Document Moves.** Update doc comments and module-level docs to reflect the new structure. Remove outdated TODOs or convert them into follow-up issues. +5. **Coordinate on Shared Types.** When splitting `runtime_tracer.rs`, agree on ownership for shared structs (e.g., `RuntimeTracer` remains in `runtime/mod.rs`). Use `pub(crate)` to keep internals encapsulated. +6. **Python Imports.** After splitting the Python modules, ensure `__all__` in `__init__.py` continues to export the public API. Use relative imports to avoid accidental circular dependencies. +7. **Parallel Work.** Follow the sequencing from `design-docs/file-level-srp-refactor-plan.md` to know when tasks can proceed in parallel. + +## Testing Strategy + +- Run `just test` locally before submitting each PR. +- Add targeted Rust tests for new modules (e.g., `activation` and `value_encoder`). +- Extend Python tests to cover auto-start logic and the context manager after extraction. +- Compare trace outputs against saved fixtures to ensure refactors do not alter serialized data. + +## Alternatives Considered + +- **Leave the layout as-is:** rejected because it impedes planned features and increases onboarding cost. +- **Large rewrite in a single PR:** rejected due to high risk and code review burden. + +## Follow-Up Actions + +- After completing the refactor, update architecture diagrams in `design-docs` to match the new module structure. +- Schedule knowledge-sharing sessions for new module owners to walk through their areas. diff --git a/design-docs/adr/0002-function-level-srp.md b/design-docs/adr/0002-function-level-srp.md new file mode 100644 index 0000000..df2b745 --- /dev/null +++ b/design-docs/adr/0002-function-level-srp.md @@ -0,0 +1,61 @@ +# ADR 0002: Function-Level Single Responsibility Refactor + +- **Status:** Proposed +- **Date:** 2025-10-15 +- **Deciders:** Platform / Runtime Tracing Team +- **Consulted:** Python Tooling WG, Developer Experience WG + +## Context + +The codetracer runtime currently exposes several high-traffic functions that blend unrelated concerns, making them difficult to understand, test, and evolve. + +- [`codetracer-python-recorder/src/session.rs:start_tracing`](../../codetracer-python-recorder/src/session.rs) performs logging setup, state guards, filesystem validation and creation, format parsing, Python metadata collection, tracer instantiation, and sys.monitoring installation within one 70+ line function. +- [`codetracer-python-recorder/src/runtime/mod.rs:on_py_start`](../../codetracer-python-recorder/src/runtime/mod.rs) handles activation gating, synthetic filename filtering, argument collection via unsafe PyFrame calls, error logging, and call registration in a single block. +- [`codetracer-python-recorder/src/runtime/mod.rs:on_line`](../../codetracer-python-recorder/src/runtime/mod.rs) interleaves activation checks, frame navigation, locals/globals materialisation, value encoding, variable registration, and memory hygiene for reference counted objects. +- [`codetracer-python-recorder/src/runtime/mod.rs:on_py_return`](../../codetracer-python-recorder/src/runtime/mod.rs) combines activation lifecycle management with value encoding and logging. +- [`codetracer-python-recorder/codetracer_python_recorder/session.py:start`](../../codetracer-python-recorder/codetracer_python_recorder/session.py) mixes backend state checks, path normalisation, format coercion, and PyO3 bridge calls. + +These hotspots violate the Single Responsibility Principle at the function level. When we add new formats, richer activation flows, or additional capture types, we risk regressions because each modification touches fragile, monolithic code blocks. + +## Decision + +We will refactor high-traffic functions so that each public entry point coordinates narrowly-scoped helpers, each owning a single concern. + +1. **Trace session start-up:** Introduce a `TraceSessionBootstrap` (Rust) that encapsulates directory preparation, format resolution, and program metadata gathering. `start_tracing` will delegate to helpers like `ensure_trace_directory`, `resolve_trace_format`, and `collect_program_metadata`. Python-side `start` will mirror this by delegating validation to dedicated helpers (`validate_trace_path`, `coerce_format`). +2. **Frame inspection & activation gating:** Extract frame traversal and activation decisions into dedicated helpers inside `runtime/frame_inspector.rs` and `runtime/activation.rs`. Callback methods (`on_py_start`, `on_line`, `on_py_return`) will orchestrate the helpers instead of performing raw pointer work inline. +3. **Value capture pipeline:** Move argument, locals, globals, and return value capture to a `runtime::value_capture` module that exposes high-level functions such as `capture_call_arguments(frame, code)` and `record_visible_scope(writer, frame)`. These helpers will own error handling and ensure reference counting invariants, allowing callbacks to focus on control flow. +4. **Logging and error reporting:** Concentrate logging into small, reusable functions (e.g., `log_trace_event(event_kind, code, lineno)`) so that callbacks do not perform ad hoc logging alongside functional work. +5. **Activation lifecycle:** Ensure `ActivationController` remains the single owner for activation state transitions. Callbacks will query `should_process_event` and `handle_deactivation` helpers instead of duplicating checks. + +The refactor maintains public APIs but reorganises internal call graphs to keep each function focused on orchestration. + +## Consequences + +- **Positive:** + - Smaller, intention-revealing functions improve readability and lower the mental load for reviewers modifying callback behaviour. + - Reusable helpers unlock targeted unit tests (e.g., for path validation or locals capture) without invoking the entire tracing stack. + - Error handling becomes consistent and auditable when concentrated in dedicated helpers. + - Future features (streaming writers, selective variable capture) can extend isolated helpers rather than modifying monoliths. +- **Negative / Risks:** + - Increased number of private helper modules/functions may introduce slight organisational overhead for newcomers. + - Extracting FFI-heavy logic requires careful lifetime management; mistakes could introduce reference leaks or double-frees. + - Interim refactors might temporarily duplicate logic until all call sites migrate to the new helpers. + +## Implementation Guidelines + +1. **Preserve semantics:** Validate each step with `just test` and targeted regression fixtures to ensure helper extraction does not change runtime behaviour. +2. **Guard unsafe code:** When moving PyFrame interactions, wrap unsafe blocks in documented helpers with clear preconditions and postconditions. +3. **Keep interfaces narrow:** Expose helper functions as `pub(crate)` or module-private to prevent leaking unstable APIs. +4. **Add focused tests:** Unit test helpers for error cases (e.g., invalid path, unsupported format, missing frame) and integrate them into existing test suites. +5. **Stage changes:** Land extractions in small PRs, updating the surrounding code incrementally to avoid giant rewrites. +6. **Document intent:** Update docstrings and module-level docs to describe helper responsibilities, keeping comments aligned with SRP boundaries. + +## Alternatives Considered + +- **Status quo:** Rejected; expanding functionality would keep bloating already-complex functions. +- **Entirely new tracer abstraction:** Unnecessary; existing `RuntimeTracer` shape is viable once responsibilities are modularised. + +## Follow-Up + +- Align sequencing with `design-docs/function-level-srp-refactor-plan.md`. +- Revisit performance benchmarks after extraction to ensure added indirection does not materially affect tracing overhead. diff --git a/design-docs/file-level-srp-refactor-plan.md b/design-docs/file-level-srp-refactor-plan.md new file mode 100644 index 0000000..476bfc6 --- /dev/null +++ b/design-docs/file-level-srp-refactor-plan.md @@ -0,0 +1,87 @@ +# File-Level Single Responsibility Refactor Plan + +## Goals +- Reshape the Rust crate and Python support package so that every source file encapsulates a single cohesive topic. +- Reduce the amount of ad-hoc cross-module knowledge currently required to understand tracing start-up, event handling, and encoding logic. +- Preserve the public Python API and Rust crate interfaces during the refactor to avoid disruptions for downstream tooling. + +## Current State Observations +- `src/lib.rs` is responsible for PyO3 module registration, lifecycle management for tracing sessions, global logging initialisation, and runtime format selection, which mixes unrelated concerns in one file. +- `src/runtime_tracer.rs` couples trace lifecycle control, activation toggling, and Python value encoding in a single module, making it difficult to unit test or substitute individual pieces. +- `src/tracer.rs` combines the `Tracer` trait definition, sys.monitoring shims, callback registration utilities, and thread-safe storage, meaning small changes can ripple through unrelated logic. +- `codetracer_python_recorder/api.py` interleaves environment based auto-start, context-manager ergonomics, backend state management, and format constants, leaving no clearly isolated entry-point for CLI or library callers. + +## Target Rust Module Layout +| Topic | Target file | Notes | +| --- | --- | --- | +| PyO3 module definition & re-exports | `src/lib.rs` | Limit to module wiring plus `pub use` statements. +| Global logging defaults | `src/logging.rs` | Provide helper to configure env_logger defaults reused by both lib.rs and tests. +| Tracing session lifecycle (`start_tracing`, `stop_tracing`, `flush_tracing`, `is_tracing`) | `src/session.rs` | Own global `ACTIVE` flag and filesystem validation. +| Runtime tracer orchestration (activation gating, writer plumbing) | `src/runtime/mod.rs` | Public `RuntimeTracer` facade constructed by session. +| Value encoding helpers | `src/runtime/value_encoder.rs` | Convert Python objects into `runtime_tracing::ValueRecord` values; unit test in isolation. +| Activation management (start-on-enter logic) | `src/runtime/activation.rs` | Encapsulate `activation_path`, `activation_code_id`, and toggling state. +| Writer initialisation and file path selection | `src/runtime/output_paths.rs` | Determine file names for JSON/Binary and wrap TraceWriter begin/finish. +| sys.monitoring integration utilities | `src/monitoring/mod.rs` | Provide `ToolId`, `EventId`, `MonitoringEvents`, `set_events`, etc. +| Tracer trait & callback dispatch | `src/monitoring/tracer.rs` | Define `Tracer` trait and per-event callbacks; depend on `monitoring::events`. +| Code object caching | `src/code_object.rs` | Remains focused on caching; consider relocating question comments to doc tests. + +The `runtime` and `monitoring` modules become directories with focused submodules, while `session.rs` consumes them via narrow interfaces. Any PyO3 FFI helper functions should live close to their domain (e.g., frame locals helpers inside `runtime/mod.rs`). + +## Target Python Package Layout +| Topic | Target file | Notes | +| --- | --- | --- | +| Public API surface (`start`, `stop`, `is_tracing`, constants) | `codetracer_python_recorder/api.py` | Keep the public signatures unchanged; delegate to new helpers. +| Session handle implementation | `codetracer_python_recorder/session.py` | Own `TraceSession` class and backend delegation logic. +| Auto-start via environment variables | `codetracer_python_recorder/auto_start.py` | Move `_auto_start_from_env` and constants needed only for boot-time configuration. +| Format constants & validation | `codetracer_python_recorder/formats.py` | Define `TRACE_BINARY`, `TRACE_JSON`, `DEFAULT_FORMAT`, and any helpers to negotiate format strings. +| Module-level `__init__` exports | `codetracer_python_recorder/__init__.py` | Re-export the API and trigger optional auto-start. + +Splitting the Python helper package along these lines isolates side-effectful auto-start logic from the plain API and simplifies targeted testing. + +## Implementation Roadmap + +1. **Stabilise tests and build scripts** + - Ensure `just test` passes to establish a green baseline. + - Capture benchmarks or representative trace outputs to validate parity later. + +2. **Introduce foundational Rust modules (serial)** + - Extract logging initialisation into `logging.rs` and update `lib.rs` to call the helper. + - Move session lifecycle logic from `lib.rs` into a new `session.rs`, keeping function signatures untouched and re-exporting via `lib.rs`. + - Update module declarations and adjust imports; verify tests. + +3. **Restructure runtime tracer internals (can parallelise subtasks)** + - Create `src/runtime/mod.rs` as façade exposing `RuntimeTracer`. + - **Task 3A (Team A)**: Extract activation control into `runtime/activation.rs`, exposing a small struct consumed by the tracer. + - **Task 3B (Team B)**: Extract value encoding routines into `runtime/value_encoder.rs`, providing unit tests and benchmarks. + - **Task 3C (Team C)**: Introduce `runtime/output_paths.rs` to encapsulate format-to-filename mapping and writer initialisation. + - Integrate submodules back into `runtime/mod.rs` sequentially once individual tasks are complete; resolve merge conflicts around struct fields. + +4. **Modularise sys.monitoring glue (partially parallel)** + - Add `monitoring/mod.rs` hosting shared types (`EventId`, `EventSet`, `ToolId`). + - Split trait and dispatcher logic into `monitoring/tracer.rs`; keep callback registration helpers near the sys.monitoring bindings. + - **Task 4A (Team A)**: Port OnceLock caches and registration helpers. + - **Task 4B (Team B)**: Move `Tracer` trait definition and default implementations, updating call sites in runtime tracer and tests. + +5. **Python package decomposition (parallel with Step 4 once Step 2 is merged)** + - Create `session.py`, `formats.py`, and `auto_start.py` with extracted logic. + - Update `api.py` to delegate to the new modules but maintain backward-compatible imports. + - Adjust `__init__.py` to import from `api` and trigger optional auto-start via the new helper. + - Update Python tests and examples to use the reorganised structure. + +6. **Clean-up and follow-up tasks** + - Remove obsolete comments (e.g., `//TODO AI!` placeholders) or move them into GitHub issues. + - Update documentation and diagrams to reflect the new module tree. + - Re-run `just test` and linting for both Rust and Python components; capture trace artifacts to confirm unchanged output format. + +## Parallelisation Notes +- Step 2 touches the global entry points and should complete before deeper refactors to minimise rebasing pain. +- Step 3 subtasks (activation, value encoding, output paths) operate on distinct sections of the existing `RuntimeTracer`; they can be implemented in parallel once `runtime/mod.rs` scaffolding exists. +- Step 4's subtasks can proceed concurrently with Step 3 once the new `monitoring` module is introduced; teams should coordinate on shared types but work on separate files. +- Step 5 (Python package) depends on Step 2 so that backend entry-points remain stable; it can overlap with late Step 3/4 work because it touches only the Python tree. +- Documentation updates and clean-up in Step 6 can be distributed among contributors after core refactors merge. + +## Testing & Verification Strategy +- Maintain existing integration and unit tests; add focused tests for newly separated modules (e.g., pure Rust tests for `value_encoder` conversions). +- Extend Python tests to cover environment auto-start logic now that it lives in its own module. +- For each phase, compare generated trace files against baseline fixtures to guarantee no behavioural regressions. +- Require code review sign-off from domain owners for each phase to ensure the single-responsibility intent is preserved. diff --git a/design-docs/file-level-srp-refactor-plan.status.md b/design-docs/file-level-srp-refactor-plan.status.md new file mode 100644 index 0000000..b49aaa0 --- /dev/null +++ b/design-docs/file-level-srp-refactor-plan.status.md @@ -0,0 +1,12 @@ +# File-Level SRP Refactor Status + +## Current Status +- ✅ Step 2 complete: introduced `src/logging.rs` for one-time logger initialisation and migrated tracing session lifecycle (`start_tracing`, `stop_tracing`, `is_tracing`, `flush_tracing`, `ACTIVE` flag) into `src/session.rs`, with `src/lib.rs` now limited to PyO3 wiring and re-exports. +- ✅ Step 3 complete: added `src/runtime/mod.rs` with focused `activation`, `value_encoder`, and `output_paths` submodules; `RuntimeTracer` now delegates activation gating, value encoding, and writer initialisation through the façade consumed by `session.rs`. +- ✅ Step 4 complete: introduced `src/monitoring/mod.rs` for sys.monitoring types/caches and `src/monitoring/tracer.rs` for the tracer trait plus callback dispatch; rewired `lib.rs`, `session.rs`, and `runtime/mod.rs`, and kept a top-level `tracer` re-export for API stability. +- ✅ Step 5 complete: split the Python package into dedicated `formats.py`, `session.py`, and `auto_start.py` modules, trimmed `api.py` to a thin façade, and moved the environment auto-start hook into `__init__.py`. +- ✅ Step 6 complete: resolved outstanding Rust TODOs (format validation, argv handling, function id stability), expanded module documentation so `cargo doc` reflects the architecture, and re-ran `just test` to confirm the refactor remains green. +- ✅ Test baseline: `just test` (nextest + pytest) passes with the UV cache scoped to the workspace; direct `cargo test` still requires CPython development symbols. + +## Next Task +- Plan complete. Identify any new follow-up items as separate tasks once additional requirements surface. diff --git a/design-docs/function-level-srp-refactor-plan.md b/design-docs/function-level-srp-refactor-plan.md new file mode 100644 index 0000000..ccc6007 --- /dev/null +++ b/design-docs/function-level-srp-refactor-plan.md @@ -0,0 +1,96 @@ +# Function-Level Single Responsibility Refactor Plan + +## Goals +- Ensure each public function in the tracer stack orchestrates a single concern, delegating specialised work to cohesive helpers. +- Reduce unsafe code surface inside high-level callbacks by centralising frame manipulation and activation logic. +- Improve testability by exposing narrow helper functions that can be unit tested without spinning up a full tracing session. + +## Hotspot Summary +| Function | Location | Current mixed responsibilities | +| --- | --- | --- | +| `start_tracing` | `codetracer-python-recorder/src/session.rs` | Logging bootstrap, active-session guard, filesystem validation/creation, format parsing, argv inspection, tracer construction, sys.monitoring registration | +| `start` | `codetracer_python_recorder/session.py` | Backend state guard, path coercion, format normalisation, activation path handling, PyO3 call | +| `RuntimeTracer::on_py_start` | `codetracer-python-recorder/src/runtime/mod.rs` | Activation gating, synthetic filename filtering, unsafe frame acquisition, argument capture, writer registration, logging | +| `RuntimeTracer::on_line` | `codetracer-python-recorder/src/runtime/mod.rs` | Activation gating, frame search, locals/globals materialisation, value encoding, variable registration, logging | +| `RuntimeTracer::on_py_return` | `codetracer-python-recorder/src/runtime/mod.rs` | Activation gating, return value encoding, activation state transition, logging | + +These functions currently exceed 60–120 lines and interleave control flow with low-level detail, making them brittle and difficult to extend. + +## Refactor Strategy +1. **Codify shared helpers before rewriting call sites.** Introduce new modules (`runtime::frame_inspector`, `runtime::value_capture`, `session::bootstrap`) that encapsulate filesystem, activation, and frame-handling behaviour. +2. **Convert complex functions into orchestration shells.** After helpers exist, shrink the hotspot functions to roughly 10–25 lines that call the helpers and translate their results into tracer actions. +3. **Add regression tests around extracted helpers** so that future changes to callbacks can lean on focused coverage instead of broad integration tests. +4. **Maintain behavioural parity** by running full `just test` plus targeted fixture comparisons after each stage. + +### Helper Module Map +- `runtime::frame_inspector` owns frame discovery and locals/globals snapshots through the `FrameSnapshot` abstraction. +- `runtime::value_capture` centralises argument, scope, and return-value recording, keeping encoding concerns outside the tracer façade. +- `runtime::logging` provides the `log_event` helper so callback logging stays consistent and format-agnostic. +- `session::bootstrap` deals with filesystem setup, format resolution, and program metadata collection for the Rust entrypoint. +- Python `session.py` mirrors the responsibilities with `_coerce_format`, `_validate_trace_path`, and `_normalize_activation_path` helpers. + +## Work Breakdown + +### Stage 0 – Baseline & Guardrails (1 PR) +- Confirm the repository is green (`just test`). +- Capture representative trace output fixtures (binary + JSON) to compare after refactors. +- Document current behaviour of `ActivationController` and frame traversal in quick notes for reviewers. + +### Stage 1 – Session Start-Up Decomposition (Rust + Python) (2 PRs) +1. **Rust bootstrap helper** + - Add `session/bootstrap.rs` (or equivalent module) exposing functions `ensure_trace_directory`, `resolve_trace_format`, `collect_program_metadata`. + - Refactor `start_tracing` to call these helpers; keep public signature unchanged. + - Unit test each helper for error cases (invalid path, unsupported format, argv fallback). + +2. **Python validation split** + - Extract `validate_trace_path` and `coerce_format` into private helpers in `session.py`. + - Update `start` to orchestrate helpers and call `_start_backend` only after validation succeeds. + - Extend Python tests for duplicate start attempts and invalid path/format scenarios. + +### Stage 2 – Frame Inspection & Activation Separation (Rust) (2 PRs) +1. **Frame locator module** + - Introduce `runtime/frame_inspector.rs` handling frame acquisition, locals/globals materialisation, and reference-count hygiene. + - Provide safe wrappers returning domain structs (e.g., `CapturedFrame { locals, globals, frame_ptr }`). + - Update `on_line` to use the new inspector while retaining existing behaviour. + +2. **Activation orchestration** + - Enrich `ActivationController` with methods `should_process(code)` and `handle_deactivation(code_id)` so callbacks can early-return without duplicating logic. + - Update `on_py_start`, `on_line`, and `on_py_return` to rely on these helpers. + +### Stage 3 – Value Capture Layer (Rust) (2 PRs) +1. **Argument capture helper** + - Create `runtime/value_capture.rs` (or expand existing module) exposing `capture_call_arguments(writer, frame, code)`. + - Refactor `on_py_start` to use it, ensuring error propagation remains explicit. + - Unit test for positional args, varargs, kwargs, non-string keys, and failure cases (e.g., failed locals sync). + +2. **Scope recording helper** + - Extract locals/globals iteration into `record_visible_scope(writer, captured_frame)`. + - Update `on_line` to delegate the loop and remove inline Set bookkeeping. + - Add tests covering overlapping names, `__builtins__` filtering, and locals==globals edge cases. + +### Stage 4 – Return Handling & Logging Harmonisation (Rust) (1 PR) +- Introduce small logging helpers (e.g., `log_event(event, code, lineno)`). +- Provide `record_return_value(writer, value)` in `value_capture`. +- Refactor `on_py_return` to call activation decision, logging helper, and value recorder sequentially. +- Ensure deactivation on activation return remains tested. + +### Stage 5 – Cleanup & Regression Sweep (1 PR) +- Remove obsolete inline comments / TODOs made redundant by helpers. +- Re-run `just test`, compare fixtures, and update docs referencing the old function shapes. +- Add final documentation pointing to the new helper modules for contributors. + +## Testing Strategy +- **Unit tests:** Add Rust tests for each new helper module using PyO3 `Python::with_gil` harnesses and synthetic frames. Add Python tests for new validation helpers. +- **Integration tests:** Continue running `just test` after each stage. Augment with targeted scripts that exercise activation path, async functions, and nested frames to confirm instrumentation parity. +- **Fixture diffs:** Compare generated trace outputs (binary + JSON) before and after the refactor to ensure no semantic drift. + +## Dependencies & Coordination +- Stage 1 must land before downstream stages to stabilise shared session APIs. +- Stages 2 and 3 can progress in parallel once bootstrap helpers are merged, but teams should sync on shared structs (e.g., `CapturedFrame`). +- Any changes to unsafe frame handling require review from at least one PyO3 domain expert. +- Update ADR 0002 status from “Proposed” to “Accepted” once Stages 1–4 merge successfully. + +## Risks & Mitigations +- **Unsafe code mistakes:** Wrap raw pointer usage in RAII helpers with debug assertions; add fuzz/ stress tests for recursion-heavy scripts. +- **Performance regressions:** Benchmark tracer overhead before and after major stages; inline trivial helpers where necessary, or mark with `#[inline]` as appropriate. +- **Merge conflicts:** Finish each stage quickly and rebase branches frequently; keep PRs focused (≤400 LOC diff) to ease review. diff --git a/design-docs/function-level-srp-refactor-plan.status.md b/design-docs/function-level-srp-refactor-plan.status.md new file mode 100644 index 0000000..0bea6ac --- /dev/null +++ b/design-docs/function-level-srp-refactor-plan.status.md @@ -0,0 +1,32 @@ +# Function-Level SRP Refactor Status + +## Stage 0 – Baseline & Guardrails +- ✅ `just test` (Rust + Python suites) passes; captured run via the top-level recipe. +- ✅ Generated JSON and binary reference traces from `examples/value_capture_all.py`; outputs stored in `artifacts/stage0/value-capture-json/` and `artifacts/stage0/value-capture-binary/`. +- ⏳ Summarise current `ActivationController` behaviour and frame traversal notes for reviewer context. + +## Stage 1 – Session Start-Up Decomposition +- ✅ Step 1 (Rust): Introduced `session::bootstrap` helpers and refactored `start_tracing` to delegate directory validation, format resolution, and program metadata collection. Tests remain green. +- ✅ Step 2 (Python): Extracted `_coerce_format`, `_validate_trace_path`, and `_normalize_activation_path` helpers; added tests covering invalid formats and conflicting paths. + +## Stage 2 – Frame Inspection & Activation Separation +- ✅ Step 1: Added `runtime::frame_inspector::capture_frame` to encapsulate frame lookup, locals/globals materialisation, and reference counting; `on_line` now delegates to the helper while preserving behaviour. +- ✅ Step 2: Extended `ActivationController` with `should_process_event`/`handle_return_event`, updated callbacks to rely on them, and removed direct state juggling from `RuntimeTracer`. + +## Stage 3 – Value Capture Layer +- ✅ Step 1: Introduced `runtime::value_capture::capture_call_arguments`; `on_py_start` now delegates to it, keeping the function focused on orchestration while reusing frame inspectors. +- ✅ Step 2: Added `record_visible_scope` helper and refactored `on_line` to delegate locals/globals registration through it. + +## Stage 4 – Return Handling & Logging Harmonisation +- ✅ Added `runtime::logging::log_event` to consolidate callback logging across start, line, and return handlers. +- ✅ Exposed `record_return_value` in `runtime::value_capture` and refactored `RuntimeTracer::on_py_return` to orchestrate activation checks, logging, and value recording. +- ✅ Extended runtime tests with explicit return capture coverage and activation deactivation assertions. + +## Stage 5 – Cleanup & Regression Sweep +- ✅ Audited runtime modules for obsolete inline comments or TODOs introduced pre-refactor; none remained after helper extraction. +- ✅ Documented the helper module map in `design-docs/function-level-srp-refactor-plan.md` for contributor onboarding. +- ✅ Re-ran `just test` (Rust `cargo nextest` + Python `pytest`) to confirm post-cleanup parity. + +## Next Actions +- Draft short notes on activation gating and frame search mechanics to complete Stage 0. +- Track Stage 5 fixture comparisons if we decide to snapshot JSON/Binary outputs post-refactor.