diff --git a/.agents/tasks/2025/08/14-1027-initial-python-api b/.agents/tasks/2025/08/14-1027-initial-python-api new file mode 100644 index 0000000..64c74b9 --- /dev/null +++ b/.agents/tasks/2025/08/14-1027-initial-python-api @@ -0,0 +1,3 @@ +Implement the Python API described in the design document for the Rust-based module. Write tests. Don't actually implement tracing using `runtime_tracing` yet, just add placeholders. +--- FOLLOW UP TASK --- +Implement the Python API described in the design document for the Rust-based module. Write tests. Don't actually implement tracing using runtime_tracing yet, just add placeholders. \ No newline at end of file diff --git a/codetracer/__init__.py b/codetracer/__init__.py new file mode 100644 index 0000000..99a30ec --- /dev/null +++ b/codetracer/__init__.py @@ -0,0 +1,155 @@ +"""High-level tracing API built on a Rust backend. + +This module exposes a minimal interface for starting and stopping +runtime traces. The heavy lifting is delegated to the +`codetracer_python_recorder` Rust extension which will eventually hook +into `runtime_tracing` and `sys.monitoring`. For now the Rust side only +maintains placeholder state and performs no actual tracing. 
+""" +from __future__ import annotations + +import contextlib +import os +from pathlib import Path +from typing import Iterable, Iterator, Optional + +from codetracer_python_recorder import ( + flush_tracing as _flush_backend, + is_tracing as _is_tracing_backend, + start_tracing as _start_backend, + stop_tracing as _stop_backend, +) + +TRACE_BINARY: str = "binary" +TRACE_JSON: str = "json" +DEFAULT_FORMAT: str = TRACE_BINARY + +_active_session: Optional["TraceSession"] = None + + +def _normalize_source_roots(source_roots: Iterable[os.PathLike | str] | None) -> Optional[list[str]]: + if source_roots is None: + return None + return [str(Path(p)) for p in source_roots] + + +def start( + path: os.PathLike | str, + *, + format: str = DEFAULT_FORMAT, + capture_values: bool = True, + source_roots: Iterable[os.PathLike | str] | None = None, +) -> "TraceSession": + """Start a global trace session. + + Parameters mirror the design document. The current implementation + merely records the active state on the Rust side and performs no + tracing. + """ + global _active_session + if _is_tracing_backend(): + raise RuntimeError("tracing already active") + + trace_path = Path(path) + _start_backend(str(trace_path), format, capture_values, _normalize_source_roots(source_roots)) + session = TraceSession(path=trace_path, format=format) + _active_session = session + return session + + +def stop() -> None: + """Stop the active trace session if one is running.""" + global _active_session + if not _is_tracing_backend(): + return + _stop_backend() + _active_session = None + + +def is_tracing() -> bool: + """Return ``True`` when a trace session is active.""" + return _is_tracing_backend() + + +def flush() -> None: + """Flush buffered trace data. + + With the current placeholder implementation this is a no-op but the + function is provided to match the planned public API. 
+ """ + if _is_tracing_backend(): + _flush_backend() + + +@contextlib.contextmanager +def trace( + path: os.PathLike | str, + *, + format: str = DEFAULT_FORMAT, + capture_values: bool = True, + source_roots: Iterable[os.PathLike | str] | None = None, +) -> Iterator["TraceSession"]: + """Context manager helper for scoped tracing.""" + session = start( + path, + format=format, + capture_values=capture_values, + source_roots=source_roots, + ) + try: + yield session + finally: + session.stop() + + +class TraceSession: + """Handle representing a live tracing session.""" + + path: Path + format: str + + def __init__(self, path: Path, format: str) -> None: + self.path = path + self.format = format + + def stop(self) -> None: + """Stop this trace session.""" + if _active_session is self: + stop() + + def flush(self) -> None: + """Flush buffered trace data for this session.""" + flush() + + def __enter__(self) -> "TraceSession": + return self + + def __exit__(self, exc_type, exc, tb) -> None: # pragma: no cover - thin wrapper + self.stop() + + +def _auto_start_from_env() -> None: + path = os.getenv("CODETRACER_TRACE") + if not path: + return + fmt = os.getenv("CODETRACER_FORMAT", DEFAULT_FORMAT) + capture_env = os.getenv("CODETRACER_CAPTURE_VALUES") + capture = True + if capture_env is not None: + capture = capture_env.lower() not in {"0", "false", "no"} + start(path, format=fmt, capture_values=capture) + + +_auto_start_from_env() + +__all__ = [ + "TraceSession", + "DEFAULT_FORMAT", + "TRACE_BINARY", + "TRACE_JSON", + "start", + "stop", + "is_tracing", + "trace", + "flush", +] diff --git a/crates/codetracer-python-recorder/src/lib.rs b/crates/codetracer-python-recorder/src/lib.rs index 830e44f..a5c1ff2 100644 --- a/crates/codetracer-python-recorder/src/lib.rs +++ b/crates/codetracer-python-recorder/src/lib.rs @@ -1,17 +1,58 @@ +use std::sync::atomic::{AtomicBool, Ordering}; + +use pyo3::exceptions::PyRuntimeError; use pyo3::prelude::*; -/// codetracer_python_recorder -/// 
-/// Minimal placeholder for the Rust-backed recorder. This exposes a trivial -/// function to verify the module builds and imports successfully. +/// Global flag tracking whether tracing is active. +static ACTIVE: AtomicBool = AtomicBool::new(false); + +/// Start tracing. Placeholder implementation that simply flips the +/// global active flag and ignores all parameters. +#[pyfunction] +fn start_tracing( + _path: &str, + _format: &str, + _capture_values: bool, + _source_roots: Option<Vec<String>>, +) -> PyResult<()> { + if ACTIVE.swap(true, Ordering::SeqCst) { + return Err(PyRuntimeError::new_err("tracing already active")); + } + Ok(()) +} + +/// Stop tracing by resetting the global flag. +#[pyfunction] +fn stop_tracing() -> PyResult<()> { + ACTIVE.store(false, Ordering::SeqCst); + Ok(()) +} + +/// Query whether tracing is currently active. +#[pyfunction] +fn is_tracing() -> PyResult<bool> { + Ok(ACTIVE.load(Ordering::SeqCst)) +} + +/// Flush buffered trace data. No-op placeholder for now. +#[pyfunction] +fn flush_tracing() -> PyResult<()> { + Ok(()) +} + +/// Trivial function kept for smoke tests verifying the module builds. #[pyfunction] fn hello() -> PyResult<String> { Ok("Hello from codetracer-python-recorder (Rust)".to_string()) } +/// Python module definition. 
#[pymodule] fn codetracer_python_recorder(_py: Python<'_>, m: Bound<'_, PyModule>) -> PyResult<()> { - let hello_fn = wrap_pyfunction!(hello, &m)?; - m.add_function(hello_fn)?; + m.add_function(wrap_pyfunction!(start_tracing, &m)?)?; + m.add_function(wrap_pyfunction!(stop_tracing, &m)?)?; + m.add_function(wrap_pyfunction!(is_tracing, &m)?)?; + m.add_function(wrap_pyfunction!(flush_tracing, &m)?)?; + m.add_function(wrap_pyfunction!(hello, &m)?)?; Ok(()) } diff --git a/design-docs/design-001.md b/design-docs/design-001.md new file mode 100644 index 0000000..1945608 --- /dev/null +++ b/design-docs/design-001.md @@ -0,0 +1,321 @@ +# Python sys.monitoring Tracer Design + +## Overview + +This document outlines the design for integrating Python's `sys.monitoring` API with the `runtime_tracing` format. The goal is to produce CodeTracer-compatible traces for Python programs without modifying the interpreter. + +The tracer collects `sys.monitoring` events, converts them to `runtime_tracing` events, and streams them to `trace.json`/`trace.bin` along with metadata and source snapshots. + +## Architecture + +### Tool Initialization +- Acquire a tool identifier via `sys.monitoring.use_tool_id`; store it for the lifetime of the tracer. + ```rs + pub const MONITORING_TOOL_NAME: &str = "codetracer"; + pub struct ToolId { pub id: u8 } + pub fn acquire_tool_id() -> PyResult<ToolId>; + ``` +- Register one callback per event using `sys.monitoring.register_callback`. 
+ ```rs + #[repr(transparent)] + pub struct EventId(pub u64); // Exact value loaded from sys.monitoring.events.* + + pub struct MonitoringEvents { + pub BRANCH: EventId, + pub CALL: EventId, + pub C_RAISE: EventId, + pub C_RETURN: EventId, + pub EXCEPTION_HANDLED: EventId, + pub INSTRUCTION: EventId, + pub JUMP: EventId, + pub LINE: EventId, + pub PY_RESUME: EventId, + pub PY_RETURN: EventId, + pub PY_START: EventId, + pub PY_THROW: EventId, + pub PY_UNWIND: EventId, + pub PY_YIELD: EventId, + pub RAISE: EventId, + pub RERAISE: EventId, + pub STOP_ITERATION: EventId, + } + + pub fn load_monitoring_events(py: Python<'_>) -> PyResult<MonitoringEvents>; + + // Python-level callback registered via sys.monitoring.register_callback + pub type CallbackFn = PyObject; + pub fn register_callback(tool: &ToolId, event: &EventId, cb: &CallbackFn) -> PyResult<()>; + ``` +- Enable all desired events by bitmask with `sys.monitoring.set_events`. + ```rs + #[derive(Clone, Copy)] + pub struct EventSet(pub u64); + + pub fn events_union(ids: &[EventId]) -> EventSet; + pub fn set_events(tool: &ToolId, set: EventSet) -> PyResult<()>; + ``` + +### Writer Management +- Open a `runtime_tracing` writer (`trace.json` or `trace.bin`) during `start_tracing`. + ```rs + pub enum OutputFormat { Json, Binary } + pub struct TraceWriter { pub format: OutputFormat } + pub fn start_tracing(path: &Path, format: OutputFormat) -> io::Result<TraceWriter>; + ``` +- Expose methods to append metadata and file copies using existing `runtime_tracing` helpers. + ```rs + pub fn append_metadata(writer: &mut TraceWriter, meta: &TraceMetadata); + pub fn copy_source_file(writer: &mut TraceWriter, path: &Path) -> io::Result<()>; + ``` +- Flush and close the writer when tracing stops. + ```rs + pub fn stop_tracing(writer: TraceWriter) -> io::Result<()>; + ``` + +### Frame and Thread Tracking +- Maintain a per-thread stack of activation identifiers to correlate `CALL`, `PY_START`, yields, and returns. 
Since `sys.monitoring` callbacks provide `CodeType` and offsets (not frames), we rely on the nesting order of events to track activations. + ```rs + pub type ActivationId = u64; + pub struct ThreadState { pub stack: Vec<ActivationId> } + pub fn current_thread_state() -> &'static mut ThreadState; + ``` +- Associate activations with `CodeType` objects and instruction/line offsets as needed for cross-referencing, without depending on `PyFrameObject`. + ```rs + pub struct Activation { + pub id: ActivationId, + // Hold a GIL-independent handle to the CodeType object. + // Access required attributes via PyO3 attribute lookup (getattr) under the GIL. + pub code: PyObject, + } + ``` +- Record thread start/end events when a thread first emits a monitoring event and when it finishes. + ```rs + pub fn on_thread_start(thread_id: u64); + pub fn on_thread_stop(thread_id: u64); + ``` + +### Code Object Access Strategy (no reliance on PyCodeObject internals) +- Rationale: PyO3 exposes `ffi::PyCodeObject` as an opaque type. Instead of touching its unstable layout, treat code objects as generic Python objects and access only stable Python-level attributes via PyO3's `getattr` on `&PyAny`. + ```rs + use pyo3::{prelude::*, types::PyAny}; + + #[derive(Clone)] + pub struct CodeInfo { + pub filename: String, + pub qualname: String, + pub firstlineno: u32, + pub flags: u32, + } + + /// Stable identity for a code object during its lifetime. + /// Uses the object's address while GIL-held; equivalent to Python's id(code). + pub fn code_id(py: Python<'_>, code: &PyAny) -> usize { + code.as_ptr() as usize + } + + /// Extract just the attributes we need, via Python attribute access. 
+ pub fn extract_code_info(py: Python<'_>, code: &PyAny) -> PyResult<CodeInfo> { + let filename: String = code.getattr("co_filename")?.extract()?; + // Prefer co_qualname if present, else fallback to co_name + let qualname: String = match code.getattr("co_qualname") { + Ok(q) => q.extract()?, + Err(_) => code.getattr("co_name")?.extract()?, + }; + let firstlineno: u32 = code.getattr("co_firstlineno")?.extract()?; + let flags: u32 = code.getattr("co_flags")?.extract()?; + Ok(CodeInfo { filename, qualname, firstlineno, flags }) + } + + /// Cache minimal info to avoid repeated getattr and to assign stable IDs. + pub struct CodeRegistry { + pub map: std::collections::HashMap<usize, CodeInfo>, + } + + impl CodeRegistry { + pub fn new() -> Self { Self { map: Default::default() } } + pub fn intern(&mut self, py: Python<'_>, code: &PyAny) -> PyResult<usize> { + let id = code_id(py, code); + if !self.map.contains_key(&id) { + let info = extract_code_info(py, code)?; + self.map.insert(id, info); + } + Ok(id) + } + } + ``` +- Event handler inputs use `PyObject` for the `code` parameter. Borrow to `&PyAny` with `let code = code.bind(py);` when needed, then consult `CodeRegistry`. +- For line numbers: rely on the `LINE` event’s provided `line_number`. If instruction offsets need mapping, call `code.getattr("co_lines")?.call0()?` and iterate lazily; avoid caching unless necessary. + +## Event Handling + +Each bullet below represents a low-level operation translating a single `sys.monitoring` event into the `runtime_tracing` stream. + +### Control Flow +- **PY_START** – Create a `Function` event for the code object and push a new activation ID onto the thread's stack. + ```rs + pub fn on_py_start(code: PyObject, instruction_offset: i32); + ``` +- **PY_RESUME** – Emit an `Event` log noting resumption and update the current activation's state. 
+ ```rs + pub fn on_py_resume(code: PyObject, instruction_offset: i32); + ``` +- **PY_RETURN** – Pop the activation ID, write a `Return` event with the value (if retrievable), and link to the caller. + ```rs + pub struct ReturnRecord { pub activation: ActivationId, pub value: Option<ValueRecord> } + pub fn on_py_return(code: PyObject, instruction_offset: i32, retval: *mut PyObject); + ``` +- **PY_YIELD** – Record a `Return` event flagged as a yield and keep the activation on the stack for later resumes. + ```rs + pub fn on_py_yield(code: PyObject, instruction_offset: i32, retval: *mut PyObject); + ``` +- **STOP_ITERATION** – Emit an `Event` indicating iteration exhaustion for the current activation. + ```rs + pub fn on_stop_iteration(code: PyObject, instruction_offset: i32, exception: *mut PyObject); + ``` +- **PY_UNWIND** – Mark the beginning of stack unwinding and note the target handler in an `Event`. + ```rs + pub fn on_py_unwind(code: PyObject, instruction_offset: i32, exception: *mut PyObject); + ``` +- **PY_THROW** – Emit an `Event` describing the thrown value and the target generator/coroutine. + ```rs + pub fn on_py_throw(code: PyObject, instruction_offset: i32, exception: *mut PyObject); + ``` +- **RERAISE** – Log a re-raise event referencing the original exception. + ```rs + pub fn on_reraise(code: PyObject, instruction_offset: i32, exception: *mut PyObject); + ``` + +### Call and Line Tracking +- **CALL** – Record a `Call` event, capturing the `callable` and the first argument if available (`arg0` as provided by `sys.monitoring`), and associate a new activation. + ```rs + pub fn on_call(code: PyObject, instruction_offset: i32, callable: *mut PyObject, arg0: Option<*mut PyObject>) -> ActivationId; + ``` +- **LINE** – Write a `Step` event with current path and line number; ensure the path is registered. + ```rs + pub fn on_line(code: PyObject, line_number: u32); + ``` +- **INSTRUCTION** – Optionally emit a fine-grained `Event` keyed by `instruction_offset`. 
Opcode names can be derived from the `CodeType` if desired. + ```rs + pub fn on_instruction(code: PyObject, instruction_offset: i32); + ``` +- **JUMP** – Append an `Event` describing the jump target offset for control-flow visualization. + ```rs + pub fn on_jump(code: PyObject, instruction_offset: i32, destination_offset: i32); + ``` +- **BRANCH** – Record an `Event` with `destination_offset`; whether the branch was taken can be inferred by comparing to the fallthrough offset. + ```rs + pub fn on_branch(code: PyObject, instruction_offset: i32, destination_offset: i32); + ``` + _Note_: Current runtime_tracing doesn't support branching events, but instead relies on AST tree-sitter analysis. So for the initial version we will ignore them and can add support after modifications to the tracing format. + +### Exception Lifecycle +- **RAISE** – Emit an `Event` containing exception type and message when raised. + ```rs + pub fn on_raise(code: PyObject, instruction_offset: i32, exception: *mut PyObject); + ``` +- **EXCEPTION_HANDLED** – Log an `Event` marking when an exception is caught. + ```rs + pub fn on_exception_handled(code: PyObject, instruction_offset: i32, exception: *mut PyObject); + ``` + +### C API Boundary +- **C_RETURN** – On returning from a C function, emit a `Return` event tagged as foreign. Note: `sys.monitoring` does not provide the result object for `C_RETURN`. + ```rs + pub fn on_c_return(code: PyObject, instruction_offset: i32, callable: *mut PyObject, arg0: Option<*mut PyObject>); + ``` +- **C_RAISE** – When a C function raises, record an `Event` that a C-level callable raised. Note: `sys.monitoring` does not pass the exception object for `C_RAISE`. + ```rs + pub fn on_c_raise(code: PyObject, instruction_offset: i32, callable: *mut PyObject, arg0: Option<*mut PyObject>); + ``` + +### No Events +- **NO_EVENTS** – Special constant; used only to disable monitoring. No runtime event is produced. 
+ ```rs + pub const NO_EVENTS: EventSet = EventSet(0); + ``` + +## Metadata and File Capture +- Collect the working directory, program name, and arguments and store them in `trace_metadata.json`. + ```rs + pub struct TraceMetadata { pub cwd: PathBuf, pub program: String, pub args: Vec<String> } + pub fn write_metadata(writer: &mut TraceWriter, meta: &TraceMetadata); + ``` +- Track every file path referenced; copy each into the trace directory under `files/`. + ```rs + pub fn track_file(writer: &mut TraceWriter, path: &Path) -> io::Result<()>; + ``` +- Record `VariableName`, `Type`, and `Value` entries when variables are inspected or logged. + ```rs + pub struct VariableRecord { pub name: String, pub ty: TypeId, pub value: ValueRecord } + pub fn record_variable(writer: &mut TraceWriter, rec: VariableRecord); + ``` + +## Value Translation and Recording +- Maintain a type registry that maps Python `type` objects to `runtime_tracing` `Type` entries and assigns new `type_id` values on first encounter. + ```rs + pub type TypeId = u32; + pub type ValueId = u64; + pub enum ValueRecord { Int(i64), Float(f64), Bool(bool), None, Str(String), Raw(Vec<u8>), Sequence(Vec<ValueRecord>), Tuple(Vec<ValueRecord>), Struct(Vec<(String, ValueRecord)>), Reference(ValueId) } + pub struct TypeRegistry { next: TypeId, map: HashMap<*mut PyTypeObject, TypeId> } + pub fn intern_type(reg: &mut TypeRegistry, ty: *mut PyTypeObject) -> TypeId; + ``` +- Convert primitives (`int`, `float`, `bool`, `None`, `str`) directly to their corresponding `ValueRecord` variants. + ```rs + pub fn encode_primitive(obj: *mut PyObject) -> Option<ValueRecord>; + ``` +- Encode `bytes` and `bytearray` as `Raw` records containing base64 text to preserve binary data. + ```rs + pub fn encode_bytes(obj: *mut PyObject) -> ValueRecord; + ``` +- Represent lists and sets as `Sequence` records and tuples as `Tuple` records, converting each element recursively. 
+ ```rs + pub fn encode_sequence(iter: &PySequence) -> ValueRecord; + pub fn encode_tuple(tuple: &PyTupleObject) -> ValueRecord; + ``` +- Serialize dictionaries as a `Sequence` of two-element `Tuple` records for key/value pairs to avoid fixed field layouts. + ```rs + pub fn encode_dict(dict: &PyDictObject) -> ValueRecord; + ``` +- For objects with accessible attributes, emit a `Struct` record with sorted field names; fall back to `Raw` with `repr(obj)` when inspection is unsafe. + ```rs + pub fn encode_object(obj: *mut PyObject) -> ValueRecord; + ``` +- Track object identities to detect cycles and reuse `Reference` records with `id(obj)` for repeated structures. + ```rs + pub struct SeenSet { map: HashMap<usize, ValueId> } + pub fn record_reference(seen: &mut SeenSet, obj: *mut PyObject) -> Option<ValueRecord>; + ``` + +## Shutdown +- On `stop_tracing`, call `sys.monitoring.set_events` with `NO_EVENTS` for the tool ID. + ```rs + pub fn disable_events(tool: &ToolId); + ``` +- Unregister callbacks and free the tool ID with `sys.monitoring.free_tool_id`. + ```rs + pub fn unregister_callbacks(tool: ToolId); + pub fn free_tool_id(tool: ToolId); + ``` +- Close the writer and ensure all buffered events are flushed to disk. + ```rs + pub fn finalize(writer: TraceWriter) -> io::Result<()>; + ``` + +## Current Limitations +- **No structured support for threads or async tasks** – the trace format lacks explicit identifiers for concurrent execution. + Distinguishing events emitted by different Python threads or `asyncio` tasks requires ad hoc `Event` entries, complicating + analysis and preventing downstream tools from reasoning about scheduling. +- **Generic `Event` log** – several `sys.monitoring` notifications like resume, unwind, and branch outcomes have no dedicated + `runtime_tracing` variant. They must be encoded as free‑form `Event` logs, which reduces machine readability and hinders + automation. +- **Heavy value snapshots** – arguments and returns expect full `ValueRecord` structures. 
Serializing arbitrary Python objects is + expensive and often degrades to lossy string dumps, limiting the visibility of rich runtime state. +- **Append‑only path and function tables** – `runtime_tracing` assumes files and functions are discovered once and never change. + Dynamically generated code (`eval`, REPL snippets) forces extra bookkeeping and cannot update earlier entries, making + dynamic features awkward to trace. +- **No built‑in compression or streaming** – traces are written as monolithic JSON or binary files. Long sessions quickly grow in + size and cannot be streamed to remote consumers without additional tooling. + +## Future Extensions +- Add filtering to enable subsets of events for performance-sensitive scenarios. +- Support streaming traces over a socket for live debugging. diff --git a/design-docs/py-api-001.md b/design-docs/py-api-001.md new file mode 100644 index 0000000..5043824 --- /dev/null +++ b/design-docs/py-api-001.md @@ -0,0 +1,64 @@ +# Python sys.monitoring Tracer API + +## Overview +This document describes the user-facing Python API for the `codetracer` module built on top of `runtime_tracing` and `sys.monitoring`. The API exposes a minimal surface for starting and stopping traces, managing trace sessions, and integrating tracing into scripts or test suites. + +## Module `codetracer` + +### Constants +- `DEFAULT_FORMAT: str = "binary"` +- `TRACE_BINARY: str = "binary"` +- `TRACE_JSON: str = "json"` + +### Session Management +- Start a global trace; returns a `TraceSession`. + ```py + def start(path: str | os.PathLike, *, format: str = DEFAULT_FORMAT, + capture_values: bool = True, source_roots: Iterable[str | os.PathLike] | None = None) -> TraceSession + ``` +- Stop the active trace if any. + ```py + def stop() -> None + ``` +- Query whether tracing is active. + ```py + def is_tracing() -> bool + ``` +- Context manager helper for scoped tracing. 
+ ```py + @contextlib.contextmanager + def trace(path: str | os.PathLike, *, format: str = DEFAULT_FORMAT, + capture_values: bool = True, source_roots: Iterable[str | os.PathLike] | None = None): + ... + ``` +- Flush buffered data to disk without ending the session. + ```py + def flush() -> None + ``` + +## Class `TraceSession` +Represents a live tracing session returned by `start()` and used by the context manager. + +```py +class TraceSession: + path: pathlib.Path + format: str + + def stop(self) -> None: ... + def flush(self) -> None: ... + def __enter__(self) -> TraceSession: ... + def __exit__(self, exc_type, exc, tb) -> None: ... +``` + +## Environment Integration +- Auto-start tracing when `CODETRACER_TRACE` is set; the value is interpreted as the output path. +- When `CODETRACER_FORMAT` is provided, it overrides the default output format. +- `CODETRACER_CAPTURE_VALUES` toggles value recording. + +## Usage Example +```py +import codetracer + +with codetracer.trace("trace.bin"): + run_application() +``` diff --git a/design-docs/test-design-001.md b/design-docs/test-design-001.md new file mode 100644 index 0000000..ed4133a --- /dev/null +++ b/design-docs/test-design-001.md @@ -0,0 +1,60 @@ +# Python sys.monitoring Tracer Test Design + +## Overview +This document outlines a test suite for validating the Python tracer built on `sys.monitoring` and `runtime_tracing`. Each test item corresponds to roughly 1–10 lines of implementation and exercises tracer behavior under typical and edge conditions. + +## Setup +- Establish a temporary directory for trace output and source snapshots. +- Install the tracer module and import helper utilities for running traced Python snippets. +- Provide fixtures that clear the trace buffer and reset global state between tests. + +## Tool Initialization +- Acquire a monitoring tool ID and ensure subsequent calls reuse the same identifier. +- Register callbacks for all enabled events and verify the resulting mask matches the design. 
+- Unregister callbacks on shutdown and confirm no events fire afterward. + +## Event Recording +### Control Flow Events +- Capture `PY_START` and `PY_RETURN` for a simple script and assert a start/stop pair is recorded. +- Resume and yield events within a generator function produce matching `PY_RESUME`/`PY_YIELD` entries. +- A `PY_THROW` followed by `RERAISE` generates the expected unwind and rethrow sequence. + +### Call Tracking +- Direct function calls record `CALL` and `PY_RETURN` with correct frame identifiers. +- Recursive calls nest frames correctly and unwind in LIFO order. +- Decorated functions ensure wrapper frames are recorded separately from wrapped frames. + +### Line and Branch Coverage +- A loop with conditional branches emits `LINE` events for each executed line and `BRANCH` for each branch taken or skipped. +- Jump statements such as `continue` and `break` produce `JUMP` events with source and destination line numbers. + +### Exception Handling +- Raising and catching an exception emits `RAISE` and `EXCEPTION_HANDLED` events with matching exception IDs. +- An uncaught exception records `RAISE` followed by `PY_UNWIND` and terminates the trace with a `PY_THROW`. + +### C API Boundary +- Calling a built-in like `len` results in `C_CALL` and `C_RETURN` events linked to the Python frame. +- A built-in that raises, such as `int("a")`, generates `C_RAISE` with the translated exception value. + +## Value Translation +- Primitive values (ints, floats, strings, bytes) round-trip through the value registry and appear in the trace as expected. +- Complex collections like lists of dicts are serialized recursively with cycle detection preventing infinite loops. +- Object references without safe representations fall back to `repr` with a stable identifier. + +## Metadata and Source Capture +- The trace writer copies the executing script into the output directory and records its SHA-256 hash. 
+- Traces include `ProcessMetadata` fields for Python version and platform. + +## Shutdown Behavior +- Normal interpreter exit flushes the trace and closes files without losing events. +- An abrupt shutdown via `os._exit` truncates the trace file but leaves previous events intact. + +## Error and Edge Cases +- Invalid event names in manual callback registration raise a clear `ValueError`. +- Attempting to trace after the writer is closed results in a no-op without raising. +- Large string values exceeding the configured limit are truncated with an explicit marker. + +## Performance and Stress +- Tracing a tight loop of 10⁶ iterations completes within an acceptable time budget. +- Concurrent threads each produce isolated traces with no frame ID collisions. + diff --git a/tests/test_codetracer_api.py b/tests/test_codetracer_api.py new file mode 100644 index 0000000..7713adc --- /dev/null +++ b/tests/test_codetracer_api.py @@ -0,0 +1,46 @@ +import os +import subprocess +import sys +import tempfile +import unittest +from pathlib import Path + +import codetracer + + +class TracingApiTests(unittest.TestCase): + def setUp(self) -> None: # ensure clean state before each test + codetracer.stop() + + def test_start_stop_and_status(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + trace_path = Path(tmpdir) / "trace.bin" + session = codetracer.start(trace_path) + self.assertTrue(codetracer.is_tracing()) + self.assertIsInstance(session, codetracer.TraceSession) + self.assertEqual(session.path, trace_path) + self.assertEqual(session.format, codetracer.DEFAULT_FORMAT) + codetracer.flush() # should not raise + session.flush() # same + session.stop() + self.assertFalse(codetracer.is_tracing()) + + def test_context_manager(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + trace_path = Path(tmpdir) / "trace.bin" + with codetracer.trace(trace_path) as session: + self.assertTrue(codetracer.is_tracing()) + self.assertIsInstance(session, 
codetracer.TraceSession) + self.assertFalse(codetracer.is_tracing()) + + def test_environment_auto_start(self) -> None: + script = "import codetracer, sys; sys.stdout.write(str(codetracer.is_tracing()))" + with tempfile.TemporaryDirectory() as tmpdir: + env = os.environ.copy() + env["CODETRACER_TRACE"] = str(Path(tmpdir) / "trace.bin") + out = subprocess.check_output([sys.executable, "-c", script], env=env) + self.assertEqual(out.decode(), "True") + + +if __name__ == "__main__": + unittest.main()