diff --git a/codetracer-python-recorder/src/runtime/tracer/runtime_tracer.rs b/codetracer-python-recorder/src/runtime/tracer/runtime_tracer.rs index d8c0f53..8560f66 100644 --- a/codetracer-python-recorder/src/runtime/tracer/runtime_tracer.rs +++ b/codetracer-python-recorder/src/runtime/tracer/runtime_tracer.rs @@ -179,7 +179,7 @@ mod tests { use std::collections::BTreeMap; use std::ffi::CString; use std::fs; - use std::path::Path; + use std::path::{Path, PathBuf}; use std::sync::Arc; use std::thread; @@ -716,6 +716,30 @@ result = compute()\n" }); } + #[pyfunction] + fn capture_py_start(py: Python<'_>, code: Bound<'_, PyCode>, offset: i32) -> PyResult<()> { + ffi::wrap_pyfunction("test_capture_py_start", || { + ACTIVE_TRACER.with(|cell| -> PyResult<()> { + let ptr = cell.get(); + if ptr.is_null() { + panic!("No active RuntimeTracer for capture_py_start"); + } + unsafe { + let tracer = &mut *ptr; + let wrapper = CodeObjectWrapper::new(py, &code); + match tracer.on_py_start(py, &wrapper, offset) { + Ok(outcome) => { + LAST_OUTCOME.with(|cell| cell.set(Some(outcome))); + Ok(()) + } + Err(err) => Err(err), + } + } + })?; + Ok(()) + }) + } + #[pyfunction] fn capture_line(py: Python<'_>, code: Bound<'_, PyCode>, lineno: u32) -> PyResult<()> { ffi::wrap_pyfunction("test_capture_line", || { @@ -770,7 +794,7 @@ result = compute()\n" const PRELUDE: &str = r#" import inspect -from test_tracer import capture_line, capture_return_event +from test_tracer import capture_line, capture_return_event, capture_py_start def snapshot(line=None): frame = inspect.currentframe().f_back @@ -786,6 +810,10 @@ def emit_return(value): frame = inspect.currentframe().f_back capture_return_event(frame.f_code, value) return value + +def start_call(): + frame = inspect.currentframe().f_back + capture_py_start(frame.f_code, frame.f_lasti) "#; #[derive(Debug, Clone, PartialEq)] @@ -862,9 +890,14 @@ def emit_return(value): fn ensure_test_module(py: Python<'_>) { let module = PyModule::new(py, 
"test_tracer").expect("create module"); + module + .add_function( + wrap_pyfunction!(capture_py_start, &module).expect("wrap capture_py_start"), + ) + .expect("add py_start capture function"); module .add_function(wrap_pyfunction!(capture_line, &module).expect("wrap capture_line")) - .expect("add function"); + .expect("add line capture function"); module .add_function( wrap_pyfunction!(capture_return_event, &module).expect("wrap capture_return_event"), @@ -906,6 +939,25 @@ def emit_return(value): fs::write(path, contents.trim_start()).expect("write filter"); } + fn install_drop_everything_filter(project_root: &Path) -> PathBuf { + let filters_dir = project_root.join(".codetracer"); + fs::create_dir(&filters_dir).expect("create .codetracer"); + let drop_filter_path = filters_dir.join("drop-filter.toml"); + write_filter( + &drop_filter_path, + r#" + [meta] + name = "drop-all" + version = 1 + + [scope] + default_exec = "trace" + default_value_action = "drop" + "#, + ); + drop_filter_path + } + #[test] fn trace_filter_redacts_values() { Python::with_gil(|py| { @@ -1125,21 +1177,7 @@ sensitive("s3cr3t") let project = tempfile::tempdir().expect("project dir"); let project_root = project.path(); - let filters_dir = project_root.join(".codetracer"); - fs::create_dir(&filters_dir).expect("create .codetracer"); - let drop_filter_path = filters_dir.join("drop-filter.toml"); - write_filter( - &drop_filter_path, - r#" - [meta] - name = "drop-all" - version = 1 - - [scope] - default_exec = "trace" - default_value_action = "drop" - "#, - ); + let drop_filter_path = install_drop_everything_filter(project_root); let config = TraceFilterConfig::from_inline_and_paths( &[("builtin-default", BUILTIN_TRACE_FILTER)], @@ -1186,11 +1224,13 @@ dropper() } let mut variable_names: Vec = Vec::new(); - let mut return_events = 0usize; + let mut return_values: Vec = Vec::new(); for event in &tracer.writer.events { match event { TraceLowLevelEvent::VariableName(name) => 
variable_names.push(name.clone()), - TraceLowLevelEvent::Return(_) => return_events += 1, + TraceLowLevelEvent::Return(record) => { + return_values.push(record.return_value.clone()) + } _ => {} } } @@ -1199,9 +1239,89 @@ dropper() "expected no variables captured, found {:?}", variable_names ); + assert_eq!(return_values.len(), 1, "return event should remain balanced"); + match &return_values[0] { + ValueRecord::Error { msg, .. } => assert_eq!(msg, ""), + other => panic!("expected dropped sentinel return value, got {other:?}"), + } + }); + } + + #[test] + fn drop_filters_keep_call_return_pairs_balanced() { + Python::with_gil(|py| { + ensure_test_module(py); + + let project = tempfile::tempdir().expect("project dir"); + let project_root = project.path(); + let drop_filter_path = install_drop_everything_filter(project_root); + + let config = TraceFilterConfig::from_inline_and_paths( + &[("builtin-default", BUILTIN_TRACE_FILTER)], + &[drop_filter_path.clone()], + ) + .expect("load filter chain"); + let engine = Arc::new(TraceFilterEngine::new(config)); + + let app_dir = project_root.join("app"); + fs::create_dir_all(&app_dir).expect("create app dir"); + let script_path = app_dir.join("classes.py"); + let body = r#" +def initializer(label): + start_call() + return emit_return(label.upper()) + +class Alpha: + TOKEN = initializer("alpha") + +class Beta: + TOKEN = initializer("beta") + +class Gamma: + TOKEN = initializer("gamma") + +initializer("omega") +"#; + let script = format!("{PRELUDE}\n{body}", PRELUDE = PRELUDE, body = body); + fs::write(&script_path, script).expect("write script"); + + let mut tracer = RuntimeTracer::new( + script_path.to_string_lossy().as_ref(), + &[], + TraceEventsFileFormat::Json, + None, + Some(engine), + ); + + { + let _guard = ScopedTracer::new(&mut tracer); + LAST_OUTCOME.with(|cell| cell.set(None)); + let run_code = format!( + "import runpy, sys\nsys.path.insert(0, r\"{}\")\nrunpy.run_path(r\"{}\")", + project_root.display(), + 
script_path.display() + ); + let run_code_c = CString::new(run_code).expect("script contains nul byte"); + py.run(run_code_c.as_c_str(), None, None) + .expect("execute classes script"); + } + + let mut call_count = 0usize; + let mut return_count = 0usize; + for event in &tracer.writer.events { + match event { + TraceLowLevelEvent::Call(_) => call_count += 1, + TraceLowLevelEvent::Return(_) => return_count += 1, + _ => {} + } + } + assert!( + call_count >= 4, + "expected at least four call events, saw {call_count}" + ); assert_eq!( - return_events, 0, - "return value should be dropped instead of recorded" + call_count, return_count, + "drop filters must keep call/return pairs balanced" ); }); } diff --git a/codetracer-python-recorder/src/runtime/value_capture.rs b/codetracer-python-recorder/src/runtime/value_capture.rs index 650a79d..dd4805b 100644 --- a/codetracer-python-recorder/src/runtime/value_capture.rs +++ b/codetracer-python-recorder/src/runtime/value_capture.rs @@ -19,6 +19,7 @@ use crate::trace_filter::config::ValueAction; use crate::trace_filter::engine::{ValueKind, ValuePolicy}; const REDACTED_SENTINEL: &str = ""; +const DROPPED_SENTINEL: &str = ""; const VALUE_KIND_COUNT: usize = 5; @@ -54,6 +55,14 @@ fn redacted_value(writer: &mut NonStreamingTraceWriter) -> ValueRecord { } } +fn dropped_value(writer: &mut NonStreamingTraceWriter) -> ValueRecord { + let ty = TraceWriter::ensure_type_id(writer, TypeKind::Raw, "Dropped"); + ValueRecord::Error { + msg: DROPPED_SENTINEL.to_string(), + type_id: ty, + } +} + fn record_redaction(kind: ValueKind, candidate: &str, telemetry: Option<&mut ValueFilterStats>) { if let Some(stats) = telemetry { stats.record_redaction(kind); @@ -322,8 +331,7 @@ pub fn record_return_value( ValueKind::Return, name, telemetry.as_deref_mut(), - ); - if let Some(encoded) = encoded { - TraceWriter::register_return(writer, encoded); - } + ) + .unwrap_or_else(|| dropped_value(writer)); + TraceWriter::register_return(writer, encoded); } diff 
--git a/design-docs/US0028 - Configurable Python trace filters.md b/design-docs/US0028 - Configurable Python trace filters.md index 2179452..49fe267 100644 --- a/design-docs/US0028 - Configurable Python trace filters.md +++ b/design-docs/US0028 - Configurable Python trace filters.md @@ -159,7 +159,7 @@ Callers validate whether a parsed selector is legal in the current context (e.g. 1. Initialize the execution policy to `scope.default_exec` (or the inherited value when composing filters). 2. Walk `scope.rules` from top to bottom. Each rule whose selector matches the current frame updates the execution policy (`trace` vs `skip`) and the active default for value capture. Later matching rules replace earlier decisions because the traversal never rewinds. 3. For value capture inside a scope, start from the applicable default (`scope.default_value_action`, overridden by the scope rule’s `value_default` when provided). -4. Apply each `value_patterns` entry in order. The first pattern whose selector matches the variable or payload sets the decision to `allow` (serialize), `redact` (replace with the redaction sentinel), or `drop` (omit entirely) and stops further evaluation for that value. +4. Apply each `value_patterns` entry in order. The first pattern whose selector matches the variable or payload sets the decision to `allow` (serialize), `redact` (replace with the redaction sentinel), or `drop` (omit entirely; return-value drops still emit a structural return edge with a `Dropped`-typed placeholder) and stops further evaluation for that value. 5. If no pattern matches, fall back to the current default value action. ## Sample Filters (TOML) diff --git a/design-docs/prototypes/module_import_events.py b/design-docs/prototypes/module_import_events.py new file mode 100644 index 0000000..665c474 --- /dev/null +++ b/design-docs/prototypes/module_import_events.py @@ -0,0 +1,302 @@ +#!/usr/bin/env python3 +""" +Prototype helper that shows which `sys.monitoring` events fire while a module +import runs to completion. 
The goal is to understand which callbacks the Rust +runtime should listen to in order to balance `<module>` start and finish events. + +Usage examples: + + # Inspect a concrete file that is not on sys.path. + python design-docs/prototypes/module_import_events.py \\ + --module-path /tmp/demo_module.py + + # Inspect an importable module that already lives on sys.path. + python design-docs/prototypes/module_import_events.py \\ + --module json + +Pass ``--include-lines`` to log `LINE` events as well, and ``--show-all`` to dump +events for every file that executed during the import (not just the target). +""" + +from __future__ import annotations + +import argparse +import contextlib +import importlib +import importlib.util +import itertools +import sys +from dataclasses import dataclass +from pathlib import Path +from types import CodeType, ModuleType +from typing import Dict, Iterable, Iterator, List, Optional, Tuple + + +_MODULE_ALIAS_COUNTER = itertools.count() + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Record sys.monitoring events for a module import." + ) + target = parser.add_mutually_exclusive_group(required=True) + target.add_argument( + "--module", + help="Importable module name (e.g., 'json'). Must already be on sys.path.", + ) + target.add_argument( + "--module-path", + type=Path, + help="Path to a Python file to import via spec_from_file_location.", + ) + parser.add_argument( + "--alias", + help=( + "Optional module alias when using --module-path. " + "Defaults to an auto-generated unique name." 
+ ), + ) + parser.add_argument( + "--include-lines", + action="store_true", + help="Record LINE events in addition to start/finish callbacks.", + ) + parser.add_argument( + "--show-all", + action="store_true", + help="Print every recorded event, not just the ones for the target module.", + ) + return parser.parse_args() + + +@dataclass +class EventRecord: + index: int + event: str + code_name: str + filename: str + detail: str + + +class MonitoringProbe: + """Capture sys.monitoring callbacks and store them as EventRecord entries.""" + + def __init__(self) -> None: + self._records: List[EventRecord] = [] + self._counter = itertools.count() + + def _add(self, event: str, code: CodeType, detail: str) -> None: + self._records.append( + EventRecord( + index=next(self._counter), + event=event, + code_name=code.co_name, + filename=code.co_filename, + detail=detail, + ) + ) + + def on_py_start(self, code: CodeType, offset: int) -> None: + self._add("PY_START", code, f"offset={offset}") + + def on_py_return(self, code: CodeType, offset: int, retval: object) -> None: + self._add( + "PY_RETURN", + code, + f"offset={offset}, retval={describe_value(retval)}", + ) + + def on_py_unwind(self, code: CodeType, offset: int, exc: object) -> None: + self._add( + "PY_UNWIND", + code, + f"offset={offset}, exception={describe_value(exc)}", + ) + + def on_py_yield(self, code: CodeType, offset: int, value: object) -> None: + self._add( + "PY_YIELD", + code, + f"offset={offset}, yielded={describe_value(value)}", + ) + + def on_py_resume(self, code: CodeType, offset: int) -> None: + self._add("PY_RESUME", code, f"offset={offset}") + + def on_py_throw(self, code: CodeType, offset: int, exc: object) -> None: + self._add( + "PY_THROW", + code, + f"offset={offset}, exception={describe_value(exc)}", + ) + + def on_line(self, code: CodeType, line: int) -> None: + self._add("LINE", code, f"line={line}") + + @property + def records(self) -> List[EventRecord]: + return list(self._records) + + def 
records_for(self, focus: Optional[Path]) -> List[EventRecord]: + if focus is None: + return list(self._records) + focus_norm = normalize_path(focus) + matches: List[EventRecord] = [] + for record in self._records: + filename = record.filename + if filename.startswith("<") and filename.endswith(">"): + continue + if normalize_path(filename) == focus_norm: + matches.append(record) + return matches + + +def describe_value(value: object, limit: int = 80) -> str: + """Return a safe, trimmed repr for value payloads recorded in callbacks.""" + try: + text = repr(value) + except Exception as exc: # pragma: no cover - prototyping aid + text = f"<repr failed: {type(exc).__name__}>" + if len(text) > limit: + text = text[: limit - 3] + "..." + return f"{type(value).__name__}={text}" + + +def normalize_path(value: Path | str) -> str: + raw = str(value) + if raw.startswith("<") and raw.endswith(">"): + return raw + return str(Path(raw).resolve()) + + +def acquire_tool_id(name: str) -> Tuple[int, object]: + """Reserve a monitoring tool id, trying the 6 CPython slots.""" + monitoring = sys.monitoring + for candidate in range(6): + try: + monitoring.use_tool_id(candidate, name) + except (RuntimeError, ValueError): + continue + return candidate, monitoring + raise RuntimeError("all sys.monitoring tool ids are already in use") + + +@contextlib.contextmanager +def monitor_events(probe: MonitoringProbe, include_lines: bool) -> Iterator[None]: + tool_id, monitoring = acquire_tool_id("module-import-events") + events = monitoring.events + callbacks: Dict[int, object] = { + events.PY_START: probe.on_py_start, + events.PY_RETURN: probe.on_py_return, + events.PY_UNWIND: probe.on_py_unwind, + events.PY_YIELD: probe.on_py_yield, + events.PY_RESUME: probe.on_py_resume, + events.PY_THROW: probe.on_py_throw, + } + if include_lines: + callbacks[events.LINE] = probe.on_line + + mask = 0 + for event_id, handler in callbacks.items(): + monitoring.register_callback(tool_id, event_id, handler) + mask |= event_id + 
monitoring.set_events(tool_id, mask) + + try: + yield + finally: + monitoring.set_events(tool_id, 0) + for event_id in callbacks: + monitoring.register_callback(tool_id, event_id, None) + monitoring.free_tool_id(tool_id) + + +def import_target(args: argparse.Namespace) -> Tuple[ModuleType, Optional[Path]]: + if args.module: + module = importlib.import_module(args.module) + file_attr = getattr(module, "__file__", None) + module_path = Path(file_attr).resolve() if file_attr else None + return module, module_path + + assert args.module_path is not None + module_path = args.module_path.resolve() + module_name = args.alias or f"import_probe_{module_path.stem}_{next(_MODULE_ALIAS_COUNTER)}" + spec = importlib.util.spec_from_file_location(module_name, module_path) + if spec is None or spec.loader is None: + raise RuntimeError(f"failed to load module spec for {module_path}") + module = importlib.util.module_from_spec(spec) + sys.modules[module_name] = module + spec.loader.exec_module(module) + return module, module_path + + +def print_report( + probe: MonitoringProbe, focus: Optional[Path], show_all: bool, include_lines: bool +) -> None: + total = len(probe.records) + section = "=" * 72 + print(f"\n{section}") + print("Module import monitoring session") + print(f"Recorded events: {total}") + print(f"Line events included: {include_lines}") + if focus: + print(f"Target path: {focus}") + print(section) + + focus_records = probe.records_for(focus) + title = ( + "Events for target module" + if focus + else "Events captured during import" + ) + print(f"\n{title}:") + _print_records(focus_records if focus else probe.records) + + if focus and show_all: + print("\nAll recorded events (including other modules):") + _print_records(probe.records) + elif focus and len(focus_records) < total: + remainder = total - len(focus_records) + print( + f"\nNOTE: {remainder} additional events came from other files " + "during this import. Use --show-all to display them." 
+ ) + + +def _print_records(records: Iterable[EventRecord]) -> None: + records = list(records) + if not records: + print(" (no events)") + return + for record in records: + path_display = record.filename + if len(path_display) > 60: + path_display = "..." + path_display[-57:] + print( + f" #{record.index:03d} {record.event:<10} " + f"{record.code_name:<20} {record.detail} [{path_display}]" + ) + + +def main() -> None: + args = parse_args() + probe = MonitoringProbe() + focus_path: Optional[Path] = args.module_path.resolve() if args.module_path else None + import_error: Optional[BaseException] = None + + with monitor_events(probe, include_lines=args.include_lines): + try: + _, loaded_path = import_target(args) + if loaded_path is not None: + focus_path = loaded_path + except BaseException as exc: # keep prototype noise visible + import_error = exc + + print_report(probe, focus_path, args.show_all, args.include_lines) + + if import_error: + raise import_error + + +if __name__ == "__main__": + main() diff --git a/docs/onboarding/trace-filters.md b/docs/onboarding/trace-filters.md index 6337caa..02d5780 100644 --- a/docs/onboarding/trace-filters.md +++ b/docs/onboarding/trace-filters.md @@ -17,6 +17,7 @@ - Optional `value_default` override (`"allow"`/`"redact"`/`"drop"`). - Optional `reason` string stored in telemetry. - `[[scope.rules.value_patterns]]` entries that refine value capture by selector. +- `drop` removes the payload for locals/globals/args/attrs entirely. When a `ret:*` selector resolves to `drop`, the recorder still emits the structural return event with a `Dropped`-typed placeholder so call/return pairs remain balanced. 
- Example: ```toml [meta] diff --git a/examples/__init__.py b/examples/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/examples/__init__.py @@ -0,0 +1 @@ + diff --git a/examples/abc.py b/examples/abc.py new file mode 100644 index 0000000..02aca1c --- /dev/null +++ b/examples/abc.py @@ -0,0 +1 @@ +print("HELLO") diff --git a/examples/abcd.py b/examples/abcd.py new file mode 100644 index 0000000..b2d1929 --- /dev/null +++ b/examples/abcd.py @@ -0,0 +1,17 @@ +print("HELLO") + +def g(): + print("GG") + +def h(): + print("HH") + g() + +h() + + +class B: + def __init__(self): + print("HOOO") + +print("DONE") diff --git a/examples/hello.py b/examples/hello.py index c688b47..2a3fa08 100644 --- a/examples/hello.py +++ b/examples/hello.py @@ -1,8 +1,13 @@ +print("A") +import abcd +print("B") def f(a, b): if a>0: - return f(a-1, b+1) + out = f(a-1, b+1) + return out else: return b print(f(5,1)) +print("BYE")