Skip to content

Commit 2c7bcef

Browse files
authored
Value capture (#40)
* We snapshot all variables at each step of the script.
* We don't step through internal Python code that is not associated with a real Python file, which significantly reduces the size of the trace.
2 parents 1e7df2d + 00bf623 commit 2c7bcef

File tree

16 files changed

+2272
-323
lines changed

16 files changed

+2272
-323
lines changed

.config/nextest.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[profile.default]
2+
# Hard cap on wall-clock time for each individual test.
3+
test-timeout = "60s"

Justfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ test: cargo-test py-test
3838

3939
# Run Rust unit tests without default features to link Python C library
4040
cargo-test:
41-
uv run cargo test --manifest-path codetracer-python-recorder/Cargo.toml --no-default-features
41+
uv run cargo nextest run --manifest-path codetracer-python-recorder/Cargo.toml --no-default-features
4242

4343
py-test:
4444
uv run --group dev --group test pytest

codetracer-python-recorder/Cargo.lock

Lines changed: 49 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

codetracer-python-recorder/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,4 @@ env_logger = "0.11"
2525

2626
[dev-dependencies]
2727
pyo3 = { version = "0.25.1", features = ["auto-initialize"] }
28+
tempfile = "3.10"

codetracer-python-recorder/src/code_object.rs

Lines changed: 41 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1+
use dashmap::DashMap;
12
use once_cell::sync::OnceCell;
23
use pyo3::prelude::*;
34
use pyo3::types::PyCode;
4-
use dashmap::DashMap;
55
use std::sync::Arc;
66

77
/// A wrapper around Python `code` objects providing cached access to
@@ -50,73 +50,73 @@ impl CodeObjectWrapper {
5050
}
5151

5252
pub fn filename<'py>(&'py self, py: Python<'py>) -> PyResult<&'py str> {
53-
let value = self.cache.filename.get_or_try_init(|| -> PyResult<String> {
54-
let s: String = self
55-
.as_bound(py)
56-
.getattr("co_filename")?
57-
.extract()?;
58-
Ok(s)
59-
})?;
53+
let value = self
54+
.cache
55+
.filename
56+
.get_or_try_init(|| -> PyResult<String> {
57+
let s: String = self.as_bound(py).getattr("co_filename")?.extract()?;
58+
Ok(s)
59+
})?;
6060
Ok(value.as_str())
6161
}
6262

6363
pub fn qualname<'py>(&'py self, py: Python<'py>) -> PyResult<&'py str> {
64-
let value = self.cache.qualname.get_or_try_init(|| -> PyResult<String> {
65-
let s: String = self
66-
.as_bound(py)
67-
.getattr("co_qualname")?
68-
.extract()?;
69-
Ok(s)
70-
})?;
64+
let value = self
65+
.cache
66+
.qualname
67+
.get_or_try_init(|| -> PyResult<String> {
68+
let s: String = self.as_bound(py).getattr("co_qualname")?.extract()?;
69+
Ok(s)
70+
})?;
7171
Ok(value.as_str())
7272
}
7373

7474
pub fn first_line(&self, py: Python<'_>) -> PyResult<u32> {
75-
let value = *self.cache.firstlineno.get_or_try_init(|| -> PyResult<u32> {
76-
let v: u32 = self
77-
.as_bound(py)
78-
.getattr("co_firstlineno")?
79-
.extract()?;
80-
Ok(v)
81-
})?;
75+
let value = *self
76+
.cache
77+
.firstlineno
78+
.get_or_try_init(|| -> PyResult<u32> {
79+
let v: u32 = self.as_bound(py).getattr("co_firstlineno")?.extract()?;
80+
Ok(v)
81+
})?;
8282
Ok(value)
8383
}
8484

8585
pub fn arg_count(&self, py: Python<'_>) -> PyResult<u16> {
8686
let value = *self.cache.argcount.get_or_try_init(|| -> PyResult<u16> {
87-
let v: u16 = self
88-
.as_bound(py)
89-
.getattr("co_argcount")?
90-
.extract()?;
87+
let v: u16 = self.as_bound(py).getattr("co_argcount")?.extract()?;
9188
Ok(v)
9289
})?;
9390
Ok(value)
9491
}
9592

9693
pub fn flags(&self, py: Python<'_>) -> PyResult<u32> {
9794
let value = *self.cache.flags.get_or_try_init(|| -> PyResult<u32> {
98-
let v: u32 = self
99-
.as_bound(py)
100-
.getattr("co_flags")?
101-
.extract()?;
95+
let v: u32 = self.as_bound(py).getattr("co_flags")?.extract()?;
10296
Ok(v)
10397
})?;
10498
Ok(value)
10599
}
106100

107101
fn lines<'py>(&'py self, py: Python<'py>) -> PyResult<&'py [LineEntry]> {
108-
let vec = self.cache.lines.get_or_try_init(|| -> PyResult<Vec<LineEntry>> {
109-
let mut entries = Vec::new();
110-
let iter = self.as_bound(py).call_method0("co_lines")?;
111-
let iter = iter.try_iter()?;
112-
for item in iter {
113-
let (start, _end, line): (u32, u32, Option<u32>) = item?.extract()?;
114-
if let Some(line) = line {
115-
entries.push(LineEntry { offset: start, line });
102+
let vec = self
103+
.cache
104+
.lines
105+
.get_or_try_init(|| -> PyResult<Vec<LineEntry>> {
106+
let mut entries = Vec::new();
107+
let iter = self.as_bound(py).call_method0("co_lines")?;
108+
let iter = iter.try_iter()?;
109+
for item in iter {
110+
let (start, _end, line): (u32, u32, Option<u32>) = item?.extract()?;
111+
if let Some(line) = line {
112+
entries.push(LineEntry {
113+
offset: start,
114+
line,
115+
});
116+
}
116117
}
117-
}
118-
Ok(entries)
119-
})?;
118+
Ok(entries)
119+
})?;
120120
Ok(vec.as_slice())
121121
}
122122

@@ -161,4 +161,3 @@ impl CodeObjectRegistry {
161161
self.map.clear();
162162
}
163163
}
164-

codetracer-python-recorder/src/lib.rs

Lines changed: 25 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,23 @@
1+
//! Runtime tracing module backed by PyO3.
2+
//!
3+
//! Tracer implementations must return `CallbackResult` from every callback so they can
4+
//! signal when CPython should disable further monitoring for a location by propagating
5+
//! the `sys.monitoring.DISABLE` sentinel.
6+
17
use std::fs;
2-
use std::path::{PathBuf, Path};
8+
use std::path::Path;
39
use std::sync::atomic::{AtomicBool, Ordering};
410
use std::sync::Once;
511

612
use pyo3::exceptions::PyRuntimeError;
713
use pyo3::prelude::*;
8-
use std::fmt;
9-
1014
pub mod code_object;
11-
pub mod tracer;
1215
mod runtime_tracer;
16+
pub mod tracer;
1317
pub use crate::code_object::{CodeObjectRegistry, CodeObjectWrapper};
14-
pub use crate::tracer::{install_tracer, uninstall_tracer, EventSet, Tracer};
18+
pub use crate::tracer::{
19+
install_tracer, uninstall_tracer, CallbackOutcome, CallbackResult, EventSet, Tracer,
20+
};
1521

1622
/// Global flag tracking whether tracing is active.
1723
static ACTIVE: AtomicBool = AtomicBool::new(false);
@@ -25,20 +31,14 @@ fn init_rust_logging_with_default(default_filter: &str) {
2531
let env = env_logger::Env::default().default_filter_or(default_filter);
2632
// Use a compact format with timestamps and targets to aid debugging.
2733
let mut builder = env_logger::Builder::from_env(env);
28-
builder
29-
.format_timestamp_micros()
30-
.format_target(true);
34+
builder.format_timestamp_micros().format_target(true);
3135
let _ = builder.try_init();
3236
});
3337
}
3438

3539
/// Start tracing using sys.monitoring and runtime_tracing writer.
3640
#[pyfunction]
37-
fn start_tracing(
38-
path: &str,
39-
format: &str,
40-
activation_path: Option<&str>,
41-
) -> PyResult<()> {
41+
fn start_tracing(path: &str, format: &str, activation_path: Option<&str>) -> PyResult<()> {
4242
// Ensure logging is ready before any tracer logs might be emitted.
4343
// Default only our crate to debug to avoid excessive verbosity from deps.
4444
init_rust_logging_with_default("codetracer_python_recorder=debug");
@@ -49,26 +49,31 @@ fn start_tracing(
4949
// Interpret `path` as a directory where trace files will be written.
5050
let out_dir = Path::new(path);
5151
if out_dir.exists() && !out_dir.is_dir() {
52-
return Err(PyRuntimeError::new_err("trace path exists and is not a directory"));
52+
return Err(PyRuntimeError::new_err(
53+
"trace path exists and is not a directory",
54+
));
5355
}
5456
if !out_dir.exists() {
5557
// Best-effort create the directory tree
56-
fs::create_dir_all(&out_dir)
57-
.map_err(|e| PyRuntimeError::new_err(format!("failed to create trace directory: {}", e)))?;
58+
fs::create_dir_all(&out_dir).map_err(|e| {
59+
PyRuntimeError::new_err(format!("failed to create trace directory: {}", e))
60+
})?;
5861
}
5962

6063
// Map format string to enum
6164
let fmt = match format.to_lowercase().as_str() {
6265
"json" => runtime_tracing::TraceEventsFileFormat::Json,
6366
// Use BinaryV0 for "binary" to avoid streaming writer here.
64-
"binary" | "binaryv0" | "binary_v0" | "b0" => runtime_tracing::TraceEventsFileFormat::BinaryV0,
67+
"binary" | "binaryv0" | "binary_v0" | "b0" => {
68+
runtime_tracing::TraceEventsFileFormat::BinaryV0
69+
}
6570
//TODO AI! We need to assert! that the format is among the known values.
6671
other => {
6772
eprintln!("Unknown format '{}', defaulting to binary (v0)", other);
6873
runtime_tracing::TraceEventsFileFormat::BinaryV0
6974
}
7075
};
71-
76+
7277
// Build output file paths inside the directory.
7378
let (events_path, meta_path, paths_path) = match fmt {
7479
runtime_tracing::TraceEventsFileFormat::Json => (
@@ -90,17 +95,10 @@ fn start_tracing(
9095
// Program and args: keep minimal; Python-side API stores full session info if needed
9196
let sys = py.import("sys")?;
9297
let argv = sys.getattr("argv")?;
93-
let program: String = argv
94-
.get_item(0)?
95-
.extract::<String>()?;
98+
let program: String = argv.get_item(0)?.extract::<String>()?;
9699
//TODO: Error-handling. What to do if argv is empty? Does this ever happen?
97100

98-
let mut tracer = runtime_tracer::RuntimeTracer::new(
99-
&program,
100-
&[],
101-
fmt,
102-
activation_path,
103-
);
101+
let mut tracer = runtime_tracer::RuntimeTracer::new(&program, &[], fmt, activation_path);
104102

105103
// Start location: prefer activation path, otherwise best-effort argv[0]
106104
let start_path: &Path = activation_path.unwrap_or(Path::new(&program));

0 commit comments

Comments
 (0)