Skip to content

Commit 64dd247

Browse files
committed
refactor: Step 6 - Clean-up and follow-up tasks
- Remove obsolete comments (e.g., `//TODO AI!` placeholders) or move them into GitHub issues. - Update documentation and diagrams to reflect the new module tree. - Re-run `just test` and linting for both Rust and Python components; capture trace artifacts to confirm unchanged output format.
1 parent 68448f5 commit 64dd247

File tree

10 files changed

+85
-30
lines changed

10 files changed

+85
-30
lines changed

codetracer-python-recorder/src/code_object.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
//! Shared code-object caching utilities for sys.monitoring callbacks.
2+
13
use dashmap::DashMap;
24
use once_cell::sync::OnceCell;
35
use pyo3::prelude::*;
@@ -148,7 +150,8 @@ impl CodeObjectRegistry {
148150
self.map
149151
.entry(id)
150152
.or_insert_with(|| Arc::new(CodeObjectWrapper::new(py, code)))
151-
.clone() //AI? Why do we need to clone here?
153+
// Clone the `Arc` so each caller receives its own reference-counted handle.
154+
.clone()
152155
}
153156

154157
/// Remove the wrapper for a given code id, if present.

codetracer-python-recorder/src/logging.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
//! Process-wide logging helpers shared by the PyO3 entry points and tests.
2+
13
use std::sync::Once;
24

35
/// Initialise the process-wide Rust logger with a default filter.

codetracer-python-recorder/src/monitoring/mod.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
//! Helpers around CPython's `sys.monitoring` API.
2+
13
use pyo3::prelude::*;
24
use pyo3::types::PyCFunction;
35
use std::sync::OnceLock;
@@ -8,11 +10,13 @@ pub use tracer::{flush_installed_tracer, install_tracer, uninstall_tracer, Trace
810

911
const MONITORING_TOOL_NAME: &str = "codetracer";
1012

13+
/// Identifier for a monitoring event bit mask.
1114
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
1215
#[repr(transparent)]
1316
pub struct EventId(pub i32);
1417

1518
#[allow(non_snake_case)]
19+
/// Structured access to CPython's `sys.monitoring.events` values.
1620
#[derive(Clone, Copy, Debug)]
1721
pub struct MonitoringEvents {
1822
pub BRANCH: EventId,
@@ -34,16 +38,19 @@ pub struct MonitoringEvents {
3438
//pub STOP_ITERATION: EventId, //See comment in Tracer trait
3539
}
3640

41+
/// Wrapper returned by `sys.monitoring.use_tool_id`.
3742
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
3843
pub struct ToolId {
3944
pub id: u8,
4045
}
4146

4247
pub type CallbackFn<'py> = Bound<'py, PyCFunction>;
4348

49+
/// Bit-set describing which events are enabled for a tool.
4450
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
4551
pub struct EventSet(pub i32);
4652

53+
/// Convenience constant representing an empty event mask.
4754
pub const NO_EVENTS: EventSet = EventSet(0);
4855

4956
/// Outcome returned by tracer callbacks to control CPython monitoring.
@@ -62,22 +69,26 @@ pub type CallbackResult = PyResult<CallbackOutcome>;
6269
static MONITORING_EVENTS: OnceLock<MonitoringEvents> = OnceLock::new();
6370

6471
impl EventSet {
72+
/// Create an empty event mask.
6573
pub const fn empty() -> Self {
6674
NO_EVENTS
6775
}
6876

77+
/// Return true when the set includes the provided event identifier.
6978
pub fn contains(&self, ev: &EventId) -> bool {
7079
(self.0 & ev.0) != 0
7180
}
7281
}
7382

83+
/// Acquire a monitoring tool id for Codetracer.
7484
pub fn acquire_tool_id(py: Python<'_>) -> PyResult<ToolId> {
7585
let monitoring = py.import("sys")?.getattr("monitoring")?;
7686
const FALLBACK_ID: u8 = 5;
7787
monitoring.call_method1("use_tool_id", (FALLBACK_ID, MONITORING_TOOL_NAME))?;
7888
Ok(ToolId { id: FALLBACK_ID })
7989
}
8090

91+
/// Load monitoring event identifiers from CPython.
8192
pub fn load_monitoring_events(py: Python<'_>) -> PyResult<MonitoringEvents> {
8293
let monitoring = py.import("sys")?.getattr("monitoring")?;
8394
let events = monitoring.getattr("events")?;
@@ -102,6 +113,7 @@ pub fn load_monitoring_events(py: Python<'_>) -> PyResult<MonitoringEvents> {
102113
})
103114
}
104115

116+
/// Cache and return the monitoring event structure for the current interpreter.
105117
pub fn monitoring_events(py: Python<'_>) -> PyResult<&'static MonitoringEvents> {
106118
if let Some(ev) = MONITORING_EVENTS.get() {
107119
return Ok(ev);
@@ -111,6 +123,7 @@ pub fn monitoring_events(py: Python<'_>) -> PyResult<&'static MonitoringEvents>
111123
Ok(MONITORING_EVENTS.get().unwrap())
112124
}
113125

126+
/// Register or unregister a single callback for the provided event.
114127
pub fn register_callback(
115128
py: Python<'_>,
116129
tool: &ToolId,
@@ -129,6 +142,7 @@ pub fn register_callback(
129142
Ok(())
130143
}
131144

145+
/// Combine multiple event ids into a single bit mask.
132146
pub fn events_union(ids: &[EventId]) -> EventSet {
133147
let mut bits = 0i32;
134148
for id in ids {
@@ -137,12 +151,14 @@ pub fn events_union(ids: &[EventId]) -> EventSet {
137151
EventSet(bits)
138152
}
139153

154+
/// Enable events for the given tool id.
140155
pub fn set_events(py: Python<'_>, tool: &ToolId, set: EventSet) -> PyResult<()> {
141156
let monitoring = py.import("sys")?.getattr("monitoring")?;
142157
monitoring.call_method1("set_events", (tool.id, set.0))?;
143158
Ok(())
144159
}
145160

161+
/// Release a previously acquired monitoring tool id.
146162
pub fn free_tool_id(py: Python<'_>, tool: &ToolId) -> PyResult<()> {
147163
let monitoring = py.import("sys")?.getattr("monitoring")?;
148164
monitoring.call_method1("free_tool_id", (tool.id,))?;

codetracer-python-recorder/src/monitoring/tracer.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
//! Tracer trait and sys.monitoring callback plumbing.
2+
13
use std::any::Any;
24
use std::sync::Mutex;
35

codetracer-python-recorder/src/runtime/activation.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
//! Activation gating for the runtime tracer.
2+
13
use std::path::{Path, PathBuf};
24

35
use pyo3::Python;

codetracer-python-recorder/src/runtime/mod.rs

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
//! Runtime tracer facade translating sys.monitoring callbacks into `runtime_tracing` records.
2+
13
mod activation;
24
mod output_paths;
35
mod value_encoder;
@@ -7,7 +9,7 @@ pub use output_paths::TraceOutputPaths;
79
use activation::ActivationController;
810
use value_encoder::encode_value;
911

10-
use std::collections::HashSet;
12+
use std::collections::{hash_map::Entry, HashMap, HashSet};
1113
use std::path::{Path, PathBuf};
1214

1315
use pyo3::prelude::*;
@@ -37,6 +39,7 @@ pub struct RuntimeTracer {
3739
activation: ActivationController,
3840
program_path: PathBuf,
3941
ignored_code_ids: HashSet<usize>,
42+
function_ids: HashMap<usize, runtime_tracing::FunctionId>,
4043
}
4144

4245
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
@@ -67,6 +70,7 @@ impl RuntimeTracer {
6770
activation,
6871
program_path,
6972
ignored_code_ids: HashSet::new(),
73+
function_ids: HashMap::new(),
7074
}
7175
}
7276

@@ -90,17 +94,21 @@ impl RuntimeTracer {
9094
py: Python<'_>,
9195
code: &CodeObjectWrapper,
9296
) -> PyResult<runtime_tracing::FunctionId> {
93-
//TODO AI! current runtime tracer logic expects that `name` is unique and is used as a key for the function.
94-
//This is wrong. We need to write a test that exposes this issue
95-
let name = code.qualname(py)?;
96-
let filename = code.filename(py)?;
97-
let first_line = code.first_line(py)?;
98-
Ok(TraceWriter::ensure_function_id(
99-
&mut self.writer,
100-
name,
101-
Path::new(filename),
102-
Line(first_line as i64),
103-
))
97+
match self.function_ids.entry(code.id()) {
98+
Entry::Occupied(entry) => Ok(*entry.get()),
99+
Entry::Vacant(slot) => {
100+
let name = code.qualname(py)?;
101+
let filename = code.filename(py)?;
102+
let first_line = code.first_line(py)?;
103+
let function_id = TraceWriter::ensure_function_id(
104+
&mut self.writer,
105+
name,
106+
Path::new(filename),
107+
Line(first_line as i64),
108+
);
109+
Ok(*slot.insert(function_id))
110+
}
111+
}
104112
}
105113

106114
fn should_trace_code(&mut self, py: Python<'_>, code: &CodeObjectWrapper) -> ShouldTrace {
@@ -442,6 +450,7 @@ impl Tracer for RuntimeTracer {
442450
TraceWriter::finish_writing_trace_paths(&mut self.writer).map_err(to_py_err)?;
443451
TraceWriter::finish_writing_trace_events(&mut self.writer).map_err(to_py_err)?;
444452
self.ignored_code_ids.clear();
453+
self.function_ids.clear();
445454
Ok(())
446455
}
447456
}

codetracer-python-recorder/src/runtime/output_paths.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
//! File-system helpers for trace output management.
2+
13
use std::error::Error;
24
use std::path::{Path, PathBuf};
35

codetracer-python-recorder/src/runtime/value_encoder.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
//! Encode Python values into `runtime_tracing` records.
2+
13
use pyo3::prelude::*;
24
use pyo3::types::{PyAny, PyDict, PyList, PyTuple};
35
use runtime_tracing::{NonStreamingTraceWriter, TraceWriter, TypeKind, ValueRecord, NONE_VALUE};

codetracer-python-recorder/src/session.rs

Lines changed: 32 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
//! PyO3 entry points for starting and managing trace sessions.
2+
13
use std::fs;
24
use std::path::Path;
35
use std::sync::atomic::{AtomicBool, Ordering};
@@ -8,10 +10,23 @@ use pyo3::prelude::*;
810
use crate::logging::init_rust_logging_with_default;
911
use crate::monitoring::{flush_installed_tracer, install_tracer, uninstall_tracer};
1012
use crate::runtime::{RuntimeTracer, TraceOutputPaths};
13+
use runtime_tracing::TraceEventsFileFormat;
1114

1215
/// Global flag tracking whether tracing is active.
1316
static ACTIVE: AtomicBool = AtomicBool::new(false);
1417

18+
/// Map human-friendly strings to `TraceEventsFileFormat` variants.
19+
fn parse_trace_format(format: &str) -> PyResult<TraceEventsFileFormat> {
20+
match format.to_ascii_lowercase().as_str() {
21+
"json" => Ok(TraceEventsFileFormat::Json),
22+
// Accept historical aliases that may still be referenced by callers.
23+
"binary" | "binaryv0" | "binary_v0" | "b0" => Ok(TraceEventsFileFormat::BinaryV0),
24+
other => Err(PyRuntimeError::new_err(format!(
25+
"unsupported trace format '{other}'. Expected one of: json, binary"
26+
))),
27+
}
28+
}
29+
1530
/// Start tracing using sys.monitoring and runtime_tracing writer.
1631
#[pyfunction]
1732
pub fn start_tracing(path: &str, format: &str, activation_path: Option<&str>) -> PyResult<()> {
@@ -36,19 +51,7 @@ pub fn start_tracing(path: &str, format: &str, activation_path: Option<&str>) ->
3651
})?;
3752
}
3853

39-
// Map format string to enum
40-
let fmt = match format.to_lowercase().as_str() {
41-
"json" => runtime_tracing::TraceEventsFileFormat::Json,
42-
// Use BinaryV0 for "binary" to avoid streaming writer here.
43-
"binary" | "binaryv0" | "binary_v0" | "b0" => {
44-
runtime_tracing::TraceEventsFileFormat::BinaryV0
45-
}
46-
//TODO AI! We need to assert! that the format is among the known values.
47-
other => {
48-
eprintln!("Unknown format '{}', defaulting to binary (v0)", other);
49-
runtime_tracing::TraceEventsFileFormat::BinaryV0
50-
}
51-
};
54+
let fmt = parse_trace_format(format)?;
5255

5356
let outputs = TraceOutputPaths::new(out_dir, fmt);
5457

@@ -59,10 +62,23 @@ pub fn start_tracing(path: &str, format: &str, activation_path: Option<&str>) ->
5962
// Program and args: keep minimal; Python-side API stores full session info if needed
6063
let sys = py.import("sys")?;
6164
let argv = sys.getattr("argv")?;
62-
let program: String = argv.get_item(0)?.extract::<String>()?;
63-
//TODO: Error-handling. What to do if argv is empty? Does this ever happen?
65+
let program = match argv.get_item(0) {
66+
Ok(obj) => obj.extract::<String>()?,
67+
Err(_) => String::from("<unknown>"),
68+
};
69+
let args = match argv.len() {
70+
Ok(len) if len > 1 => {
71+
let mut items = Vec::with_capacity(len.saturating_sub(1));
72+
for idx in 1..len {
73+
let value: String = argv.get_item(idx)?.extract()?;
74+
items.push(value);
75+
}
76+
items
77+
}
78+
_ => Vec::new(),
79+
};
6480

65-
let mut tracer = RuntimeTracer::new(&program, &[], fmt, activation_path);
81+
let mut tracer = RuntimeTracer::new(&program, &args, fmt, activation_path);
6682
tracer.begin(&outputs, 1)?;
6783

6884
// Install callbacks

design-docs/file-level-srp-refactor-plan.status.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55
- ✅ Step 3 complete: added `src/runtime/mod.rs` with focused `activation`, `value_encoder`, and `output_paths` submodules; `RuntimeTracer` now delegates activation gating, value encoding, and writer initialisation through the façade consumed by `session.rs`.
66
- ✅ Step 4 complete: introduced `src/monitoring/mod.rs` for sys.monitoring types/caches and `src/monitoring/tracer.rs` for the tracer trait plus callback dispatch; rewired `lib.rs`, `session.rs`, and `runtime/mod.rs`, and kept a top-level `tracer` re-export for API stability.
77
- ✅ Step 5 complete: split the Python package into dedicated `formats.py`, `session.py`, and `auto_start.py` modules, trimmed `api.py` to a thin façade, and moved the environment auto-start hook into `__init__.py`.
8+
- ✅ Step 6 complete: resolved outstanding Rust TODOs (format validation, argv handling, function id stability), expanded module documentation so `cargo doc` reflects the architecture, and re-ran `just test` to confirm the refactor remains green.
89
- ✅ Test baseline: `just test` (nextest + pytest) passes with the UV cache scoped to the workspace; direct `cargo test` still requires CPython development symbols.
910

1011
## Next Task
11-
- Step 6: Clean up follow-up items (resolve TODOs, refresh docs/diagrams, and re-run the test/lint suite) to close out the refactor roadmap.
12+
- Plan complete. Identify any new follow-up items as separate tasks once additional requirements surface.

0 commit comments

Comments
 (0)