Skip to content

Commit 4e8a4ed

Browse files
authored
Fix dropped return events (#60)
Before if according to our filter we needed to drop the return value of a function call, we would end up not emitting a "Return" event to the trace at all. This lead to an unbalanced trace file. After this fix we emit Return events but we replace the return value in the event with the string "<dropped>"
2 parents 442043f + c6d0106 commit 4e8a4ed

File tree

9 files changed

+483
-28
lines changed

9 files changed

+483
-28
lines changed

codetracer-python-recorder/src/runtime/tracer/runtime_tracer.rs

Lines changed: 142 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ mod tests {
179179
use std::collections::BTreeMap;
180180
use std::ffi::CString;
181181
use std::fs;
182-
use std::path::Path;
182+
use std::path::{Path, PathBuf};
183183
use std::sync::Arc;
184184
use std::thread;
185185

@@ -716,6 +716,30 @@ result = compute()\n"
716716
});
717717
}
718718

719+
#[pyfunction]
720+
fn capture_py_start(py: Python<'_>, code: Bound<'_, PyCode>, offset: i32) -> PyResult<()> {
721+
ffi::wrap_pyfunction("test_capture_py_start", || {
722+
ACTIVE_TRACER.with(|cell| -> PyResult<()> {
723+
let ptr = cell.get();
724+
if ptr.is_null() {
725+
panic!("No active RuntimeTracer for capture_py_start");
726+
}
727+
unsafe {
728+
let tracer = &mut *ptr;
729+
let wrapper = CodeObjectWrapper::new(py, &code);
730+
match tracer.on_py_start(py, &wrapper, offset) {
731+
Ok(outcome) => {
732+
LAST_OUTCOME.with(|cell| cell.set(Some(outcome)));
733+
Ok(())
734+
}
735+
Err(err) => Err(err),
736+
}
737+
}
738+
})?;
739+
Ok(())
740+
})
741+
}
742+
719743
#[pyfunction]
720744
fn capture_line(py: Python<'_>, code: Bound<'_, PyCode>, lineno: u32) -> PyResult<()> {
721745
ffi::wrap_pyfunction("test_capture_line", || {
@@ -770,7 +794,7 @@ result = compute()\n"
770794

771795
const PRELUDE: &str = r#"
772796
import inspect
773-
from test_tracer import capture_line, capture_return_event
797+
from test_tracer import capture_line, capture_return_event, capture_py_start
774798
775799
def snapshot(line=None):
776800
frame = inspect.currentframe().f_back
@@ -786,6 +810,10 @@ def emit_return(value):
786810
frame = inspect.currentframe().f_back
787811
capture_return_event(frame.f_code, value)
788812
return value
813+
814+
def start_call():
815+
frame = inspect.currentframe().f_back
816+
capture_py_start(frame.f_code, frame.f_lasti)
789817
"#;
790818

791819
#[derive(Debug, Clone, PartialEq)]
@@ -862,9 +890,14 @@ def emit_return(value):
862890

863891
fn ensure_test_module(py: Python<'_>) {
864892
let module = PyModule::new(py, "test_tracer").expect("create module");
893+
module
894+
.add_function(
895+
wrap_pyfunction!(capture_py_start, &module).expect("wrap capture_py_start"),
896+
)
897+
.expect("add py_start capture function");
865898
module
866899
.add_function(wrap_pyfunction!(capture_line, &module).expect("wrap capture_line"))
867-
.expect("add function");
900+
.expect("add line capture function");
868901
module
869902
.add_function(
870903
wrap_pyfunction!(capture_return_event, &module).expect("wrap capture_return_event"),
@@ -906,6 +939,25 @@ def emit_return(value):
906939
fs::write(path, contents.trim_start()).expect("write filter");
907940
}
908941

942+
fn install_drop_everything_filter(project_root: &Path) -> PathBuf {
943+
let filters_dir = project_root.join(".codetracer");
944+
fs::create_dir(&filters_dir).expect("create .codetracer");
945+
let drop_filter_path = filters_dir.join("drop-filter.toml");
946+
write_filter(
947+
&drop_filter_path,
948+
r#"
949+
[meta]
950+
name = "drop-all"
951+
version = 1
952+
953+
[scope]
954+
default_exec = "trace"
955+
default_value_action = "drop"
956+
"#,
957+
);
958+
drop_filter_path
959+
}
960+
909961
#[test]
910962
fn trace_filter_redacts_values() {
911963
Python::with_gil(|py| {
@@ -1125,21 +1177,7 @@ sensitive("s3cr3t")
11251177

11261178
let project = tempfile::tempdir().expect("project dir");
11271179
let project_root = project.path();
1128-
let filters_dir = project_root.join(".codetracer");
1129-
fs::create_dir(&filters_dir).expect("create .codetracer");
1130-
let drop_filter_path = filters_dir.join("drop-filter.toml");
1131-
write_filter(
1132-
&drop_filter_path,
1133-
r#"
1134-
[meta]
1135-
name = "drop-all"
1136-
version = 1
1137-
1138-
[scope]
1139-
default_exec = "trace"
1140-
default_value_action = "drop"
1141-
"#,
1142-
);
1180+
let drop_filter_path = install_drop_everything_filter(project_root);
11431181

11441182
let config = TraceFilterConfig::from_inline_and_paths(
11451183
&[("builtin-default", BUILTIN_TRACE_FILTER)],
@@ -1186,11 +1224,13 @@ dropper()
11861224
}
11871225

11881226
let mut variable_names: Vec<String> = Vec::new();
1189-
let mut return_events = 0usize;
1227+
let mut return_values: Vec<ValueRecord> = Vec::new();
11901228
for event in &tracer.writer.events {
11911229
match event {
11921230
TraceLowLevelEvent::VariableName(name) => variable_names.push(name.clone()),
1193-
TraceLowLevelEvent::Return(_) => return_events += 1,
1231+
TraceLowLevelEvent::Return(record) => {
1232+
return_values.push(record.return_value.clone())
1233+
}
11941234
_ => {}
11951235
}
11961236
}
@@ -1199,9 +1239,89 @@ dropper()
11991239
"expected no variables captured, found {:?}",
12001240
variable_names
12011241
);
1242+
assert_eq!(return_values.len(), 1, "return event should remain balanced");
1243+
match &return_values[0] {
1244+
ValueRecord::Error { msg, .. } => assert_eq!(msg, "<dropped>"),
1245+
other => panic!("expected dropped sentinel return value, got {other:?}"),
1246+
}
1247+
});
1248+
}
1249+
1250+
#[test]
1251+
fn drop_filters_keep_call_return_pairs_balanced() {
1252+
Python::with_gil(|py| {
1253+
ensure_test_module(py);
1254+
1255+
let project = tempfile::tempdir().expect("project dir");
1256+
let project_root = project.path();
1257+
let drop_filter_path = install_drop_everything_filter(project_root);
1258+
1259+
let config = TraceFilterConfig::from_inline_and_paths(
1260+
&[("builtin-default", BUILTIN_TRACE_FILTER)],
1261+
&[drop_filter_path.clone()],
1262+
)
1263+
.expect("load filter chain");
1264+
let engine = Arc::new(TraceFilterEngine::new(config));
1265+
1266+
let app_dir = project_root.join("app");
1267+
fs::create_dir_all(&app_dir).expect("create app dir");
1268+
let script_path = app_dir.join("classes.py");
1269+
let body = r#"
1270+
def initializer(label):
1271+
start_call()
1272+
return emit_return(label.upper())
1273+
1274+
class Alpha:
1275+
TOKEN = initializer("alpha")
1276+
1277+
class Beta:
1278+
TOKEN = initializer("beta")
1279+
1280+
class Gamma:
1281+
TOKEN = initializer("gamma")
1282+
1283+
initializer("omega")
1284+
"#;
1285+
let script = format!("{PRELUDE}\n{body}", PRELUDE = PRELUDE, body = body);
1286+
fs::write(&script_path, script).expect("write script");
1287+
1288+
let mut tracer = RuntimeTracer::new(
1289+
script_path.to_string_lossy().as_ref(),
1290+
&[],
1291+
TraceEventsFileFormat::Json,
1292+
None,
1293+
Some(engine),
1294+
);
1295+
1296+
{
1297+
let _guard = ScopedTracer::new(&mut tracer);
1298+
LAST_OUTCOME.with(|cell| cell.set(None));
1299+
let run_code = format!(
1300+
"import runpy, sys\nsys.path.insert(0, r\"{}\")\nrunpy.run_path(r\"{}\")",
1301+
project_root.display(),
1302+
script_path.display()
1303+
);
1304+
let run_code_c = CString::new(run_code).expect("script contains nul byte");
1305+
py.run(run_code_c.as_c_str(), None, None)
1306+
.expect("execute classes script");
1307+
}
1308+
1309+
let mut call_count = 0usize;
1310+
let mut return_count = 0usize;
1311+
for event in &tracer.writer.events {
1312+
match event {
1313+
TraceLowLevelEvent::Call(_) => call_count += 1,
1314+
TraceLowLevelEvent::Return(_) => return_count += 1,
1315+
_ => {}
1316+
}
1317+
}
1318+
assert!(
1319+
call_count >= 4,
1320+
"expected at least four call events, saw {call_count}"
1321+
);
12021322
assert_eq!(
1203-
return_events, 0,
1204-
"return value should be dropped instead of recorded"
1323+
call_count, return_count,
1324+
"drop filters must keep call/return pairs balanced"
12051325
);
12061326
});
12071327
}

codetracer-python-recorder/src/runtime/value_capture.rs

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ use crate::trace_filter::config::ValueAction;
1919
use crate::trace_filter::engine::{ValueKind, ValuePolicy};
2020

2121
const REDACTED_SENTINEL: &str = "<redacted>";
22+
const DROPPED_SENTINEL: &str = "<dropped>";
2223

2324
const VALUE_KIND_COUNT: usize = 5;
2425

@@ -54,6 +55,14 @@ fn redacted_value(writer: &mut NonStreamingTraceWriter) -> ValueRecord {
5455
}
5556
}
5657

58+
fn dropped_value(writer: &mut NonStreamingTraceWriter) -> ValueRecord {
59+
let ty = TraceWriter::ensure_type_id(writer, TypeKind::Raw, "Dropped");
60+
ValueRecord::Error {
61+
msg: DROPPED_SENTINEL.to_string(),
62+
type_id: ty,
63+
}
64+
}
65+
5766
fn record_redaction(kind: ValueKind, candidate: &str, telemetry: Option<&mut ValueFilterStats>) {
5867
if let Some(stats) = telemetry {
5968
stats.record_redaction(kind);
@@ -322,8 +331,7 @@ pub fn record_return_value(
322331
ValueKind::Return,
323332
name,
324333
telemetry.as_deref_mut(),
325-
);
326-
if let Some(encoded) = encoded {
327-
TraceWriter::register_return(writer, encoded);
328-
}
334+
)
335+
.unwrap_or_else(|| dropped_value(writer));
336+
TraceWriter::register_return(writer, encoded);
329337
}

design-docs/US0028 - Configurable Python trace filters.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ Callers validate whether a parsed selector is legal in the current context (e.g.
159159
1. Initialize the execution policy to `scope.default_exec` (or the inherited value when composing filters).
160160
2. Walk `scope.rules` from top to bottom. Each rule whose selector matches the current frame updates the execution policy (`trace` vs `skip`) and the active default for value capture. Later matching rules replace earlier decisions because the traversal never rewinds.
161161
3. For value capture inside a scope, start from the applicable default (`scope.default_value_action`, overridden by the scope rule’s `value_default` when provided).
162-
4. Apply each `value_patterns` entry in order. The first pattern whose selector matches the variable or payload sets the decision to `allow` (serialize), `redact` (replace with `<redacted>`), or `drop` (omit entirely) and stops further evaluation for that value.
162+
4. Apply each `value_patterns` entry in order. The first pattern whose selector matches the variable or payload sets the decision to `allow` (serialize), `redact` (replace with `<redacted>`), or `drop` (omit entirely; return-value drops still emit a structural return edge with a `<dropped>` placeholder) and stops further evaluation for that value.
163163
5. If no pattern matches, fall back to the current default value action.
164164

165165
## Sample Filters (TOML)

0 commit comments

Comments
 (0)