diff --git a/README.md b/README.md index 8983ebc..078310f 100644 --- a/README.md +++ b/README.md @@ -41,12 +41,13 @@ All subclasses carry the same attributes, so existing handlers can migrate by ca `python -m codetracer_python_recorder` returns: -- `0` when tracing and the target script succeed. -- The script's own exit code when it calls `sys.exit()`. -- `1` when a `RecorderError` bubbles out of startup or shutdown. -- `2` when the CLI arguments are incomplete. +- `0` when the recorder finishes cleanly, even if the traced script exits non-zero. The script's status is still recorded in `trace_metadata.json`, and a warning on stderr highlights the suppressed status. +- `1` when a `RecorderError` bubbles out of startup or shutdown (policy failures, `require_trace`, flush/stop issues). +- `2` when the CLI arguments are incomplete or invalid. -Pass `--codetracer-json-errors` (or set the policy via `configure_policy(json_errors=True)`) to stream a one-line JSON trailer on stderr. The payload includes `run_id`, `trace_id`, `error_code`, `error_kind`, `message`, and the `context` map so downstream tooling can log failures without scraping text. +Opt into mirroring the script's exit code with `--propagate-script-exit` (or `CODETRACER_PROPAGATE_SCRIPT_EXIT=true`). Use `--no-propagate-script-exit` to force suppression, even if the environment enables mirroring. + +Pass `--json-errors` (or set the policy via `configure_policy(json_errors=True)`) to stream a one-line JSON trailer on stderr. The payload includes `run_id`, `trace_id`, `error_code`, `error_kind`, `message`, and the `context` map so downstream tooling can log failures without scraping text. ### IO capture configuration diff --git a/codetracer-python-recorder/CHANGELOG.md b/codetracer-python-recorder/CHANGELOG.md index 10c0710..2f06262 100644 --- a/codetracer-python-recorder/CHANGELOG.md +++ b/codetracer-python-recorder/CHANGELOG.md @@ -10,6 +10,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) ### Changed - Module-level call events now prefer the frame's `__name__`, fall back to filter hints, `sys.path`, and package markers, and no longer depend on the legacy resolver/cache. The globals-derived naming flag now defaults to enabled so direct scripts record `<__main__>` while package imports emit ``, with CLI and environment overrides available for the legacy resolver. +- The CLI now exits with `0` when recording succeeds regardless of the traced script’s status, records a warning when suppressing non-zero script exits, and exposes `--propagate-script-exit` / `CODETRACER_PROPAGATE_SCRIPT_EXIT` / `configure_policy(propagate_script_exit=True)` to restore passthrough semantics. ## [0.2.0] - 2025-10-17 ### Added diff --git a/codetracer-python-recorder/codetracer_python_recorder/cli.py b/codetracer-python-recorder/codetracer_python_recorder/cli.py index 9e72257..2543cd1 100644 --- a/codetracer-python-recorder/codetracer_python_recorder/cli.py +++ b/codetracer-python-recorder/codetracer_python_recorder/cli.py @@ -11,7 +11,7 @@ from pathlib import Path from typing import Iterable, Sequence -from . import flush, start, stop +from . import flush, policy_snapshot, start, stop from .auto_start import ENV_TRACE_FILTER from .formats import DEFAULT_FORMAT, SUPPORTED_FORMATS, normalize_format @@ -129,6 +129,15 @@ def _parse_args(argv: Sequence[str]) -> RecorderCLIConfig: "Use '--no-module-name-from-globals' to fall back to the legacy resolver." ), ) + parser.add_argument( + "--propagate-script-exit", + action=argparse.BooleanOptionalAction, + default=None, + help=( + "Mirror the traced script's exit status when the recorder succeeds (default: disabled). " + "Use '--no-propagate-script-exit' to force a zero exit status." + ), + ) known, remainder = parser.parse_known_args(argv) pending: list[str] = list(remainder) @@ -192,6 +201,8 @@ def _parse_args(argv: Sequence[str]) -> RecorderCLIConfig: parser.error(f"unsupported io-capture mode '{other}'") if known.module_name_from_globals is not None: policy["module_name_from_globals"] = known.module_name_from_globals + if known.propagate_script_exit is not None: + policy["propagate_script_exit"] = known.propagate_script_exit return RecorderCLIConfig( trace_dir=trace_dir, @@ -286,7 +297,11 @@ def main(argv: Iterable[str] | None = None) -> int: sys.argv = old_argv return 1 + snapshot = policy_snapshot() + propagate_script_exit = bool(snapshot.get("propagate_script_exit")) + exit_code: int | None = None + recorder_failed = False try: try: runpy.run_path(str(script_path), run_name="__main__") @@ -297,13 +312,35 @@ def main(argv: Iterable[str] | None = None) -> int: finally: try: flush() + except Exception as exc: + recorder_failed = True + sys.stderr.write(f"Failed to flush Codetracer session: {exc}\n") finally: - stop(exit_code=exit_code) - sys.argv = old_argv + try: + stop(exit_code=exit_code) + except Exception as exc: + recorder_failed = True + sys.stderr.write(f"Failed to stop Codetracer session: {exc}\n") + finally: + sys.argv = old_argv _serialise_metadata(trace_dir, script=script_path) - return exit_code if exit_code is not None else 0 + script_exit_code = exit_code if exit_code is not None else 0 + + if recorder_failed: + return 1 + + if propagate_script_exit: + return script_exit_code + + if script_exit_code != 0: + sys.stderr.write( + f"Script exited with status {script_exit_code}; returning 0. " + "Use '--propagate-script-exit' to mirror the script exit code.\n" + ) + + return 0 __all__ = ("main", "RecorderCLIConfig") diff --git a/codetracer-python-recorder/codetracer_python_recorder/session.py b/codetracer-python-recorder/codetracer_python_recorder/session.py index f7604aa..fefe8bb 100644 --- a/codetracer-python-recorder/codetracer_python_recorder/session.py +++ b/codetracer-python-recorder/codetracer_python_recorder/session.py @@ -89,7 +89,8 @@ def start( policy: Optional mapping of runtime policy overrides forwarded to :func:`configure_policy` before tracing begins. Keys match the policy - keyword arguments (``on_recorder_error``, ``require_trace``, etc.). + keyword arguments (``on_recorder_error``, ``require_trace``, + ``propagate_script_exit``, etc.). apply_env_policy: When ``True`` (default), refresh policy settings from environment variables via :func:`configure_policy_from_env` prior to applying diff --git a/codetracer-python-recorder/src/policy.rs b/codetracer-python-recorder/src/policy.rs index a6dfd66..bff36f4 100644 --- a/codetracer-python-recorder/src/policy.rs +++ b/codetracer-python-recorder/src/policy.rs @@ -8,7 +8,7 @@ mod model; pub use env::{ configure_policy_from_env, ENV_CAPTURE_IO, ENV_JSON_ERRORS, ENV_KEEP_PARTIAL_TRACE, ENV_LOG_FILE, ENV_LOG_LEVEL, ENV_MODULE_NAME_FROM_GLOBALS, ENV_ON_RECORDER_ERROR, - ENV_REQUIRE_TRACE, + ENV_PROPAGATE_SCRIPT_EXIT, ENV_REQUIRE_TRACE, }; #[allow(unused_imports)] pub use ffi::{configure_policy_py, py_configure_policy_from_env, py_policy_snapshot}; @@ -43,6 +43,7 @@ mod tests { assert!(snap.io_capture.line_proxies); assert!(!snap.io_capture.fd_fallback); assert!(snap.module_name_from_globals); + assert!(!snap.propagate_script_exit); } #[test] @@ -58,6 +59,7 @@ mod tests { update.io_capture_line_proxies = Some(true); update.io_capture_fd_fallback = Some(true); update.module_name_from_globals = Some(true); + update.propagate_script_exit = Some(true); apply_policy_update(update); @@ -71,6 +73,7 @@ mod tests { assert!(snap.io_capture.line_proxies); assert!(snap.io_capture.fd_fallback); assert!(snap.module_name_from_globals); + assert!(snap.propagate_script_exit); reset_policy(); } @@ -86,6 +89,7 @@ mod tests { std::env::set_var(ENV_JSON_ERRORS, "yes"); std::env::set_var(ENV_CAPTURE_IO, "proxies,fd"); std::env::set_var(ENV_MODULE_NAME_FROM_GLOBALS, "true"); + std::env::set_var(ENV_PROPAGATE_SCRIPT_EXIT, "true"); configure_policy_from_env().expect("configure from env"); @@ -101,6 +105,7 @@ mod tests { assert!(snap.io_capture.line_proxies); assert!(snap.io_capture.fd_fallback); assert!(snap.module_name_from_globals); + assert!(snap.propagate_script_exit); reset_policy(); } @@ -163,6 +168,7 @@ mod tests { ENV_JSON_ERRORS, ENV_CAPTURE_IO, ENV_MODULE_NAME_FROM_GLOBALS, + ENV_PROPAGATE_SCRIPT_EXIT, ] { std::env::remove_var(key); } diff --git a/codetracer-python-recorder/src/policy/env.rs b/codetracer-python-recorder/src/policy/env.rs index 73312d6..7ff5327 100644 --- a/codetracer-python-recorder/src/policy/env.rs +++ b/codetracer-python-recorder/src/policy/env.rs @@ -21,6 +21,8 @@ pub const ENV_JSON_ERRORS: &str = "CODETRACER_JSON_ERRORS"; pub const ENV_CAPTURE_IO: &str = "CODETRACER_CAPTURE_IO"; /// Environment variable toggling globals-based module name resolution. pub const ENV_MODULE_NAME_FROM_GLOBALS: &str = "CODETRACER_MODULE_NAME_FROM_GLOBALS"; +/// Environment variable toggling whether the recorder mirrors script exit codes. +pub const ENV_PROPAGATE_SCRIPT_EXIT: &str = "CODETRACER_PROPAGATE_SCRIPT_EXIT"; /// Load policy overrides from environment variables. pub fn configure_policy_from_env() -> RecorderResult<()> { @@ -66,6 +68,10 @@ pub fn configure_policy_from_env() -> RecorderResult<()> { update.module_name_from_globals = Some(parse_bool(&value)?); } + if let Ok(value) = env::var(ENV_PROPAGATE_SCRIPT_EXIT) { + update.propagate_script_exit = Some(parse_bool(&value)?); + } + apply_policy_update(update); Ok(()) } @@ -148,6 +154,7 @@ mod tests { std::env::set_var(ENV_JSON_ERRORS, "yes"); std::env::set_var(ENV_CAPTURE_IO, "proxies,fd"); std::env::set_var(ENV_MODULE_NAME_FROM_GLOBALS, "true"); + std::env::set_var(ENV_PROPAGATE_SCRIPT_EXIT, "true"); configure_policy_from_env().expect("configure from env"); let snap = policy_snapshot(); @@ -163,6 +170,7 @@ mod tests { assert!(snap.io_capture.line_proxies); assert!(snap.io_capture.fd_fallback); assert!(snap.module_name_from_globals); + assert!(snap.propagate_script_exit); } #[test] @@ -170,10 +178,12 @@ mod tests { let _guard = EnvGuard; reset_policy_for_tests(); std::env::set_var(ENV_MODULE_NAME_FROM_GLOBALS, "false"); + std::env::set_var(ENV_PROPAGATE_SCRIPT_EXIT, "false"); configure_policy_from_env().expect("configure from env"); let snap = policy_snapshot(); assert!(!snap.module_name_from_globals); + assert!(!snap.propagate_script_exit); } #[test] @@ -202,6 +212,7 @@ mod tests { ENV_JSON_ERRORS, ENV_CAPTURE_IO, ENV_MODULE_NAME_FROM_GLOBALS, + ENV_PROPAGATE_SCRIPT_EXIT, ] { std::env::remove_var(key); } diff --git a/codetracer-python-recorder/src/policy/ffi.rs b/codetracer-python-recorder/src/policy/ffi.rs index 3ec2328..161767d 100644 --- a/codetracer-python-recorder/src/policy/ffi.rs +++ b/codetracer-python-recorder/src/policy/ffi.rs @@ -11,7 +11,7 @@ use std::path::PathBuf; use std::str::FromStr; #[pyfunction(name = "configure_policy")] -#[pyo3(signature = (on_recorder_error=None, require_trace=None, keep_partial_trace=None, log_level=None, log_file=None, json_errors=None, io_capture_line_proxies=None, io_capture_fd_fallback=None, module_name_from_globals=None))] +#[pyo3(signature = (on_recorder_error=None, require_trace=None, keep_partial_trace=None, log_level=None, log_file=None, json_errors=None, io_capture_line_proxies=None, io_capture_fd_fallback=None, module_name_from_globals=None, propagate_script_exit=None))] pub fn configure_policy_py( on_recorder_error: Option<&str>, require_trace: Option, @@ -22,6 +22,7 @@ pub fn configure_policy_py( io_capture_line_proxies: Option, io_capture_fd_fallback: Option, module_name_from_globals: Option, + propagate_script_exit: Option, ) -> PyResult<()> { let mut update = PolicyUpdate::default(); @@ -69,6 +70,10 @@ pub fn configure_policy_py( update.module_name_from_globals = Some(value); } + if let Some(value) = propagate_script_exit { + update.propagate_script_exit = Some(value); + } + apply_policy_update(update); Ok(()) } @@ -106,6 +111,7 @@ pub fn py_policy_snapshot(py: Python<'_>) -> PyResult { "module_name_from_globals", snapshot.module_name_from_globals, )?; + dict.set_item("propagate_script_exit", snapshot.propagate_script_exit)?; let io_dict = PyDict::new(py); io_dict.set_item("line_proxies", snapshot.io_capture.line_proxies)?; @@ -133,6 +139,7 @@ mod tests { Some(true), Some(true), Some(true), + Some(true), ) .expect("configure policy via PyO3 facade"); @@ -152,6 +159,7 @@ mod tests { assert!(snap.io_capture.line_proxies); assert!(snap.io_capture.fd_fallback); assert!(snap.module_name_from_globals); + assert!(snap.propagate_script_exit); reset_policy_for_tests(); } @@ -168,6 +176,7 @@ mod tests { None, None, None, + None, ) .expect_err("invalid variant should error"); // Ensure the error maps through map_recorder_error by checking the display text. @@ -208,6 +217,7 @@ mod tests { Some(false), Some(false), Some(false), + Some(true), ) .expect("configure policy"); @@ -224,6 +234,11 @@ mod tests { dict.contains("io_capture").expect("check io_capture key"), "expected io_capture in snapshot" ); + assert!( + dict.contains("propagate_script_exit") + .expect("check propagate_script_exit key"), + "expected propagate_script_exit in snapshot" + ); }); reset_policy_for_tests(); } @@ -241,6 +256,7 @@ mod tests { super::super::env::ENV_JSON_ERRORS, super::super::env::ENV_CAPTURE_IO, super::super::env::ENV_MODULE_NAME_FROM_GLOBALS, + super::super::env::ENV_PROPAGATE_SCRIPT_EXIT, ] { std::env::remove_var(key); } diff --git a/codetracer-python-recorder/src/policy/model.rs b/codetracer-python-recorder/src/policy/model.rs index c0d77ea..18f7856 100644 --- a/codetracer-python-recorder/src/policy/model.rs +++ b/codetracer-python-recorder/src/policy/model.rs @@ -72,6 +72,7 @@ pub struct RecorderPolicy { pub json_errors: bool, pub io_capture: IoCapturePolicy, pub module_name_from_globals: bool, + pub propagate_script_exit: bool, } impl Default for RecorderPolicy { @@ -85,6 +86,7 @@ impl Default for RecorderPolicy { json_errors: false, io_capture: IoCapturePolicy::default(), module_name_from_globals: true, + propagate_script_exit: false, } } } @@ -128,6 +130,9 @@ impl RecorderPolicy { if let Some(module_name_from_globals) = update.module_name_from_globals { self.module_name_from_globals = module_name_from_globals; } + if let Some(propagate_script_exit) = update.propagate_script_exit { + self.propagate_script_exit = propagate_script_exit; + } } } @@ -150,6 +155,7 @@ pub(crate) struct PolicyUpdate { pub(crate) io_capture_line_proxies: Option, pub(crate) io_capture_fd_fallback: Option, pub(crate) module_name_from_globals: Option, + pub(crate) propagate_script_exit: Option, } /// Snapshot the current policy. diff --git a/codetracer-python-recorder/src/runtime/tracer/runtime_tracer.rs b/codetracer-python-recorder/src/runtime/tracer/runtime_tracer.rs index abcd9f6..8686af9 100644 --- a/codetracer-python-recorder/src/runtime/tracer/runtime_tracer.rs +++ b/codetracer-python-recorder/src/runtime/tracer/runtime_tracer.rs @@ -401,6 +401,7 @@ mod tests { None, None, Some(false), + Some(false), ) .expect("reset recorder policy"); } @@ -633,6 +634,7 @@ result = compute()\n" Some(true), Some(false), Some(false), + Some(false), ) .expect("enable io capture proxies"); @@ -724,6 +726,7 @@ result = compute()\n" Some(true), Some(true), Some(false), + Some(false), ) .expect("enable io capture with fd fallback"); @@ -832,6 +835,7 @@ result = compute()\n" Some(true), Some(false), Some(false), + Some(false), ) .expect("enable proxies without fd fallback"); @@ -2205,6 +2209,7 @@ snapshot() None, None, Some(false), + Some(false), ) .expect("enable require_trace policy"); @@ -2285,6 +2290,7 @@ snapshot() None, None, Some(false), + Some(false), ) .expect("enable keep_partial policy"); diff --git a/codetracer-python-recorder/tests/python/test_exit_payloads.py b/codetracer-python-recorder/tests/python/test_exit_payloads.py index 1473b29..6a67223 100644 --- a/codetracer-python-recorder/tests/python/test_exit_payloads.py +++ b/codetracer-python-recorder/tests/python/test_exit_payloads.py @@ -44,7 +44,8 @@ def test_cli_records_exit_code_in_toplevel_return(tmp_path: Path) -> None: check=False, ) - assert result.returncode == 3, result.stderr + assert result.returncode == 0, result.stderr + assert "status 3; returning 0" in result.stderr exit_value = _last_return_value(trace_dir) assert exit_value["kind"] == "Int" assert exit_value["i"] == 3 @@ -54,6 +55,43 @@ def test_cli_records_exit_code_in_toplevel_return(tmp_path: Path) -> None: assert status == {"code": 3, "label": None} +def test_cli_can_propagate_script_exit(tmp_path: Path) -> None: + script = tmp_path / "exit_script.py" + script.write_text( + "import sys\n" + "sys.exit(5)\n", + encoding="utf-8", + ) + + trace_dir = tmp_path / "trace" + result = subprocess.run( + [ + sys.executable, + "-m", + "codetracer_python_recorder", + "--trace-dir", + str(trace_dir), + "--format", + "json", + "--propagate-script-exit", + str(script), + ], + capture_output=True, + text=True, + check=False, + ) + + assert result.returncode == 5, result.stderr + assert "status 5; returning 0" not in result.stderr + exit_value = _last_return_value(trace_dir) + assert exit_value["kind"] == "Int" + assert exit_value["i"] == 5 + + metadata = json.loads((trace_dir / "trace_metadata.json").read_text(encoding="utf-8")) + status = metadata.get("process_exit_status") + assert status == {"code": 5, "label": None} + + def test_default_exit_payload_uses_placeholder(tmp_path: Path) -> None: trace_dir = tmp_path / "trace" trace_dir.mkdir() diff --git a/codetracer-python-recorder/tests/python/unit/test_policy_configuration.py b/codetracer-python-recorder/tests/python/unit/test_policy_configuration.py index 3056c49..390aab2 100644 --- a/codetracer-python-recorder/tests/python/unit/test_policy_configuration.py +++ b/codetracer-python-recorder/tests/python/unit/test_policy_configuration.py @@ -19,6 +19,7 @@ def reset_policy() -> None: log_file="", json_errors=False, module_name_from_globals=True, + propagate_script_exit=False, ) yield codetracer.configure_policy( @@ -29,6 +30,7 @@ def reset_policy() -> None: log_file="", json_errors=False, module_name_from_globals=True, + propagate_script_exit=False, ) @@ -42,6 +44,7 @@ def test_configure_policy_overrides_values(tmp_path: Path) -> None: log_file=str(target_log), json_errors=True, module_name_from_globals=True, + propagate_script_exit=True, ) snapshot = codetracer.policy_snapshot() @@ -52,6 +55,7 @@ def test_configure_policy_overrides_values(tmp_path: Path) -> None: assert snapshot["log_file"] == str(target_log) assert snapshot["json_errors"] is True assert snapshot["module_name_from_globals"] is True + assert snapshot["propagate_script_exit"] is True def test_policy_env_configuration(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: @@ -63,6 +67,7 @@ def test_policy_env_configuration(monkeypatch: pytest.MonkeyPatch, tmp_path: Pat monkeypatch.setenv("CODETRACER_LOG_FILE", str(log_file)) monkeypatch.setenv("CODETRACER_JSON_ERRORS", "yes") monkeypatch.setenv("CODETRACER_MODULE_NAME_FROM_GLOBALS", "true") + monkeypatch.setenv("CODETRACER_PROPAGATE_SCRIPT_EXIT", "true") codetracer.configure_policy_from_env() @@ -74,6 +79,7 @@ def test_policy_env_configuration(monkeypatch: pytest.MonkeyPatch, tmp_path: Pat assert snapshot["log_file"] == str(log_file) assert snapshot["json_errors"] is True assert snapshot["module_name_from_globals"] is True + assert snapshot["propagate_script_exit"] is True def test_clearing_log_configuration(tmp_path: Path) -> None: @@ -90,6 +96,7 @@ def test_session_start_applies_policy_overrides(tmp_path: Path) -> None: "log_file": tmp_path / "policy.log", "json_errors": True, "module_name_from_globals": True, + "propagate_script_exit": True, } session = session_api.start(tmp_path, policy=policy, apply_env_policy=False) @@ -99,6 +106,7 @@ def test_session_start_applies_policy_overrides(tmp_path: Path) -> None: assert snapshot["log_file"] == str(tmp_path / "policy.log") assert snapshot["json_errors"] is True assert snapshot["module_name_from_globals"] is True + assert snapshot["propagate_script_exit"] is True finally: session.stop() diff --git a/design-docs/adr/0017-recorder-exit-code-policy.md b/design-docs/adr/0017-recorder-exit-code-policy.md new file mode 100644 index 0000000..adf50e9 --- /dev/null +++ b/design-docs/adr/0017-recorder-exit-code-policy.md @@ -0,0 +1,45 @@ +# ADR 0017 – Recorder Exit Code Policy + +- **Status:** Proposed +- **Date:** 2025-10-28 +- **Stakeholders:** Desktop CLI team, Runtime tracer maintainers, Release engineering +- **Related Decisions:** ADR 0005 (Python Recorder DB Backend Integration), ADR 0015 (Balanced Toplevel Lifecycle and Trace Gating) + +## Context + +`ct record` invokes the Rust-backed `codetracer_python_recorder` CLI when capturing Python traces. The CLI currently returns the traced script's process exit code (`codetracer_python_recorder/cli.py:165`). When the target program exits with a non-zero status—whether via `SystemExit`, a failed assertion, or an explicit `sys.exit()`—the recorder propagates that status. The desktop CLI treats any non-zero exit as a fatal recording failure, so trace uploads and follow-on automation abort even though the trace artefacts are valid and the recorder itself completed successfully. + +Our recorder already captures the script's exit status in session metadata (`runtime/tracer/lifecycle.rs:143`) and exposes it through trace viewers. Downstream consumers that need to assert on the original program outcome can read that field. However, other integrations (CI pipelines, `ct record` automations, scripted data collection) rely on the CLI process exit code to decide whether to continue, and they expect Codetracer to return `0` when recording succeeded. + +We must let callers control whether the recorder propagates the script's exit status or reports recorder success independently. The default should favour Codetracer success (exit `0`) to preserve `ct record` expectations, while still allowing advanced users and direct CLI invocations to opt back into passthrough semantics. + +## Decision + +Introduce a recorder exit-code policy with the following behaviour: + +1. **Default:** When tracing completes without recorder errors (start, flush, stop, and write phases succeed and `require_trace` did not trigger), the CLI exits with status `0` regardless of the traced script's exit code. The recorder still records the script's status in trace metadata. +2. **Opt-in passthrough:** Expose a CLI flag `--propagate-script-exit` and environment override `CODETRACER_PROPAGATE_SCRIPT_EXIT`. When enabled, the CLI mirrors the traced script's exit code (the current behaviour). Both configuration surfaces resolve through the recorder policy layer so other entry points (e.g., embedded integrations) can opt in. +3. **User feedback:** If passthrough is disabled and the script exits non-zero, emit a one-line warning on stderr indicating the script's exit status and how to re-enable propagation. +4. **Recorder failure precedence:** Recorder failures (startup errors, policy violations such as `--require-trace`, flush/stop exceptions) continue to exit non-zero irrespective of the propagation setting to ensure automation can detect recorder malfunction. + +This policy applies uniformly to `python -m codetracer_python_recorder`, `ct record`, and any embedding that drives the same CLI module. + +## Consequences + +**Positive** + +- `ct record` can treat successful recordings as success even when the target script fails, unblocking chained workflows and uploads. +- The script's exit status remains available in trace metadata, preserving observability without overloading process exit handling. +- Configuration is explicit and discoverable via CLI help, environment variables, and policy APIs. + +**Negative / Risks** + +- Direct CLI users may miss that their script failed if they rely solely on the process exit code. The stderr warning mitigates this but adds additional output. +- Changing the default may surprise users accustomed to passthrough semantics. Documentation and release notes must highlight the new default and the opt-in flag. +- Additional configuration surface increases policy complexity; we must ensure conflicting overrides (CLI vs. env) resolve predictably. + +## Rollout Notes + +- Update CLI help text, README, and desktop `ct record` documentation to describe the new default and override flag. +- Add regression tests covering both default and passthrough modes (CLI invocation, environment override, policy API). +- Communicate the change in the recorder CHANGELOG and release notes so downstream automation owners can adjust expectations. diff --git a/design-docs/recorder-exit-code-policy-implementation-plan.md b/design-docs/recorder-exit-code-policy-implementation-plan.md new file mode 100644 index 0000000..37551e7 --- /dev/null +++ b/design-docs/recorder-exit-code-policy-implementation-plan.md @@ -0,0 +1,61 @@ +# Recorder Exit Code Policy – Implementation Plan + +Plan owners: codetracer Python recorder maintainers +Related ADR: 0017 – Recorder Exit Code Policy +Target release: codetracer-python-recorder 0.x (next minor) + +## Goals +- Default the recorder CLI to exit with status `0` when tracing succeeds, even if the target script exits non-zero. +- Preserve the script exit status in trace metadata and surface it through logs so users stay informed. +- Provide consistent configuration knobs (CLI flag, environment variable, policy API) to re-enable exit-code passthrough when desired. +- Ensure recorder failures (`start`, `flush`, `stop`, `require_trace`) still emit non-zero exit codes. + +## Non-Goals +- Changing how `ct record` parses or surfaces recorder output beyond the new default. +- Altering metadata schemas storing script exit information. +- Introducing scripting hooks for arbitrary exit-code transforms outside passthrough vs. success modes. + +## Current Gaps +- `codetracer_python_recorder.cli.main` (`codetracer_python_recorder/cli.py:165`) always returns the traced script's exit code; there is no concept of recorder success vs. script result. +- `RecorderPolicy` (`src/policy/model.rs`) and associated FFI lack an exit-code behaviour flag, so env policy and embedding APIs cannot control the outcome. +- No CLI argument or environment variable communicates the user's preference, and the help text/docs imply passthrough semantics. +- There are no regression tests asserting CLI exit behaviour; `ct record` integration relies on current passthrough behaviour implicitly. + +## Workstreams + +### WS1 – Policy & Configuration Plumbing +**Scope:** Extend recorder policy models and configuration surfaces with an exit-code behaviour flag. +- Add `propagate_script_exit_code: bool` to `RecorderPolicy` (default `false`) plus matching field in `PolicyUpdate`; update `apply_update`/`Default` implementations. +- Extend PyO3 bindings `configure_policy_py`/`policy_snapshot` to accept and expose `propagate_script_exit_code`. +- Add environment variable `CODETRACER_PROPAGATE_SCRIPT_EXIT` in `policy/env.rs`, parsing booleans via existing helpers. +- Update Python session helpers (`session.py`) to pass through a `propagate_script_exit` policy key, including `_coerce_policy_kwargs`. +- Unit tests: + - Rust: policy default and update round-trips (`policy::model` & `policy::ffi` tests). + - Rust: env configuration toggles new flag and rejects invalid values. + - Python: `configure_policy` keyword argument path accepts the new key. + +### WS2 – CLI Behaviour & Warning Surface +**Scope:** Teach the CLI to honour the new policy and compute the final exit status. +- Introduce `--propagate-script-exit` boolean flag (default `False`) wired into CLI help; set `policy["propagate_script_exit"] = True` when provided. +- After `start(...)`, cache the effective propagation flag by inspecting CLI config and, when unspecified, consulting `policy_snapshot()` to honour env defaults. +- Rework `main`'s shutdown path: + - Track recorder success across `start`, script execution, `flush`, and `stop`. + - Decide final process exit: return `script_exit` if propagation enabled, otherwise `0` when recorder succeeded; use a distinct error code (existing) when recorder fails. + - On non-zero script exit with propagation disabled, emit a concise stderr warning mentioning the exit status and `--propagate-script-exit`. +- Ensure the script exit status continues to flow into `stop(exit_code=...)` and metadata serialisation unchanged. +- Add CLI unit/integration tests (pytest) covering combinations: default non-propagating success/failure, `--propagate-script-exit`, and recorder failure paths (e.g., missing script). + +### WS3 – Documentation, Tooling, and Release Notes +**Scope:** Update user-facing materials and automation checks. +- Refresh CLI `--help`, README, and docs (`docs/book/src/...` if applicable) to describe default exit behaviour and configuration options. +- Document `CODETRACER_PROPAGATE_SCRIPT_EXIT` and Python policy key in API guides. +- Add CHANGELOG entry summarising behaviour change and migration guidance for users relying on passthrough. +- Extend CI/test harness: + - Add regression test via `just test` hitting CLI exit codes (likely in Python test suite under `tests/`). + - Update any existing `ct record` integration smoke tests to pin the expected default (0) where relevant. +- Coordinate with desktop CLI maintainers to flip their expectations once the recorder release lands. + +## Timeline & Dependencies +- WS1 should land first to provide configuration plumbing for CLI work. +- WS2 depends on WS1's policy flag; both should merge within the same feature branch to avoid transient inconsistent behaviour. +- WS3 can progress in parallel once WS2 stabilises, but final doc updates should wait for CLI flag names to settle. diff --git a/design-docs/recorder-exit-code-policy-implementation-plan.status.md b/design-docs/recorder-exit-code-policy-implementation-plan.status.md new file mode 100644 index 0000000..cd5508e --- /dev/null +++ b/design-docs/recorder-exit-code-policy-implementation-plan.status.md @@ -0,0 +1,13 @@ +# Recorder Exit Code Policy – Status + +## Relevant Design Docs +- `design-docs/adr/0017-recorder-exit-code-policy.md` +- `design-docs/recorder-exit-code-policy-implementation-plan.md` + +## Workstream Progress +- ✅ **WS1 – Policy & Configuration Plumbing:** Added `propagate_script_exit` across `RecorderPolicy`, `PolicyUpdate`, PyO3 bindings, env parsing, and Python helpers; introduced `CODETRACER_PROPAGATE_SCRIPT_EXIT`; updated Rust + Python unit coverage; rebuilt the dev wheel (`maturin develop --features integration-test`) and verified via `just test`. +- ✅ **WS2 – CLI Behaviour & Warning Surface:** CLI now defaults to returning `0` on successful recordings, exposes `--propagate-script-exit`/`--no-propagate-script-exit`, emits a warning when suppressing non-zero script exits, and preserves non-zero statuses for recorder failures; added regression coverage (`test_exit_payloads.py`) and executed `just dev test`. +- ✅ **WS3 – Documentation, Tooling, and Release Notes:** README and onboarding docs describe the new default, flags, and env overrides; changelog notes the behavioural change; `just dev test` re-run after docs to confirm tooling. + +## Current Focus +- Monitor downstream consumers for feedback on the new default and capture any follow-up doc gaps or migration notes. diff --git a/docs/onboarding/error-handling.md b/docs/onboarding/error-handling.md index 0fdaacd..715c96f 100644 --- a/docs/onboarding/error-handling.md +++ b/docs/onboarding/error-handling.md @@ -29,9 +29,10 @@ else: - Calling `start` twice raises `RuntimeError` from a thin Python guard. Everything after the guard uses `RecorderError`. ## CLI workflow and JSON trailers -- Run `python -m codetracer_python_recorder --codetracer-format=json app.py` to trace a script. -- Exit codes: `0` for success, script exit code when the script stops itself, `1` when a `RecorderError` escapes startup/shutdown, `2` on CLI misuse. -- Pass `--codetracer-json-errors` (or `configure_policy(json_errors=True)`) to mirror each failure as a one-line JSON object on stderr. +- Run `python -m codetracer_python_recorder --format=json app.py` to trace a script. +- Exit codes: `0` when the recorder completes without errors (default even if the script exits non-zero, warning emitted), `1` when a `RecorderError` escapes startup/shutdown, `2` on CLI misuse. +- Use `--propagate-script-exit` (or set `CODETRACER_PROPAGATE_SCRIPT_EXIT=true`, `configure_policy(propagate_script_exit=True)`) when wrappers must mirror the script status. +- Pass `--json-errors` (or `configure_policy(json_errors=True)`) to mirror each failure as a one-line JSON object on stderr. - JSON fields: `run_id`, optional `trace_id`, `error_code`, `error_kind`, `message`, `context`. ## Migration checklist for existing clients