metacraft-labs · tzanko-matev · Oct 13, 2025 · Oct 9, 2025
diff --git a/codetracer-python-recorder/README.md b/codetracer-python-recorder/README.md
@@ -0,0 +1,46 @@
+# Codetracer Python Recorder
+
+`codetracer-python-recorder` is the Rust-backed recorder module that powers Python
+tracing inside Codetracer. The PyO3 extension exposes a small Python façade so
+packaged environments (desktop bundles, `uv run`, virtualenvs) can start and stop
+recording without shipping an additional interpreter.
+
+## Command-line entry point
+
+The wheel installs a console script named `codetracer-python-recorder` and the
+package can also be invoked with `python -m codetracer_python_recorder`. Both
+forms share the same arguments:
+
+```bash
+python -m codetracer_python_recorder \
+  --trace-dir ./trace-out \
+  --format json \
+  --activation-path app/main.py \
+  --with-diff \
+  app/main.py --arg=value
+```
+
+- `--trace-dir` (default: `./trace-out`) – directory that will receive
+  `trace.json`, `trace_metadata.json`, and `trace_paths.json`.
+- `--format` – trace serialisation format (`binary` or `json`). Use `json` for
+  integration with the DB backend importer.
+- `--activation-path` – optional gate that postpones tracing until the interpreter
+  executes this file (defaults to the target script).
+- `--with-diff` / `--no-with-diff` – records the caller’s preference in
+  `trace_metadata.json`. The desktop Codetracer CLI is responsible for generating
+  diff artefacts; the recorder simply surfaces the flag.
+
+Recorder options must precede the script path; all arguments after it are forwarded
+to the target script unchanged. The CLI reuses whichever interpreter launches it, so
+wrappers such as `uv run`, `pipx`, or activated virtualenvs behave identically to `python script.py`.
+
+## Packaging expectations
+
+Desktop installers add the wheel to `PYTHONPATH` before invoking the user’s
+interpreter. When embedding the recorder elsewhere, ensure the wheel (or its
+extracted site-packages directory) is discoverable on `sys.path` and run the CLI
+with the interpreter you want to trace.
+
+The CLI writes recorder metadata into `trace_metadata.json` describing the wheel
+version, target script, and diff preference so downstream tooling can make
+decisions without re-running the trace.
diff --git a/codetracer-python-recorder/codetracer_python_recorder/__main__.py b/codetracer-python-recorder/codetracer_python_recorder/__main__.py
@@ -1,89 +1,7 @@
-"""CLI to record a trace while running a Python script.
-
-Usage:
-    python -m codetracer_python_recorder [codetracer options] <script.py> [script args...]
-
-Codetracer options (must appear before the script path):
-    --codetracer-trace PATH             Output events file (default: trace.bin or trace.json)
-    --codetracer-format {binary,json}   Output format (default: binary)
-    --codetracer-capture-values BOOL    Whether to capture values (default: true)
-
-Examples:
-    python -m codetracer_python_recorder --codetracer-format=json app.py --flag=1
-    python -m codetracer_python_recorder --codetracer-trace=out.bin script.py --x=2
-    python -m codetracer_python_recorder --codetracer-capture-values=false script.py
-"""
+"""Thin wrapper for running the recorder CLI via ``python -m``."""
 from __future__ import annotations
 
-import runpy
-import sys
-from pathlib import Path
-
-from . import DEFAULT_FORMAT, start, stop
-import argparse
-
-
-def _default_trace_path(fmt: str) -> Path:
-    # Keep a simple filename; Rust side derives sidecars (metadata/paths)
-    if fmt == "json":
-        return Path.cwd() / "trace.json"
-    return Path.cwd() / "trace.bin"
-
-
-def main(argv: list[str] | None = None) -> int:
-    if argv is None:
-        argv = sys.argv[1:]
-
-    parser = argparse.ArgumentParser(add_help=True)
-    parser.add_argument(
-        "--codetracer-trace",
-        dest="trace",
-        default=None,
-        help="Path to trace folder. If omitted, defaults to trace.bin or trace.json in the current directory based on --codetracer-format.",
-    )
-    parser.add_argument(
-        "--codetracer-format",
-        dest="format",
-        choices=["binary", "json"],
-        default=DEFAULT_FORMAT,
-        help="Output format for trace events. 'binary' is compact; 'json' is human-readable. Default: %(default)s.",
-    )
-    # Only parse our options; leave script and script args in unknown
-    ns, unknown = parser.parse_known_args(argv)
-
-    # Validate that the first unknown token is a script path; otherwise show usage.
-    if not unknown or not Path(unknown[0]).exists():
-        sys.stderr.write("Usage: python -m codetracer_python_recorder [codetracer options] <script.py> [args...]\n")
-        return 2
-
-    script_path = Path(unknown[0]).resolve()
-    script_args = unknown[1:]
-
-    fmt = ns.format or DEFAULT_FORMAT
-    trace_path = Path(ns.trace) if ns.trace else _default_trace_path(fmt)
-
-    old_argv = sys.argv
-    sys.argv = [str(script_path)] + script_args
-    # Activate tracing only after entering the target script file.
-    session = start(
-        trace_path,
-        format=fmt,
-        start_on_enter=script_path,
-    )
-    try:
-        runpy.run_path(str(script_path), run_name="__main__")
-        return 0
-    except SystemExit as e:
-        # Preserve script's exit code
-        code = e.code if isinstance(e.code, int) else 1
-        return code
-    finally:
-        # Ensure tracer stops and files are flushed
-        try:
-            session.flush()
-        finally:
-            stop()
-            sys.argv = old_argv
+from .cli import main
 
 
 if __name__ == "__main__":  # pragma: no cover

diff --git a/codetracer-python-recorder/codetracer_python_recorder/cli.py b/codetracer-python-recorder/codetracer_python_recorder/cli.py
@@ -0,0 +1,279 @@
+"""Command-line interface for the Codetracer Python recorder."""
+from __future__ import annotations
+
+import argparse
+import json
+import runpy
+import sys
+from dataclasses import dataclass
+from importlib import metadata
+from pathlib import Path
+from typing import Iterable, Sequence
+
+from . import flush, start, stop
+from .formats import DEFAULT_FORMAT, SUPPORTED_FORMATS, normalize_format
+
+# Recorder options that consume the next token as their value. Used to locate
+# the script path so the script's own options never reach the recorder parser.
+_OPTIONS_WITH_VALUE = frozenset({"--trace-dir", "--format", "--activation-path"})
+
+
+@dataclass(frozen=True)
+class RecorderCLIConfig:
+    """Resolved CLI options for a recorder invocation."""
+
+    # Absolute directory handed to ``start`` as the trace destination.
+    trace_dir: Path
+    # Result of ``normalize_format``; checked against ``SUPPORTED_FORMATS``.
+    format: str
+    # File passed to ``start`` as ``start_on_enter``; the script by default.
+    activation_path: Path
+    # Absolute path of the script to execute.
+    script: Path
+    # Arguments exposed to the script as ``sys.argv[1:]``.
+    script_args: list[str]
+    # Caller's diff preference; only recorded in ``trace_metadata.json``.
+    with_diff: bool = False
+
+
+def _default_trace_dir() -> Path:
+    """Return the default trace directory, ``trace-out`` under the cwd."""
+    return Path.cwd() / "trace-out"
+
+
+def _split_argv(argv: Sequence[str]) -> tuple[list[str], list[str]]:
+    """Split *argv* into recorder options and the script invocation.
+
+    Returns ``(recorder_args, script_and_args)``. The second list starts with
+    the script path and is empty when no script was supplied. A ``--``
+    separator in front of the script path is dropped.
+    """
+    index = 0
+    while index < len(argv):
+        token = argv[index]
+        if token == "--":
+            return list(argv[:index]), list(argv[index + 1 :])
+        if token == "-" or not token.startswith("-"):
+            return list(argv[:index]), list(argv[index:])
+        # "--opt value" spans two tokens; "--opt=value" and flags span one.
+        index += 2 if token in _OPTIONS_WITH_VALUE else 1
+    return list(argv), []
+
+
+def _parse_args(argv: Sequence[str]) -> RecorderCLIConfig:
+    """Parse *argv* into a :class:`RecorderCLIConfig`.
+
+    Recorder options are recognised only before the script path; everything
+    after it is forwarded to the script. Invalid input is reported through
+    ``parser.error``, which raises ``SystemExit``.
+    """
+    parser = argparse.ArgumentParser(
+        prog="codetracer_python_recorder",
+        usage="%(prog)s [options] [--] script [script-args ...]",
+        description=(
+            "Record a trace for a Python script using the Codetracer runtime tracer. "
+            "All script arguments must be provided after the script path or a '--' separator."
+        ),
+        allow_abbrev=False,
+    )
+    parser.add_argument(
+        "--trace-dir",
+        type=Path,
+        default=_default_trace_dir(),
+        help="Directory where trace artefacts will be written (defaults to %(default)s).",
+    )
+    parser.add_argument(
+        "--format",
+        default=DEFAULT_FORMAT,
+        help=(
+            "Trace serialisation format. Supported values: "
+            + ", ".join(sorted(SUPPORTED_FORMATS))
+            + f". Defaults to {DEFAULT_FORMAT}."
+        ),
+    )
+    parser.add_argument(
+        "--activation-path",
+        type=Path,
+        help=(
+            "Optional path used to gate tracing. When provided, tracing begins once the "
+            "interpreter enters this file. Defaults to the target script."
+        ),
+    )
+    # Paired flags instead of BooleanOptionalAction so Python < 3.9 still works.
+    parser.add_argument(
+        "--with-diff",
+        dest="with_diff",
+        action="store_true",
+        default=False,
+        help="Record in trace_metadata.json that the caller wants diff artefacts.",
+    )
+    parser.add_argument(
+        "--no-with-diff",
+        dest="with_diff",
+        action="store_false",
+        default=False,
+        help="Record that the caller does not want diff artefacts (default).",
+    )
+
+    recorder_args, pending = _split_argv(argv)
+    known = parser.parse_args(recorder_args)
+    if not pending:
+        # recorder_args is shorter than argv only when a '--' was consumed.
+        if len(recorder_args) < len(argv):
+            parser.error("missing script path after '--'")
+        parser.error("missing script to execute")
+
+    script_path = Path(pending[0]).expanduser()
+    if not script_path.exists():
+        parser.error(f"script '{script_path}' does not exist")
+    script_path = script_path.resolve()
+
+    # Tolerate "script.py -- args" by dropping the separator.
+    script_args = pending[1:]
+    if script_args and script_args[0] == "--":
+        script_args = script_args[1:]
+
+    fmt = normalize_format(known.format)
+    if fmt not in SUPPORTED_FORMATS:
+        parser.error(
+            f"unsupported trace format '{known.format}'. Expected one of: "
+            + ", ".join(sorted(SUPPORTED_FORMATS))
+        )
+
+    activation_path = (
+        known.activation_path.expanduser().resolve()
+        if known.activation_path is not None
+        else script_path
+    )
+
+    return RecorderCLIConfig(
+        trace_dir=known.trace_dir.expanduser().resolve(),
+        format=fmt,
+        activation_path=activation_path,
+        script=script_path,
+        script_args=script_args,
+        with_diff=known.with_diff,
+    )
+
+
+def _resolve_package_version() -> str | None:
+    """Return the installed distribution version, or ``None`` if not installed."""
+    try:
+        return metadata.version("codetracer-python-recorder")
+    except metadata.PackageNotFoundError:  # pragma: no cover - dev checkout
+        return None
+
+
+def _serialise_metadata(
+    trace_dir: Path,
+    *,
+    script: Path,
+    with_diff: bool = False,
+) -> None:
+    """Augment trace metadata with recorder-specific information.
+
+    Adds or updates the ``recorder`` object in ``trace_metadata.json`` with the
+    target script, the diff preference and, when known, the package version.
+    The file is left untouched when it is missing, is not valid JSON, or does
+    not have the expected object shape.
+    """
+    metadata_path = trace_dir / "trace_metadata.json"
+    try:
+        raw = metadata_path.read_text(encoding="utf-8")
+    except FileNotFoundError:
+        return
+
+    try:
+        payload = json.loads(raw) if raw else {}
+    except json.JSONDecodeError:
+        return
+    if not isinstance(payload, dict):
+        # Valid JSON but not an object (e.g. a list): nothing safe to add to.
+        return
+
+    recorder_block = payload.setdefault("recorder", {})
+    if not isinstance(recorder_block, dict):
+        # Unexpected schema — bail out without mutating further.
+        return
+
+    recorder_block.setdefault("name", "codetracer_python_recorder")
+    recorder_block["target_script"] = str(script)
+    recorder_block["diff_preference"] = "enabled" if with_diff else "disabled"
+    version = _resolve_package_version()
+    if version:
+        recorder_block["version"] = version
+
+    metadata_path.write_text(json.dumps(payload), encoding="utf-8")
+
+
+def _exit_status(code: object) -> int:
+    """Map a ``SystemExit.code`` payload to a process exit status.
+
+    Follows ``sys.exit``: ``None`` means success, integers are used as-is, and
+    any other object is printed to stderr with status ``1``.
+    """
+    if code is None:
+        return 0
+    if isinstance(code, int):
+        return code
+    sys.stderr.write(f"{code}\n")
+    return 1
+
+
+def main(argv: Iterable[str] | None = None) -> int:
+    """Entry point for ``python -m codetracer_python_recorder``.
+
+    Runs the target script under the recorder and returns the exit status the
+    script requested (``0`` when it ends without calling ``sys.exit``).
+    Usage errors raise ``SystemExit`` from argparse; a failure to start the
+    recorder returns ``1``. Any other exception raised by the script
+    propagates after the recorder has been flushed and stopped.
+    """
+    if argv is None:
+        argv = sys.argv[1:]
+
+    try:
+        config = _parse_args(list(argv))
+    except Exception as exc:  # pragma: no cover - defensive guardrail
+        # argparse reports usage errors via SystemExit, which is not an
+        # Exception subclass and therefore passes straight through.
+        sys.stderr.write(f"Failed to parse arguments: {exc}\n")
+        return 2
+
+    # Present the script with the argv it would see when run directly.
+    old_argv = sys.argv
+    sys.argv = [str(config.script), *config.script_args]
+
+    try:
+        start(
+            config.trace_dir,
+            format=config.format,
+            start_on_enter=config.activation_path,
+        )
+    except Exception as exc:
+        sys.stderr.write(f"Failed to start Codetracer session: {exc}\n")
+        sys.argv = old_argv
+        return 1
+
+    exit_code = 0
+    try:
+        runpy.run_path(str(config.script), run_name="__main__")
+    except SystemExit as exc:
+        exit_code = _exit_status(exc.code)
+    finally:
+        # Flush and stop even if the script raised; always restore sys.argv.
+        try:
+            flush()
+        finally:
+            stop()
+            sys.argv = old_argv
+
+    _serialise_metadata(
+        config.trace_dir,
+        script=config.script,
+        with_diff=config.with_diff,
+    )
+    return exit_code
+
+
+__all__ = ("main", "RecorderCLIConfig")
diff --git a/codetracer-python-recorder/pyproject.toml b/codetracer-python-recorder/pyproject.toml
@@ -12,6 +12,9 @@ classifiers = [
     "Programming Language :: Rust",
 ]
 
+[project.scripts]
+codetracer-python-recorder = "codetracer_python_recorder.cli:main"
+
 [tool.maturin]
 # Build the PyO3 extension module
 bindings = "pyo3"