Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions codetracer-python-recorder/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Codetracer Python Recorder

`codetracer-python-recorder` is the Rust-backed recorder module that powers Python
tracing inside Codetracer. The PyO3 extension exposes a small Python façade so
packaged environments (desktop bundles, `uv run`, virtualenvs) can start and stop
recording without shipping an additional interpreter.

## Command-line entry point

The wheel installs a console script named `codetracer-python-recorder` and the
package can also be invoked with `python -m codetracer_python_recorder`. Both
forms share the same arguments:

```bash
python -m codetracer_python_recorder \
--trace-dir ./trace-out \
--format json \
--activation-path app/main.py \
--with-diff \
app/main.py --arg=value
```

- `--trace-dir` (default: `./trace-out`) – directory that will receive
`trace.json`, `trace_metadata.json`, and `trace_paths.json`.
- `--format` – trace serialisation format (`binary` or `json`). Use `json` for
integration with the DB backend importer.
- `--activation-path` – optional gate that postpones tracing until the interpreter
executes this file (defaults to the target script).
- `--with-diff` / `--no-with-diff` – records the caller’s preference in
`trace_metadata.json`. The desktop Codetracer CLI is responsible for generating
diff artefacts; the recorder simply surfaces the flag.

All additional arguments are forwarded to the target script unchanged. The CLI
reuses whichever interpreter launches it so wrappers such as `uv run`, `pipx`,
or activated virtual environments behave identically to `python script.py`.

## Packaging expectations

Desktop installers add the wheel to `PYTHONPATH` before invoking the user’s
interpreter. When embedding the recorder elsewhere, ensure the wheel (or its
extracted site-packages directory) is discoverable on `sys.path` and run the CLI
with the interpreter you want to trace.

The CLI writes recorder metadata into `trace_metadata.json` describing the wheel
version, target script, and diff preference so downstream tooling can make
decisions without re-running the trace.
86 changes: 2 additions & 84 deletions codetracer-python-recorder/codetracer_python_recorder/__main__.py
Original file line number Diff line number Diff line change
@@ -1,89 +1,7 @@
"""CLI to record a trace while running a Python script.

Usage:
python -m codetracer_python_recorder [codetracer options] <script.py> [script args...]

Codetracer options (must appear before the script path):
--codetracer-trace PATH Output events file (default: trace.bin or trace.json)
--codetracer-format {binary,json} Output format (default: binary)
--codetracer-capture-values BOOL Whether to capture values (default: true)

Examples:
python -m codetracer_python_recorder --codetracer-format=json app.py --flag=1
python -m codetracer_python_recorder --codetracer-trace=out.bin script.py --x=2
python -m codetracer_python_recorder --codetracer-capture-values=false script.py
"""
"""Thin wrapper for running the recorder CLI via ``python -m``."""
from __future__ import annotations

import runpy
import sys
from pathlib import Path

from . import DEFAULT_FORMAT, start, stop
import argparse


def _default_trace_path(fmt: str) -> Path:
# Keep a simple filename; Rust side derives sidecars (metadata/paths)
if fmt == "json":
return Path.cwd() / "trace.json"
return Path.cwd() / "trace.bin"


def main(argv: list[str] | None = None) -> int:
    """Run a Python script under the recorder and return its exit status.

    Args:
        argv: Command-line tokens without the program name. When ``None``,
            ``sys.argv[1:]`` is used. Recorder options (``--codetracer-*``)
            are parsed out; the first remaining token must be an existing
            script path and everything after it is passed to the script.

    Returns:
        ``2`` when no existing script path was supplied, ``0`` when the script
        finishes normally, otherwise the status derived from the script's
        ``SystemExit``: ``0`` for ``None``, the integer itself for integer
        codes, and ``1`` for anything else (after echoing it to stderr).
    """
    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(add_help=True)
    parser.add_argument(
        "--codetracer-trace",
        dest="trace",
        default=None,
        help="Path to trace folder. If omitted, defaults to trace.bin or trace.json in the current directory based on --codetracer-format.",
    )
    parser.add_argument(
        "--codetracer-format",
        dest="format",
        choices=["binary", "json"],
        default=DEFAULT_FORMAT,
        help="Output format for trace events. 'binary' is compact; 'json' is human-readable. Default: %(default)s.",
    )
    # Only parse our options; leave script and script args in unknown
    ns, unknown = parser.parse_known_args(argv)

    # Validate that the first unknown token is a script path; otherwise show usage.
    if not unknown or not Path(unknown[0]).exists():
        sys.stderr.write("Usage: python -m codetracer_python_recorder [codetracer options] <script.py> [args...]\n")
        return 2

    script_path = Path(unknown[0]).resolve()
    script_args = unknown[1:]

    fmt = ns.format or DEFAULT_FORMAT
    trace_path = Path(ns.trace) if ns.trace else _default_trace_path(fmt)

    old_argv = sys.argv
    sys.argv = [str(script_path)] + script_args
    try:
        # Activate tracing only after entering the target script file.
        # start() sits inside the outer try so a failure to start still
        # restores sys.argv for the caller.
        session = start(
            trace_path,
            format=fmt,
            start_on_enter=script_path,
        )
        try:
            runpy.run_path(str(script_path), run_name="__main__")
            return 0
        except SystemExit as e:
            # Preserve the script's exit status using the interpreter's own
            # rules: None means success, an int is the status, and any other
            # object is printed to stderr with status 1.
            if e.code is None:
                return 0
            if isinstance(e.code, int):
                return e.code
            sys.stderr.write(f"{e.code}\n")
            return 1
        finally:
            # Ensure tracer stops and files are flushed
            try:
                session.flush()
            finally:
                stop()
    finally:
        sys.argv = old_argv
from .cli import main


if __name__ == "__main__": # pragma: no cover
Expand Down
206 changes: 206 additions & 0 deletions codetracer-python-recorder/codetracer_python_recorder/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
"""Command-line interface for the Codetracer Python recorder."""
from __future__ import annotations

import argparse
import json
import runpy
import sys
from dataclasses import dataclass
from importlib import metadata
from pathlib import Path
from typing import Iterable, Sequence

from . import flush, start, stop
from .formats import DEFAULT_FORMAT, SUPPORTED_FORMATS, normalize_format


@dataclass(frozen=True)
class RecorderCLIConfig:
    """Immutable bundle of the options resolved from one CLI invocation."""

    # Directory that receives the trace artefacts.
    trace_dir: Path
    # Trace serialisation format name.
    format: str
    # File whose execution gates the start of tracing.
    activation_path: Path
    # Script to execute under the recorder.
    script: Path
    # Arguments handed to the script.
    script_args: list[str]


def _default_trace_dir() -> Path:
return Path.cwd() / "trace-out"


def _parse_args(argv: Sequence[str]) -> RecorderCLIConfig:
    """Turn raw command-line tokens into a :class:`RecorderCLIConfig`.

    Recorder options are only recognised *before* the script path (or before
    a ``--`` separator). Everything after the script path belongs to the
    script, even when it looks like a recorder option such as ``--format``
    or ``-h``.

    Args:
        argv: Command-line tokens without the program name.

    Returns:
        The resolved configuration. ``trace_dir``, ``script`` and
        ``activation_path`` are expanded and resolved; ``activation_path``
        falls back to the script when the option is not given.

    Raises:
        SystemExit: Raised by argparse (status 2) for an unrecognised or
            malformed recorder option, a missing or non-existent script, or
            an unsupported format; also raised for ``-h``/``--help``.
    """
    parser = argparse.ArgumentParser(
        prog="codetracer_python_recorder",
        description=(
            "Record a trace for a Python script using the Codetracer runtime tracer. "
            "All script arguments must be provided after the script path or a '--' separator."
        ),
        allow_abbrev=False,
    )
    parser.add_argument(
        "--trace-dir",
        type=Path,
        default=_default_trace_dir(),
        help=(
            "Directory where trace artefacts will be written "
            "(defaults to %(default)s relative to the current working directory)."
        ),
    )
    parser.add_argument(
        "--format",
        default=DEFAULT_FORMAT,
        help=(
            "Trace serialisation format. Supported values: "
            + ", ".join(sorted(SUPPORTED_FORMATS))
            + f". Defaults to {DEFAULT_FORMAT}."
        ),
    )
    parser.add_argument(
        "--activation-path",
        type=Path,
        help=(
            "Optional path used to gate tracing. When provided, tracing begins once the "
            "interpreter enters this file. Defaults to the target script."
        ),
    )

    # argparse scans the whole token list, so handing it everything would let
    # it consume recorder-named options that appear after the script path and
    # were meant for the script. Find where the recorder's own options end
    # and parse only that prefix.
    # Options written as "--opt value" take the next token as their value;
    # the "--opt=value" spelling is a single token and needs no special case.
    value_options = {"--trace-dir", "--format", "--activation-path"}
    tokens = list(argv)
    split_at = len(tokens)
    index = 0
    while index < len(tokens):
        token = tokens[index]
        if token in ("--", "-") or not token.startswith("-"):
            split_at = index
            break
        index += 2 if token in value_options else 1

    known = parser.parse_args(tokens[:split_at])
    pending: list[str] = tokens[split_at:]
    if not pending:
        parser.error("missing script to execute")

    if pending[0] == "--":
        pending.pop(0)
        if not pending:
            parser.error("missing script path after '--'")

    script_token = pending[0]
    script_path = Path(script_token).expanduser()
    if not script_path.exists():
        parser.error(f"script '{script_path}' does not exist")
    script_path = script_path.resolve()

    # A single "--" directly after the script path is treated as a separator
    # and dropped, as before.
    script_args = pending[1:]
    if script_args and script_args[0] == "--":
        script_args = script_args[1:]

    trace_dir = Path(known.trace_dir).expanduser().resolve()
    fmt = normalize_format(known.format)
    if fmt not in SUPPORTED_FORMATS:
        parser.error(
            f"unsupported trace format '{known.format}'. Expected one of: "
            + ", ".join(sorted(SUPPORTED_FORMATS))
        )

    if known.activation_path is not None:
        activation_path = Path(known.activation_path).expanduser().resolve()
    else:
        activation_path = script_path

    return RecorderCLIConfig(
        trace_dir=trace_dir,
        format=fmt,
        activation_path=activation_path,
        script=script_path,
        script_args=script_args,
    )


def _resolve_package_version() -> str | None:
try:
return metadata.version("codetracer-python-recorder")
except metadata.PackageNotFoundError: # pragma: no cover - dev checkout
return None


def _serialise_metadata(
trace_dir: Path,
*,
script: Path,
) -> None:
"""Augment trace metadata with recorder-specific information."""
metadata_path = trace_dir / "trace_metadata.json"
try:
raw = metadata_path.read_text(encoding="utf-8")
except FileNotFoundError:
return

try:
payload = json.loads(raw) if raw else {}
except json.JSONDecodeError:
return

recorder_block = payload.setdefault(
"recorder",
{
"name": "codetracer_python_recorder",
},
)
if isinstance(recorder_block, dict):
recorder_block.setdefault("name", "codetracer_python_recorder")
recorder_block["target_script"] = str(script)
version = _resolve_package_version()
if version:
recorder_block["version"] = version
else:
# Unexpected schema — bail out without mutating further.
return

metadata_path.write_text(json.dumps(payload), encoding="utf-8")


def main(argv: Iterable[str] | None = None) -> int:
    """Entry point for ``python -m codetracer_python_recorder``.

    Parses the recorder options, runs the target script as ``__main__`` with
    ``sys.argv`` rewritten to the script's own view, then flushes and stops
    the recorder and augments ``trace_metadata.json``.

    Args:
        argv: Command-line tokens without the program name. When ``None``,
            ``sys.argv[1:]`` is used.

    Returns:
        ``2`` when argument parsing fails unexpectedly, ``1`` when the
        recorder cannot be started, ``0`` when the script finishes normally,
        otherwise the status derived from the script's ``SystemExit``: ``0``
        for ``None``, the integer itself for integer codes, and ``1`` for
        anything else (after echoing it to stderr).

    Raises:
        SystemExit: Propagated from argparse for usage errors and ``--help``.
    """
    if argv is None:
        argv = sys.argv[1:]

    try:
        config = _parse_args(list(argv))
    except SystemExit:
        # argparse already printed a helpful message; propagate exit code.
        raise
    except Exception as exc:  # pragma: no cover - defensive guardrail
        sys.stderr.write(f"Failed to parse arguments: {exc}\n")
        return 2

    trace_dir = config.trace_dir
    script_path = config.script
    script_args = config.script_args

    old_argv = sys.argv
    sys.argv = [str(script_path)] + script_args

    # The outer try/finally restores sys.argv on every exit path, including a
    # failing start(), flush() or stop().
    try:
        try:
            start(
                trace_dir,
                format=config.format,
                start_on_enter=config.activation_path,
            )
        except Exception as exc:
            sys.stderr.write(f"Failed to start Codetracer session: {exc}\n")
            return 1

        exit_code = 0
        try:
            runpy.run_path(str(script_path), run_name="__main__")
        except SystemExit as exc:
            # Follow the interpreter's own rules for SystemExit: None means
            # success, an int is the status, and any other object is printed
            # to stderr with status 1.
            if exc.code is None:
                exit_code = 0
            elif isinstance(exc.code, int):
                exit_code = exc.code
            else:
                sys.stderr.write(f"{exc.code}\n")
                exit_code = 1
        finally:
            # Stop the recorder even if flushing fails.
            try:
                flush()
            finally:
                stop()
    finally:
        sys.argv = old_argv

    _serialise_metadata(trace_dir, script=script_path)

    return exit_code


__all__ = ("main", "RecorderCLIConfig")
3 changes: 3 additions & 0 deletions codetracer-python-recorder/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ classifiers = [
"Programming Language :: Rust",
]

[project.scripts]
codetracer-python-recorder = "codetracer_python_recorder.cli:main"

[tool.maturin]
# Build the PyO3 extension module
bindings = "pyo3"
Expand Down
Loading
Loading