diff --git a/codetracer-python-recorder/README.md b/codetracer-python-recorder/README.md new file mode 100644 index 0000000..aa71948 --- /dev/null +++ b/codetracer-python-recorder/README.md @@ -0,0 +1,46 @@ +# Codetracer Python Recorder + +`codetracer-python-recorder` is the Rust-backed recorder module that powers Python +tracing inside Codetracer. The PyO3 extension exposes a small Python façade so +packaged environments (desktop bundles, `uv run`, virtualenvs) can start and stop +recording without shipping an additional interpreter. + +## Command-line entry point + +The wheel installs a console script named `codetracer-python-recorder` and the +package can also be invoked with `python -m codetracer_python_recorder`. Both +forms share the same arguments: + +```bash +python -m codetracer_python_recorder \ + --trace-dir ./trace-out \ + --format json \ + --activation-path app/main.py \ + --with-diff \ + app/main.py --arg=value +``` + +- `--trace-dir` (default: `./trace-out`) – directory that will receive + `trace.json`, `trace_metadata.json`, and `trace_paths.json`. +- `--format` – trace serialisation format (`binary` or `json`). Use `json` for + integration with the DB backend importer. +- `--activation-path` – optional gate that postpones tracing until the interpreter + executes this file (defaults to the target script). +- `--with-diff` / `--no-with-diff` – records the caller’s preference in + `trace_metadata.json`. The desktop Codetracer CLI is responsible for generating + diff artefacts; the recorder simply surfaces the flag. + +All additional arguments are forwarded to the target script unchanged. The CLI +reuses whichever interpreter launches it so wrappers such as `uv run`, `pipx`, +or activated virtual environments behave identically to `python script.py`. + +## Packaging expectations + +Desktop installers add the wheel to `PYTHONPATH` before invoking the user’s +interpreter. 
When embedding the recorder elsewhere, ensure the wheel (or its +extracted site-packages directory) is discoverable on `sys.path` and run the CLI +with the interpreter you want to trace. + +The CLI writes recorder metadata into `trace_metadata.json` describing the wheel +version, target script, and diff preference so downstream tooling can make +decisions without re-running the trace. diff --git a/codetracer-python-recorder/codetracer_python_recorder/__main__.py b/codetracer-python-recorder/codetracer_python_recorder/__main__.py index 6982239..a7bb867 100644 --- a/codetracer-python-recorder/codetracer_python_recorder/__main__.py +++ b/codetracer-python-recorder/codetracer_python_recorder/__main__.py @@ -1,89 +1,7 @@ -"""CLI to record a trace while running a Python script. - -Usage: - python -m codetracer_python_recorder [codetracer options] [script args...] - -Codetracer options (must appear before the script path): - --codetracer-trace PATH Output events file (default: trace.bin or trace.json) - --codetracer-format {binary,json} Output format (default: binary) - --codetracer-capture-values BOOL Whether to capture values (default: true) - -Examples: - python -m codetracer_python_recorder --codetracer-format=json app.py --flag=1 - python -m codetracer_python_recorder --codetracer-trace=out.bin script.py --x=2 - python -m codetracer_python_recorder --codetracer-capture-values=false script.py -""" +"""Thin wrapper for running the recorder CLI via ``python -m``.""" from __future__ import annotations -import runpy -import sys -from pathlib import Path - -from . 
import DEFAULT_FORMAT, start, stop -import argparse - - -def _default_trace_path(fmt: str) -> Path: - # Keep a simple filename; Rust side derives sidecars (metadata/paths) - if fmt == "json": - return Path.cwd() / "trace.json" - return Path.cwd() / "trace.bin" - - -def main(argv: list[str] | None = None) -> int: - if argv is None: - argv = sys.argv[1:] - - parser = argparse.ArgumentParser(add_help=True) - parser.add_argument( - "--codetracer-trace", - dest="trace", - default=None, - help="Path to trace folder. If omitted, defaults to trace.bin or trace.json in the current directory based on --codetracer-format.", - ) - parser.add_argument( - "--codetracer-format", - dest="format", - choices=["binary", "json"], - default=DEFAULT_FORMAT, - help="Output format for trace events. 'binary' is compact; 'json' is human-readable. Default: %(default)s.", - ) - # Only parse our options; leave script and script args in unknown - ns, unknown = parser.parse_known_args(argv) - - # Validate that the first unknown token is a script path; otherwise show usage. - if not unknown or not Path(unknown[0]).exists(): - sys.stderr.write("Usage: python -m codetracer_python_recorder [codetracer options] [args...]\n") - return 2 - - script_path = Path(unknown[0]).resolve() - script_args = unknown[1:] - - fmt = ns.format or DEFAULT_FORMAT - trace_path = Path(ns.trace) if ns.trace else _default_trace_path(fmt) - - old_argv = sys.argv - sys.argv = [str(script_path)] + script_args - # Activate tracing only after entering the target script file. 
- session = start( - trace_path, - format=fmt, - start_on_enter=script_path, - ) - try: - runpy.run_path(str(script_path), run_name="__main__") - return 0 - except SystemExit as e: - # Preserve script's exit code - code = e.code if isinstance(e.code, int) else 1 - return code - finally: - # Ensure tracer stops and files are flushed - try: - session.flush() - finally: - stop() - sys.argv = old_argv +from .cli import main if __name__ == "__main__": # pragma: no cover diff --git a/codetracer-python-recorder/codetracer_python_recorder/cli.py b/codetracer-python-recorder/codetracer_python_recorder/cli.py new file mode 100644 index 0000000..59378bc --- /dev/null +++ b/codetracer-python-recorder/codetracer_python_recorder/cli.py @@ -0,0 +1,206 @@ +"""Command-line interface for the Codetracer Python recorder.""" +from __future__ import annotations + +import argparse +import json +import runpy +import sys +from dataclasses import dataclass +from importlib import metadata +from pathlib import Path +from typing import Iterable, Sequence + +from . import flush, start, stop +from .formats import DEFAULT_FORMAT, SUPPORTED_FORMATS, normalize_format + + +@dataclass(frozen=True) +class RecorderCLIConfig: + """Resolved CLI options for a recorder invocation.""" + + trace_dir: Path + format: str + activation_path: Path + script: Path + script_args: list[str] + + +def _default_trace_dir() -> Path: + return Path.cwd() / "trace-out" + + +def _parse_args(argv: Sequence[str]) -> RecorderCLIConfig: + parser = argparse.ArgumentParser( + prog="codetracer_python_recorder", + description=( + "Record a trace for a Python script using the Codetracer runtime tracer. " + "All script arguments must be provided after the script path or a '--' separator." + ), + allow_abbrev=False, + ) + parser.add_argument( + "--trace-dir", + type=Path, + default=_default_trace_dir(), + help=( + "Directory where trace artefacts will be written " + "(defaults to %(default)s relative to the current working directory)." 
+ ), + ) + parser.add_argument( + "--format", + default=DEFAULT_FORMAT, + help=( + "Trace serialisation format. Supported values: " + + ", ".join(sorted(SUPPORTED_FORMATS)) + + f". Defaults to {DEFAULT_FORMAT}." + ), + ) + parser.add_argument( + "--activation-path", + type=Path, + help=( + "Optional path used to gate tracing. When provided, tracing begins once the " + "interpreter enters this file. Defaults to the target script." + ), + ) + + known, remainder = parser.parse_known_args(argv) + pending: list[str] = list(remainder) + if not pending: + parser.error("missing script to execute") + + if pending[0] == "--": + pending.pop(0) + if not pending: + parser.error("missing script path after '--'") + + script_token = pending[0] + script_path = Path(script_token).expanduser() + if not script_path.exists(): + parser.error(f"script '{script_path}' does not exist") + script_path = script_path.resolve() + + script_args = pending[1:] + if script_args and script_args[0] == "--": + script_args = script_args[1:] + + trace_dir = Path(known.trace_dir).expanduser().resolve() + fmt = normalize_format(known.format) + if fmt not in SUPPORTED_FORMATS: + parser.error( + f"unsupported trace format '{known.format}'. 
Expected one of: " + + ", ".join(sorted(SUPPORTED_FORMATS)) + ) + + activation_path = ( + Path(known.activation_path).expanduser().resolve() + if known.activation_path + else script_path + ) + + return RecorderCLIConfig( + trace_dir=trace_dir, + format=fmt, + activation_path=activation_path, + script=script_path, + script_args=script_args, + ) + + +def _resolve_package_version() -> str | None: + try: + return metadata.version("codetracer-python-recorder") + except metadata.PackageNotFoundError: # pragma: no cover - dev checkout + return None + + +def _serialise_metadata( + trace_dir: Path, + *, + script: Path, +) -> None: + """Augment trace metadata with recorder-specific information.""" + metadata_path = trace_dir / "trace_metadata.json" + try: + raw = metadata_path.read_text(encoding="utf-8") + except FileNotFoundError: + return + + try: + payload = json.loads(raw) if raw else {} + except json.JSONDecodeError: + return + + recorder_block = payload.setdefault( + "recorder", + { + "name": "codetracer_python_recorder", + }, + ) + if isinstance(recorder_block, dict): + recorder_block.setdefault("name", "codetracer_python_recorder") + recorder_block["target_script"] = str(script) + version = _resolve_package_version() + if version: + recorder_block["version"] = version + else: + # Unexpected schema — bail out without mutating further. + return + + metadata_path.write_text(json.dumps(payload), encoding="utf-8") + + +def main(argv: Iterable[str] | None = None) -> int: + """Entry point for ``python -m codetracer_python_recorder``.""" + if argv is None: + argv = sys.argv[1:] + + try: + config = _parse_args(list(argv)) + except SystemExit: + # argparse already printed a helpful message; propagate exit code. 
+ raise + except Exception as exc: # pragma: no cover - defensive guardrail + sys.stderr.write(f"Failed to parse arguments: {exc}\n") + return 2 + + trace_dir = config.trace_dir + script_path = config.script + script_args = config.script_args + + old_argv = sys.argv + sys.argv = [str(script_path)] + script_args + + try: + start( + trace_dir, + format=config.format, + start_on_enter=config.activation_path, + ) + except Exception as exc: + sys.stderr.write(f"Failed to start Codetracer session: {exc}\n") + sys.argv = old_argv + return 1 + + exit_code: int | None = None + try: + try: + runpy.run_path(str(script_path), run_name="__main__") + except SystemExit as exc: + exit_code = exc.code if isinstance(exc.code, int) else 1 + else: + exit_code = 0 + finally: + try: + flush() + finally: + stop() + sys.argv = old_argv + + _serialise_metadata(trace_dir, script=script_path) + + return exit_code if exit_code is not None else 0 + + +__all__ = ("main", "RecorderCLIConfig") diff --git a/codetracer-python-recorder/pyproject.toml b/codetracer-python-recorder/pyproject.toml index 5bfc8ee..835a634 100644 --- a/codetracer-python-recorder/pyproject.toml +++ b/codetracer-python-recorder/pyproject.toml @@ -12,6 +12,9 @@ classifiers = [ "Programming Language :: Rust", ] +[project.scripts] +codetracer-python-recorder = "codetracer_python_recorder.cli:main" + [tool.maturin] # Build the PyO3 extension module bindings = "pyo3" diff --git a/codetracer-python-recorder/tests/python/test_cli_integration.py b/codetracer-python-recorder/tests/python/test_cli_integration.py new file mode 100644 index 0000000..33dc62a --- /dev/null +++ b/codetracer-python-recorder/tests/python/test_cli_integration.py @@ -0,0 +1,72 @@ +"""Integration tests for the recorder CLI entry point.""" +from __future__ import annotations + +import json +import os +import subprocess +import sys +from pathlib import Path + +import pytest + + +REPO_ROOT = Path(__file__).resolve().parents[2] + + +def _write_script(path: Path, 
body: str = "print('hello from recorder')\n") -> None: + path.write_text(body, encoding="utf-8") + + +def _run_cli( + args: list[str], + *, + cwd: Path, + env: dict[str, str], +) -> subprocess.CompletedProcess[str]: + return subprocess.run( + [sys.executable, "-m", "codetracer_python_recorder", *args], + cwd=cwd, + env=env, + check=True, + capture_output=True, + text=True, + ) + + +def _prepare_env() -> dict[str, str]: + env = os.environ.copy() + pythonpath = env.get("PYTHONPATH", "") + root = str(REPO_ROOT) + env["PYTHONPATH"] = root if not pythonpath else os.pathsep.join([root, pythonpath]) + return env + + +def test_cli_emits_trace_artifacts(tmp_path: Path) -> None: + script = tmp_path / "program.py" + _write_script(script, "value = 21 + 21\nprint(value)\n") + + trace_dir = tmp_path / "trace" + env = _prepare_env() + args = [ + "--trace-dir", + str(trace_dir), + "--format", + "json", + ] + args.append(str(script)) + + result = _run_cli(args, cwd=tmp_path, env=env) + assert result.returncode == 0 + assert trace_dir.is_dir() + + events_file = trace_dir / "trace.json" + metadata_file = trace_dir / "trace_metadata.json" + paths_file = trace_dir / "trace_paths.json" + assert events_file.exists() + assert metadata_file.exists() + assert paths_file.exists() + + payload = json.loads(metadata_file.read_text(encoding="utf-8")) + recorder_info = payload.get("recorder", {}) + assert recorder_info.get("name") == "codetracer_python_recorder" + assert recorder_info.get("target_script") == str(script.resolve()) diff --git a/codetracer-python-recorder/tests/python/unit/test_cli.py b/codetracer-python-recorder/tests/python/unit/test_cli.py new file mode 100644 index 0000000..bed2007 --- /dev/null +++ b/codetracer-python-recorder/tests/python/unit/test_cli.py @@ -0,0 +1,66 @@ +"""Unit tests for the recorder CLI helpers.""" +from __future__ import annotations + +from pathlib import Path + +import pytest + +from codetracer_python_recorder import formats +from 
codetracer_python_recorder.cli import _parse_args + + +def _write_script(path: Path) -> None: + path.write_text("print('cli test')\n", encoding="utf-8") + + +def test_parse_args_uses_defaults(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.chdir(tmp_path) + script = Path("sample.py") + _write_script(script) + + config = _parse_args([str(script)]) + + assert config.script == script.resolve() + assert config.script_args == [] + assert config.trace_dir == (tmp_path / "trace-out").resolve() + assert config.format == formats.DEFAULT_FORMAT + assert config.activation_path == script.resolve() + + +def test_parse_args_accepts_custom_trace_dir(tmp_path: Path) -> None: + script = tmp_path / "app.py" + _write_script(script) + trace_dir = tmp_path / "custom-trace" + + config = _parse_args(["--trace-dir", str(trace_dir), str(script)]) + + assert config.trace_dir == trace_dir.resolve() + + +def test_parse_args_validates_format(tmp_path: Path) -> None: + script = tmp_path / "main.py" + _write_script(script) + + with pytest.raises(SystemExit): + _parse_args(["--format", "yaml", str(script)]) + + +def test_parse_args_handles_activation_and_script_args(tmp_path: Path) -> None: + script = tmp_path / "prog.py" + _write_script(script) + activation = tmp_path / "activation.py" + _write_script(activation) + + config = _parse_args( + [ + "--activation-path", + str(activation), + str(script), + "--", + "--flag", + "value", + ] + ) + + assert config.activation_path == activation.resolve() + assert config.script_args == ["--flag", "value"] diff --git a/design-docs/adr/0005-python-recorder-db-backend-integration.md b/design-docs/adr/0005-python-recorder-db-backend-integration.md new file mode 100644 index 0000000..e58db31 --- /dev/null +++ b/design-docs/adr/0005-python-recorder-db-backend-integration.md @@ -0,0 +1,64 @@ +# ADR 0005: Wire the Rust/PyO3 Python Recorder into the Codetracer DB Backend + +- **Status:** Proposed +- **Date:** 2025-10-09 +- **Deciders:** 
Codetracer Runtime & Tooling Leads +- **Consulted:** Desktop Packaging, Python Platform WG, Release Engineering +- **Informed:** Developer Experience, Support, Product Management + +## Context + +We now have a Rust-based `codetracer_python_recorder` PyO3 extension that captures Python execution through `sys.monitoring` and emits the `runtime_tracing` event stream (`libs/codetracer-python-recorder/codetracer-python-recorder/src/lib.rs`). The module ships with a thin Python façade (`codetracer_python_recorder/session.py`) and is intended to become the canonical recorder for Python users. + +Inside the desktop Codetracer distribution, the `ct record` workflow still routes Python scripts through the legacy rr-based backend. That path is not portable across platforms, diverges from the new recorder API, and prevents us from delivering a unified CLI experience. Today only Ruby/Noir/WASM go through the self-contained db-backend (`src/ct/db_backend_record.nim`), so Python recordings inside the desktop app do not benefit from the same trace schema, caching, or upload flow. More importantly, developers expect `ct record foo.py` to behave exactly like `python foo.py` (or inside wrappers such as `uv run python foo.py`), reusing the same interpreter, virtual environment, and installed dependencies. + +To ship a single CLI/UI (`ct record`, `ct upload`) regardless of installation method, we must integrate the Rust-backed Python recorder into the db-backend flow used by other languages. The integration needs to ensure the recorder lives inside the desktop bundle (AppImage, DMG, upcoming Windows installer), the CLI resolves it without virtualenvs, and traces are imported via the same sqlite pipeline as Ruby. + +## Decision + +We will treat Python as a db-backend language inside Codetracer by adding a Python-specific launcher that invokes the PyO3 module, streams traces into the standard `trace.json`/`trace_metadata.json` format, and imports the results via `importDbTrace`. + +1. 
**Introduce `LangPythonDb`:** Extend `Lang` to include a db-backed variant for Python (`LangPythonDb`), mark it as db-based, and update language detection so `.py` scripts resolve to this enum when the bundled recorder is available. +2. **Bundle the Recorder Wheel:** During desktop builds (AppImage, DMG, future Windows installer) compile the `codetracer_python_recorder` wheel via maturin and ship it inside the distribution alongside its Python shims. Provide a small launcher script (`ct-python-recorder`) that lives next to the CLI binaries. +3. **CLI Invocation & Environment Parity:** Update `recordDb` so when `lang == LangPythonDb` it launches the *same* Python that the user’s shell would resolve for `python`/`python3` (or whatever interpreter is on `$PATH` inside wrappers such as `uv run`). The command will execute `-m codetracer_python_recorder` (or an equivalent entry point) inside the caller’s environment so that site-packages, virtualenvs, and tool-managed setups behave identically. If no interpreter is available, we surface the same error the user would see when running `python`, rather than falling back to a bundled runtime. +4. **Configuration Parity:** Respect the same flags (`--with-diff`, activation scopes, environment auto-start) by translating CLI options into recorder arguments/env vars, and inherit all user environment variables untouched. The db backend will continue to populate sqlite indices and cached metadata as it does for Ruby. +5. **Installer Hooks:** Ensure the bundled CLI exposes the recorder module without overriding interpreter discovery. Wrapper scripts should add our wheel to `PYTHONPATH` (or `CODETRACER_RECORDER_PATH`) while deferring to the interpreter already active in the user’s shell (`uv`, `pipx`, virtualenv). On macOS/Linux this happens via scripts created by `installCodetracerOnPath`; the Windows installer will register similar shims. We will not ship a backup interpreter for unmatched environments. +6. 
**Failure Behaviour:** When interpreter discovery or module import fails, surface a structured error that matches what the user would experience running `python myscript.py`. The expectation is parity—if their environment cannot run the script, neither can `ct record`. + +This decision establishes the db-backend as the single ingestion interface for Codetracer traces, simplifying future features such as diff attachment, uploads, and analytics. + +## Alternatives Considered + +- **Keep Python on the rr backend:** Rejected because rr is not available on Windows/macOS ARM, adds heavyweight dependencies, and diverges from the new recorder capabilities (sys.monitoring, value capture). +- **Call the PyO3 recorder directly from Nim:** Rejected; embedding Python within the Nim process complicates packaging, GIL management, and conflicts with the existing external-process model used for other languages. +- **Ship separate Python-only bundles:** Rejected; it increases cognitive load and contradicts the goal of a unified `ct` CLI regardless of installation method. + +## Consequences + +- **Positive:** One recorder path across install surfaces, easier support and docs, leverage db-backend import tooling (diffs, uploads, cache), and users keep their existing interpreter/virtualenv semantics when invoking `ct record`. Packaging the wheel centralizes updates and keeps the CLI consistent with the pip experience. +- **Negative:** Desktop builds gain a maturin build step (longer CI), and we assume responsibility for distributing the PyO3 wheel across platforms. Interpreter discovery adds complexity when respecting arbitrary `python` shims (`uv run`, pyenv, poetry). Without a bundled fallback interpreter, misconfigured environments will fail fast and require user fixes. +- **Risks & Mitigations:** Wheel build failures will block installer pipelines—mitigate with cached artifacts and CI smoke tests. 
Interpreter mismatch remains the user’s responsibility; we provide clear diagnostics and docs on supported Python versions. + +## Key locations + +- `src/common/common_lang.nim` – add `LangPythonDb`, update `IS_DB_BASED`, and adapt language detection. +- `src/ct/trace/record.nim` – route Python recordings to `dbBackendRecordExe` and pass through recorder-specific arguments. +- `src/ct/db_backend_record.nim` – add a `LangPythonDb` branch that launches the embedded Python recorder CLI and imports the generated trace. +- `src/db-backend/src` – adjust import logic if additional metadata fields are required for Python traces. +- `libs/codetracer-python-recorder/**` – build configuration, PyO3 module entry points, and CLI wrappers that will be invoked by `ct record`. +- `appimage-scripts/` & `non-nix-build/` – package the Python recorder wheel into Linux/macOS distributions and expose the runner. +- `nix/**` & CI workflows – ensure development shells and pipelines can build the wheel and make it available to the desktop bundle. + +## Implementation Notes + +1. Create a maturin build step in installer pipelines that outputs wheels for the target platform and stage them under `resources/python/`. +2. Add a tiny launcher script (e.g., `bin/ct-python-recorder`) that amends `PYTHONPATH` to include the bundled wheel but defers to the interpreter in `$PATH` so wrappers like `uv run` or virtualenv activation continue to work—no backup interpreter is provided. +3. Extend `recordDb` with a Python branch that discovers the interpreter (`env["PYTHON"]`, `which python`, activated `sys.executable` within wrappers) and invokes the launcher with activation paths, output directories, and user arguments. If discovery fails, return an error mirroring `python`’s behaviour (e.g., “command not found”). +4. Update trace import tests to cover Python recordings end-to-end, ensuring sqlite metadata matches expectations. +5. 
Modify CLI help (`ct record --help`) and docs to note that Python recordings are now first-class within the desktop app. + +## Status & Next Steps + +- Draft ADR for feedback (this document). +- Spike installer support by building the wheel inside the AppImage pipeline and confirming it runs `ct record` on sample scripts. +- Once validated, mark this ADR **Accepted** and schedule the code changes behind a feature flag for phased rollout. diff --git a/design-docs/python-recorder-db-backend-implementation-plan.md b/design-docs/python-recorder-db-backend-implementation-plan.md new file mode 100644 index 0000000..34eca34 --- /dev/null +++ b/design-docs/python-recorder-db-backend-implementation-plan.md @@ -0,0 +1,64 @@ +# Python Recorder DB Backend Integration – Implementation Plan + +This plan tracks the work required to implement ADR 0005 (“Wire the Rust/PyO3 Python Recorder into the Codetracer DB Backend”). + +--- + +## Part 1 – Modifications to `codetracer-python-recorder` + +1. **Recorder CLI parity** + - Expose an explicit CLI entry point (e.g., `codetracer_python_recorder.__main__` / `cli.py`) that accepts trace directory, format, activation path, and diff flags mirroring `ct record`. + - Map CLI arguments to existing `start_tracing` API and ensure environment variables propagate unchanged. + - Add integration tests that execute the module via `python -m codetracer_python_recorder` to confirm argument handling and trace emission. + +2. **Trace artifact compatibility** + - Verify the recorder produces `trace.json`, `trace_paths.json`, and `trace_metadata.json` aligned with db-backend expectations (field names, metadata schema). + - Introduce fixtures or golden files if additional metadata (e.g., tool identifiers) must be appended. + +3. **Wheel packaging & layout** + - Update `pyproject.toml` / `Cargo.toml` to tag the wheel for the platforms we ship and to include the new CLI module in the distribution. 
+ - Provide a tiny shim script (e.g., `bin/codetracer-python-recorder`) that simply invokes `python -m codetracer_python_recorder`. + - Extend the `Justfile`/CI workflow to build release wheels and run smoke tests against the CLI entry point. + +4. **Documentation & tooling** + - Add recorder CLI usage examples to `README` / design docs. + - Document expected environment variables (PYTHONPATH additions, activation behaviour) for installer integration. + +Deliverable: a new release of the `codetracer_python_recorder` wheel that the desktop bundle can consume without additional patches. + +--- + +## Part 2 – Modifications to the broader Codetracer codebase + +1. **Language detection & enums** + - Add `LangPythonDb` to `src/common/common_lang.nim`, set `IS_DB_BASED[LangPythonDb] = true`, and update `detectLang` to return the new enum when `.py` files are encountered. + +2. **`ct record` wiring** + - Extend `src/ct/trace/record.nim` to treat `LangPythonDb` like other db-backed languages, passing through user arguments, diff flags, and activation paths. + - Capture interpreter discovery (respect `$PYTHON`, current shell PATH, `sys.executable` inside wrappers) and surface clear errors when the executable is missing. + +3. **db-backend invocation** + - In `src/ct/db_backend_record.nim`, add a Python branch that launches the user’s interpreter with the packaged launcher (e.g., `python -m codetracer_python_recorder --trace-dir ...`). + - Ensure the subprocess inherits the current environment, including virtualenv variables, without modification. + - Reuse the existing import pipeline (`importDbTrace`) to ingest the generated trace artifacts. + +4. **Installer & packaging updates** + - Hook maturin wheel builds into AppImage / DMG / (future) Windows pipelines, staging the wheel and launcher under `resources/python/`. 
+ - Update PATH-install scripts (`install_utils.nim`, installer shell scripts) to expose the launcher while deferring interpreter selection to the user’s environment. + - Add CI smoke tests that run `ct record examples/python_script.py` on each platform build artifact. + +5. **CLI UX & documentation** + - Update `ct record --help`, docs (`docs/book/src/installation.md`, CLI guides) and release notes to communicate Python parity expectations (“matches `python script.py` in the caller’s environment”). + +6. **Validation** + - Add end-to-end tests: record + upload a Python trace via the CLI inside a virtual environment and confirm trace metadata matches expectations. + - Ensure failure modes (missing interpreter, import errors) surface actionable messages. + +Deliverable: desktop Codetracer builds where `ct record` for Python scripts behaves identically to invoking `python` directly, using the user’s interpreter, while storing traces through the db-backend workflow. + +--- + +**Milestones** +1. Ship updated `codetracer_python_recorder` wheel with CLI parity (Part 1). +2. Land Codetracer integration (Part 2) behind a feature flag. +3. Remove the flag after cross-platform packaging and smoke tests succeed. diff --git a/design-docs/python-recorder-db-backend-implementation-plan.status.md b/design-docs/python-recorder-db-backend-implementation-plan.status.md new file mode 100644 index 0000000..00c3a4f --- /dev/null +++ b/design-docs/python-recorder-db-backend-implementation-plan.status.md @@ -0,0 +1,12 @@ +# Python Recorder DB Backend Integration – Status + +## Part 1 – `codetracer-python-recorder` +- ✅ CLI redesigned: accepts trace directory, format, activation path, and diff preference, recording the latter in metadata for the main Codetracer binary to act on. +- ✅ Trace artefacts (`trace.json`, `trace_metadata.json`, `trace_paths.json`) verified via end-to-end CLI execution; metadata now captures recorder details. 
+- ✅ Wheel packaging now installs a `codetracer-python-recorder` console script that calls `codetracer_python_recorder.cli:main` directly, the same entry point used by `python -m codetracer_python_recorder`. +- ✅ New unit and integration tests cover argument parsing plus `python -m` execution to guard against regressions. +- ✅ README documents the CLI, flag semantics (including the fact that diff processing happens in the Codetracer CLI), and packaging expectations for installers. + +## Next Steps +- Coordinate with Part 2 owners to hook the new CLI into the db-backend flow inside the main Codetracer codebase. +- Extend CI/pipeline tasks to distribute the wheel artefacts across desktop bundles once the upstream integration lands.