Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,6 @@
build
*~
.idea/
.cargo/
.cargo/

**/*.egg-info/
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,8 @@

from . import api as _api
from .api import * # re-export public API symbols
from .auto_start import auto_start_from_env

# Runs at package import: starts a trace session when the CODETRACER_TRACE
# environment variable is set, and does nothing otherwise.
auto_start_from_env()

# Re-export exactly the names that the ``api`` module declares public.
__all__ = _api.__all__
142 changes: 6 additions & 136 deletions codetracer-python-recorder/codetracer_python_recorder/api.py
Original file line number Diff line number Diff line change
@@ -1,142 +1,12 @@
"""High-level tracing API built on a Rust backend.

This module exposes a minimal interface for starting and stopping
runtime traces. The heavy lifting is delegated to the
`codetracer_python_recorder` Rust extension which hooks
into `runtime_tracing` and `sys.monitoring`.
"""
"""High-level tracing API built on a Rust backend."""
from __future__ import annotations

import contextlib
import os
from pathlib import Path
from typing import Iterator, Optional

from .codetracer_python_recorder import (
flush_tracing as _flush_backend,
is_tracing as _is_tracing_backend,
start_tracing as _start_backend,
stop_tracing as _stop_backend,
)

TRACE_BINARY: str = "binary"
TRACE_JSON: str = "json"
DEFAULT_FORMAT: str = TRACE_BINARY

_active_session: Optional["TraceSession"] = None


def start(
path: os.PathLike | str,
*,
format: str = DEFAULT_FORMAT,
start_on_enter: os.PathLike | str | None = None,
) -> "TraceSession":
"""Start a global trace session.

- ``path``: Target directory where trace files will be written.
Files created: ``trace.json``/``trace.bin``, ``trace_metadata.json``, ``trace_paths.json``.
- ``format``: Either ``binary`` or ``json`` (controls events file name/format).
- ``start_on_enter``: Optional file path; when provided, tracing remains
paused until the tracer observes execution entering this file. Useful to
avoid recording interpreter and import startup noise when launching a
script via the CLI.

The current implementation records trace data through a Rust backend.
"""
global _active_session
if _is_tracing_backend():
raise RuntimeError("tracing already active")

trace_path = Path(path)
_start_backend(
str(trace_path),
format,
str(Path(start_on_enter)) if start_on_enter is not None else None,
)
session = TraceSession(path=trace_path, format=format)
_active_session = session
return session


def stop() -> None:
"""Stop the active trace session if one is running."""
global _active_session
if not _is_tracing_backend():
return
_stop_backend()
_active_session = None


def is_tracing() -> bool:
"""Return ``True`` when a trace session is active."""
return _is_tracing_backend()


def flush() -> None:
"""Flush buffered trace data.
from typing import Iterable

With the current placeholder implementation this is a no-op but the
function is provided to match the planned public API.
"""
if _is_tracing_backend():
_flush_backend()
from .formats import DEFAULT_FORMAT, TRACE_BINARY, TRACE_JSON
from .session import TraceSession, flush, is_tracing, start, stop, trace


@contextlib.contextmanager
def trace(
path: os.PathLike | str,
*,
format: str = DEFAULT_FORMAT,
) -> Iterator["TraceSession"]:
"""Context manager helper for scoped tracing."""
session = start(
path,
format=format,
)
try:
yield session
finally:
session.stop()


class TraceSession:
"""Handle representing a live tracing session."""

path: Path
format: str

def __init__(self, path: Path, format: str) -> None:
self.path = path
self.format = format

def stop(self) -> None:
"""Stop this trace session."""
if _active_session is self:
stop()

def flush(self) -> None:
"""Flush buffered trace data for this session."""
flush()

def __enter__(self) -> "TraceSession":
return self

def __exit__(self, exc_type, exc, tb) -> None: # pragma: no cover - thin wrapper
self.stop()


def _auto_start_from_env() -> None:
path = os.getenv("CODETRACER_TRACE")
if not path:
return
fmt = os.getenv("CODETRACER_FORMAT", DEFAULT_FORMAT)
start(path, format=fmt)


_auto_start_from_env()

__all__ = [
__all__: Iterable[str] = (
"TraceSession",
"DEFAULT_FORMAT",
"TRACE_BINARY",
Expand All @@ -146,4 +16,4 @@ def _auto_start_from_env() -> None:
"is_tracing",
"trace",
"flush",
]
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
"""Environment-driven trace auto-start helper."""
from __future__ import annotations

import logging
import os
from typing import Iterable

from .formats import DEFAULT_FORMAT

# Environment variable holding the trace output path; auto-start is skipped
# when it is unset or empty.
ENV_TRACE_PATH = "CODETRACER_TRACE"
# Environment variable selecting the trace format; DEFAULT_FORMAT is used
# when it is unset.
ENV_TRACE_FORMAT = "CODETRACER_FORMAT"

# Module-level logger, named after this module.
log = logging.getLogger(__name__)


def auto_start_from_env() -> None:
    """Begin a trace session when the environment asks for one.

    Nothing happens when ``ENV_TRACE_PATH`` is unset or empty, or when a
    trace is already running. Otherwise the format is read from
    ``ENV_TRACE_FORMAT`` (falling back to ``DEFAULT_FORMAT``) and both
    values are handed to ``session.start``.
    """
    trace_target = os.environ.get(ENV_TRACE_PATH)
    if trace_target:
        # Imported lazily so that loading this module does not pull in the
        # session module (and create an import cycle) at boot time.
        from . import session as _session

        if _session.is_tracing():
            log.debug("codetracer auto-start skipped: tracing already active")
        else:
            trace_format = os.environ.get(ENV_TRACE_FORMAT, DEFAULT_FORMAT)
            log_fields = {"trace_path": trace_target, "format": trace_format}
            log.debug("codetracer auto-start triggered", extra=log_fields)
            _session.start(trace_target, format=trace_format)


# Public names of this module: the two environment-variable names and the
# auto-start entry point.
__all__: Iterable[str] = (
"ENV_TRACE_FORMAT",
"ENV_TRACE_PATH",
"auto_start_from_env",
)
37 changes: 37 additions & 0 deletions codetracer-python-recorder/codetracer_python_recorder/formats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
"""Trace format constants and helpers."""
from __future__ import annotations

from typing import Iterable

# Names of the individual trace formats.
TRACE_BINARY: str = "binary"
TRACE_JSON: str = "json"
# Format used when the caller does not specify one.
DEFAULT_FORMAT: str = TRACE_BINARY
# Lower-case format names that ``is_supported`` accepts.
SUPPORTED_FORMATS: frozenset[str] = frozenset({TRACE_BINARY, TRACE_JSON})


def normalize_format(value: str | None) -> str:
    """Map a user-supplied format string onto a canonical lower-case name.

    ``None`` selects ``DEFAULT_FORMAT``. Any other value is returned
    lower-cased without being checked against ``SUPPORTED_FORMATS``, so
    unrecognised names pass through unchanged apart from their case.
    Callers that need to reject them can use ``is_supported``.
    """
    return DEFAULT_FORMAT if value is None else value.lower()


def is_supported(value: str) -> bool:
    """Tell whether *value* names a supported trace format, ignoring case."""
    lowered = value.lower()
    return lowered in SUPPORTED_FORMATS


# Public names of this module: the format constants plus the two helpers.
__all__: Iterable[str] = (
"DEFAULT_FORMAT",
"TRACE_BINARY",
"TRACE_JSON",
"SUPPORTED_FORMATS",
"is_supported",
"normalize_format",
)
130 changes: 130 additions & 0 deletions codetracer-python-recorder/codetracer_python_recorder/session.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
"""Tracing session management helpers."""
from __future__ import annotations

import contextlib
from pathlib import Path
from typing import Iterator, Optional

from .codetracer_python_recorder import (
flush_tracing as _flush_backend,
is_tracing as _is_tracing_backend,
start_tracing as _start_backend,
stop_tracing as _stop_backend,
)
from .formats import DEFAULT_FORMAT, SUPPORTED_FORMATS, is_supported, normalize_format

# Handle created by the most recent successful ``start()``; set back to
# ``None`` by ``stop()`` once the backend has been stopped.
_active_session: Optional["TraceSession"] = None


class TraceSession:
    """Handle for a running trace, usable as a context manager.

    Stores the trace ``path`` and ``format`` name it was constructed with.
    Leaving a ``with`` block calls :meth:`stop`.
    """

    path: Path
    format: str

    def __init__(self, path: Path, format: str) -> None:
        self.path, self.format = path, format

    def __enter__(self) -> "TraceSession":
        return self

    def __exit__(self, exc_type, exc, tb) -> None:  # pragma: no cover - thin wrapper
        self.stop()

    def stop(self) -> None:
        """Stop tracing, but only if this handle is the active session."""
        if _active_session is not self:
            return
        # Resolves to the module-level ``stop`` function, not this method.
        stop()

    def flush(self) -> None:
        """Flush buffered trace data via the module-level ``flush``."""
        flush()


def start(
    path: str | Path,
    *,
    format: str = DEFAULT_FORMAT,
    start_on_enter: str | Path | None = None,
) -> TraceSession:
    """Open the process-wide trace session and return its handle.

    Args:
        path: Trace output location; ``~`` is expanded and the path must
            not refer to an existing non-directory.
        format: Trace format name; matched case-insensitively against
            ``SUPPORTED_FORMATS``.
        start_on_enter: Optional file path forwarded to the backend after
            ``~`` expansion.

    Returns:
        The new :class:`TraceSession`, also recorded as the active session.

    Raises:
        RuntimeError: If the backend reports that tracing is already active.
        ValueError: If *path* or *format* fails validation.
    """
    global _active_session
    if _is_tracing_backend():
        raise RuntimeError("tracing already active")

    # Validate every argument before touching the backend.
    target = _validate_trace_path(Path(path))
    fmt = _coerce_format(format)
    activation = _normalize_activation_path(start_on_enter)

    _start_backend(str(target), fmt, activation)

    handle = TraceSession(path=target, format=fmt)
    _active_session = handle
    return handle


def stop() -> None:
    """Stop the running trace session; do nothing when none is running."""
    global _active_session
    if _is_tracing_backend():
        _stop_backend()
        # Forget the handle only after the backend has actually stopped.
        _active_session = None


def is_tracing() -> bool:
    """Report whether the backend currently has tracing active."""
    backend_active = _is_tracing_backend()
    return backend_active


def flush() -> None:
    """Ask the backend to flush buffered trace data when tracing is active."""
    if not _is_tracing_backend():
        return
    _flush_backend()


@contextlib.contextmanager
def trace(
    path: str | Path,
    *,
    format: str = DEFAULT_FORMAT,
    start_on_enter: str | Path | None = None,
) -> Iterator[TraceSession]:
    """Run a trace session for the duration of a ``with`` block.

    Args:
        path: Trace output location, forwarded to :func:`start`.
        format: Trace format name, forwarded to :func:`start`.
        start_on_enter: Optional activation file path, forwarded to
            :func:`start` so the scoped helper accepts the same options as
            the function it wraps. The default ``None`` keeps the previous
            behaviour of this helper.

    Yields:
        The :class:`TraceSession` returned by :func:`start`.

    Raises:
        RuntimeError: Propagated from :func:`start` when tracing is already
            active.
        ValueError: Propagated from :func:`start` when an argument fails
            validation.
    """
    session = start(path, format=format, start_on_enter=start_on_enter)
    try:
        yield session
    finally:
        # Runs on normal exit and when the body raises, so the session is
        # always stopped.
        session.stop()


def _coerce_format(value: str) -> str:
    """Normalise *value* and reject names outside ``SUPPORTED_FORMATS``.

    Returns:
        The normalised format name.

    Raises:
        ValueError: If the normalised name is not supported. The message
            quotes the value as the caller supplied it.
    """
    candidate = normalize_format(value)
    if is_supported(candidate):
        return candidate

    choices = ", ".join(sorted(SUPPORTED_FORMATS))
    raise ValueError(
        f"unsupported trace format '{value}'. Expected one of: {choices}"
    )


def _validate_trace_path(path: Path) -> Path:
    """Expand ``~`` in *path* and make sure it can serve as a trace directory.

    A path that does not exist yet is accepted, as is an existing directory.

    Raises:
        ValueError: If the expanded path exists and is not a directory.
    """
    expanded = path.expanduser()
    occupied_by_non_directory = expanded.exists() and not expanded.is_dir()
    if occupied_by_non_directory:
        raise ValueError("trace path exists and is not a directory")
    return expanded


def _normalize_activation_path(value: str | Path | None) -> str | None:
    """Return *value* as a ``~``-expanded path string, or ``None`` unchanged."""
    return None if value is None else str(Path(value).expanduser())


# Public names of this module: the session class and the module-level
# functions. The underscore-prefixed validation helpers are not listed.
__all__ = (
"TraceSession",
"flush",
"is_tracing",
"start",
"stop",
"trace",
)
5 changes: 4 additions & 1 deletion codetracer-python-recorder/src/code_object.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
//! Shared code-object caching utilities for sys.monitoring callbacks.
use dashmap::DashMap;
use once_cell::sync::OnceCell;
use pyo3::prelude::*;
Expand Down Expand Up @@ -148,7 +150,8 @@ impl CodeObjectRegistry {
self.map
.entry(id)
.or_insert_with(|| Arc::new(CodeObjectWrapper::new(py, code)))
.clone() //AI? Why do we need to clone here?
// Clone the `Arc` so each caller receives its own reference-counted handle.
.clone()
}

/// Remove the wrapper for a given code id, if present.
Expand Down
Loading