# 0010 – Codetracer Python Recorder Benchmarking

## Status
Proposed – pending review and implementation sequencing (target: post-configurable-trace-filter release).

## Context
- The Rust-backed `codetracer-python-recorder` now exposes configurable trace filters (WS1–WS6) and baseline micro/perf smoke benchmarks, but these are developer-only workflows with no CI visibility or historical tracking.
- Performance regressions are difficult to detect: Criterion runs produce only local reports, the Python smoke benchmark is opt-in, and CI currently exercises only functional correctness.
- Product direction demands confidence that new features (filters, IO capture, PyO3 integration, policy changes) do not introduce unacceptable overhead or redaction slippage across representative workloads.
- We need an auditable, automated benchmarking strategy that integrates with existing tooling (`just`, `uv`, the Nix flake, GitHub Actions/Jenkins) and surfaces trends to the team without slowing the release cadence.

## Decision
We will build a first-class benchmarking suite for `codetracer-python-recorder` with three pillars:

1. **Deterministic harness coverage**
   - Preserve the existing Criterion microbench (`benches/trace_filter.rs`) and the Python smoke benchmark, expanding them into a common `bench` workspace with reusable fixtures and scenario definitions (baseline, glob, regex, IO-heavy, auto-start).
   - Introduce additional Rust benches for runtime hot paths (scope resolution, redaction policy application, telemetry writes) under `codetracer-python-recorder/benches/`.
   - Add Python benchmarks (pytest plugins plus `pytest-benchmark` or custom timers) for end-to-end CLI runs, session API usage, and cross-process start/stop costs; see the pytest-benchmark sketch after this list.

2. **Automated execution & artefacts**
   - Create a dedicated `just bench-all` recipe (or extend `just bench`) that orchestrates all benchmarks, produces structured JSON summaries (`target/perf/*.json`), and archives raw outputs (Criterion reports, flamegraphs when enabled).
   - Provide a stable JSON schema capturing metadata (git SHA, platform, interpreter versions), scenario descriptors, statistics (p50/p95/mean, variance), and regression thresholds; an illustrative shape appears after this list.
   - Ship a lightweight renderer (`scripts/render_bench_report.py`) that compares current results against the latest baseline stored in CI artefacts.

3. **CI integration & historical tracking**
   - Add a continuous benchmark job (nightly, with an optional pull-request trigger) that executes the suite inside the Nix shell (ensuring gnuplot, or Criterion's dependency-free plotting fallback, is available), uploads results as GitHub Actions artefacts for long-term storage, and posts summary comments on PRs.
   - Maintain baseline snapshots in-repo (`codetracer-python-recorder/benchmarks/baselines/*.json`), refreshed on release branches after runs on dedicated hardware.
   - Gate merges when regressions exceed configured tolerances (e.g., >5% slowdown on primary scenarios) unless explicitly approved; a minimal gate sketch appears after this list.
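
A minimal sketch of the kind of session-API benchmark pillar 1 calls for, using `pytest-benchmark`. The module name `codetracer_python_recorder` and the `start`/`stop` entry points are assumptions for illustration; substitute the real session API when wiring this up.

```python
"""Sketch of a session start/stop benchmark; the recorder API shown is assumed."""
import itertools
from pathlib import Path

import pytest

codetracer = pytest.importorskip("codetracer_python_recorder")
_run_ids = itertools.count()


def _record_small_workload(base: Path) -> None:
    # Fresh trace directory per iteration so repeated runs do not collide.
    trace_dir = base / f"run-{next(_run_ids)}"
    trace_dir.mkdir(parents=True)
    codetracer.start(trace_dir)  # hypothetical entry point
    sum(i * i for i in range(10_000))  # tiny representative workload
    codetracer.stop()  # hypothetical entry point


def test_session_start_stop_cost(benchmark, tmp_path: Path) -> None:
    # pytest-benchmark repeats the callable and reports mean/p50/p95 statistics.
    benchmark(_record_small_workload, tmp_path)
```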
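
One possible shape for the `target/perf/*.json` summaries described in pillar 2, sketched as Python dataclasses. The field names are illustrative, not a committed schema.

```python
"""Illustrative layout for the structured benchmark summary; field names are assumptions."""
from dataclasses import dataclass, field


@dataclass
class BenchMetadata:
    git_sha: str
    platform: str          # e.g. "x86_64-linux"
    python_version: str
    rustc_version: str


@dataclass
class ScenarioResult:
    scenario: str          # e.g. "filters-glob", "io-heavy"
    harness: str           # "criterion" or "pytest-benchmark"
    p50_ns: float
    p95_ns: float
    mean_ns: float
    variance_ns2: float
    threshold_pct: float   # allowed regression versus baseline, e.g. 5.0


@dataclass
class BenchReport:
    metadata: BenchMetadata
    results: list[ScenarioResult] = field(default_factory=list)
```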
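
And a minimal sketch of the regression gate from pillar 3, assuming the summary layout above; the 5% default mirrors the tolerance named in this ADR.

```python
"""Minimal regression gate comparing a current summary against a baseline."""
import json
import sys
from pathlib import Path


def check_regressions(baseline_path: Path, current_path: Path, default_tolerance_pct: float = 5.0) -> int:
    baseline = {r["scenario"]: r for r in json.loads(baseline_path.read_text())["results"]}
    failures = []
    for result in json.loads(current_path.read_text())["results"]:
        base = baseline.get(result["scenario"])
        if base is None:
            continue  # new scenario with no baseline yet: nothing to gate on
        tolerance = result.get("threshold_pct", default_tolerance_pct)
        slowdown_pct = (result["p50_ns"] - base["p50_ns"]) / base["p50_ns"] * 100
        if slowdown_pct > tolerance:
            failures.append(f"{result['scenario']}: +{slowdown_pct:.1f}% (limit {tolerance}%)")
    for line in failures:
        print(f"REGRESSION {line}", file=sys.stderr)
    return 1 if failures else 0


if __name__ == "__main__":
    sys.exit(check_regressions(Path(sys.argv[1]), Path(sys.argv[2])))
```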

Supporting practices:
- Store benchmark configuration alongside the code (`benchconfig.toml`) to keep scenarios versioned and reviewable.
- Keep the opt-in developer tooling (`just bench`) fast by allowing subset filters (e.g., `JUST_BENCH_SCENARIOS=filters,session`); a loader sketch follows this list.
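
A sketch of how the `just bench` wrapper could honour `benchconfig.toml` and the `JUST_BENCH_SCENARIOS` filter. The `[scenarios.*]` table layout is illustrative only; the actual keys would be settled during implementation.

```python
"""Sketch: select benchmark scenarios from benchconfig.toml, honouring JUST_BENCH_SCENARIOS."""
import os
import tomllib  # Python 3.11+
from pathlib import Path


def selected_scenarios(config_path: Path = Path("benchconfig.toml")) -> dict[str, dict]:
    with config_path.open("rb") as fh:
        config = tomllib.load(fh)
    scenarios = config.get("scenarios", {})  # e.g. [scenarios.filters], [scenarios.session]
    requested = os.environ.get("JUST_BENCH_SCENARIOS")
    if not requested:
        return scenarios  # no filter set: run everything
    wanted = {name.strip() for name in requested.split(",") if name.strip()}
    return {name: spec for name, spec in scenarios.items() if name in wanted}
```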

## Rationale
- **Consistency:** Centralising definitions and outputs ensures that local runs and CI share identical workflows, reducing “works on my machine” drift.
- **Observability:** Structured artefacts + historical storage let us graph trends, spot regressions early, and correlate with feature work.
- **Scalability:** By codifying thresholds and baselines, we can expand the suite without rethinking CI each time (e.g., adding memory benchmarks).
- **Maintainability:** Versioned configuration and scripts avoid ad-hoc shell pipelines and make it easy for contributors to extend benchmarks.

## Consequences
Positive:
- Faster detection of performance regressions and validation of expected improvements.
- Shared language for performance goals (scenarios, metrics, thresholds) across Rust and Python components.
- Developers gain confidence via `just bench` parity with CI, plus local comparison tooling.

Negative / Risks:
- Running the full suite may increase CI time; we mitigate by scheduling nightly runs and allowing PR opt-in toggles.
- Maintaining baselines requires disciplined updates whenever we intentionally change performance characteristics.
- Additional scripts and artefacts introduce upkeep; we must document workflows and automate cleanup.

Mitigations:
- Provide partial runs (`just bench --scenarios filters`, `pytest ... -k benchmark`) for quick iteration.
- Automate baseline updates via a `scripts/update_bench_baseline.py` helper with reviewable diffs; see the sketch after this list.
- Document the suite in `docs/onboarding/trace-filters.md` (updated) and a new benchmarking guide.
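
One way `scripts/update_bench_baseline.py` could keep baseline refreshes reviewable: copy the latest summary into the versioned baselines directory and let the resulting git diff carry the review. The per-platform file layout is an assumption.

```python
"""Sketch: promote a fresh benchmark summary to the in-repo baseline."""
import json
import shutil
import sys
from pathlib import Path

BASELINE_DIR = Path("codetracer-python-recorder/benchmarks/baselines")


def promote(summary_path: Path) -> Path:
    report = json.loads(summary_path.read_text())  # sanity-check that the summary parses
    platform = report.get("metadata", {}).get("platform", "unknown")
    target = BASELINE_DIR / f"{platform}.json"  # assumed: one baseline file per platform
    BASELINE_DIR.mkdir(parents=True, exist_ok=True)
    shutil.copyfile(summary_path, target)
    return target


if __name__ == "__main__":
    print(f"updated baseline: {promote(Path(sys.argv[1]))}")
```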

## References
- `codetracer-python-recorder/benches/trace_filter.rs` (current microbench harness).
- `codetracer-python-recorder/tests/python/perf/test_trace_filter_perf.py` (Python smoke benchmark).
- `Justfile` (`bench` recipe) and `nix/flake.nix` (dev shell dependencies, now including gnuplot).
- Storage backend for historical data (settled: GitHub Actions artefacts).