From 4adc9399c9d8b48cf3aca1c442c12d99bb7398a4 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 16 Mar 2026 08:57:12 +0000 Subject: [PATCH 1/8] Initial plan From 1d0a1561eaa736cc182bba2ba4c034334b89be17 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 16 Mar 2026 09:35:43 +0000 Subject: [PATCH 2/8] feat: implement closed-loop execution/reconciliation/memory lanes for repo-architect - Add WorkItem dataclass and durable work_state.json (memory lane) - Add select_ready_issue() with deterministic priority-ordered selection - Add delegate_to_copilot() with dry-run/live mode and GitHub API integration - Add reconcile_pr_state() ingesting open/merged/closed/stale PR outcomes - Add run_execution_cycle() and run_reconciliation_cycle() entry points - Update run_issue_cycle() to filter out already-in-progress fingerprints - Add lifecycle labels: ready-for-delegation, in-progress, pr-open, merged, blocked, superseded - Add OBJECTIVE_LABELS dict with 7 canonical architectural objectives - Extend Config with 7 new operator control fields - Validate new env vars: ACTIVE_OBJECTIVE, ENABLE_LIVE_DELEGATION, MAX_CONCURRENT_DELEGATED, STALE_TIMEOUT_DAYS, RECONCILIATION_WINDOW_DAYS - Add --mode execution/reconcile CLI args + operator flags - Add dedicated workflow files: repo-architect-execution.yml (every 2h), repo-architect-reconcile.yml (every 4h) - Simplify repo-architect.yml schedule to planning only; remove invalid github.event.schedule conditions - Add 47 new tests across 7 test classes (231 total, all passing) - Rewrite OPERATOR_GUIDE.md to document all four lanes, work state schema, label lifecycle, dry-run vs live, and scheduled automation Co-authored-by: Steake <530040+Steake@users.noreply.github.com> --- .../workflows/repo-architect-execution.yml | 124 +++ .../workflows/repo-architect-reconcile.yml | 94 ++ .github/workflows/repo-architect.yml | 51 +- 
docs/repo_architect/OPERATOR_GUIDE.md | 500 ++++++++-- repo_architect.py | 910 +++++++++++++++++- tests/test_repo_architect.py | 627 ++++++++++++ 6 files changed, 2192 insertions(+), 114 deletions(-) create mode 100644 .github/workflows/repo-architect-execution.yml create mode 100644 .github/workflows/repo-architect-reconcile.yml diff --git a/.github/workflows/repo-architect-execution.yml b/.github/workflows/repo-architect-execution.yml new file mode 100644 index 0000000..4cdbbde --- /dev/null +++ b/.github/workflows/repo-architect-execution.yml @@ -0,0 +1,124 @@ +name: repo-architect-execution + +# Execution lane: select one ready issue and delegate it to Copilot. +# Runs every 2 hours on a schedule, or on workflow_dispatch. +# Uses a per-repository concurrency group to prevent duplicate delegations. + +on: + workflow_dispatch: + inputs: + enable_live_delegation: + description: 'Actually delegate to Copilot via GitHub API (false = dry-run report only)' + required: false + default: 'false' + type: choice + options: + - 'false' + - 'true' + active_objective: + description: 'Restrict issue selection to a specific objective (blank = any)' + required: false + default: '' + type: choice + options: + - '' + - restore-parse-correctness + - eliminate-import-cycles + - converge-runtime-structure + - normalise-knowledge-substrate + - isolate-agent-boundaries + - reduce-architecture-score-risk + - add-consciousness-instrumentation + lane_filter: + description: 'Restrict issue selection to a specific charter lane (blank = any)' + required: false + default: '' + type: string + max_concurrent_delegated: + description: 'Max number of issues simultaneously in-flight' + required: false + default: '1' + type: choice + options: + - '1' + - '2' + - '3' + stale_timeout_days: + description: 'Days before a delegated-but-PR-less item is marked stale' + required: false + default: '14' + type: string + schedule: + # Every 2 hours + - cron: '37 */2 * * *' + +concurrency: + group: 
repo-architect-execution-${{ github.repository }} + cancel-in-progress: false # never cancel in-flight delegation runs + +permissions: + contents: read + issues: write + models: read + +jobs: + execute: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + + - name: Ensure artifact directories exist + run: mkdir -p .agent docs/repo_architect + + - name: Run repo architect (execution mode) + env: + GITHUB_TOKEN: ${{ github.token }} + GITHUB_REPO: ${{ github.repository }} + REPO_ARCHITECT_BRANCH_SUFFIX: ${{ github.run_id }}-${{ github.run_attempt }} + ENABLE_LIVE_DELEGATION: ${{ github.event.inputs.enable_live_delegation || 'false' }} + MAX_CONCURRENT_DELEGATED: ${{ github.event.inputs.max_concurrent_delegated || '1' }} + ACTIVE_OBJECTIVE: ${{ github.event.inputs.active_objective || '' }} + LANE_FILTER: ${{ github.event.inputs.lane_filter || '' }} + STALE_TIMEOUT_DAYS: ${{ github.event.inputs.stale_timeout_days || '14' }} + run: | + EXTRA_ARGS="" + if [ "$ENABLE_LIVE_DELEGATION" = "true" ]; then + EXTRA_ARGS="$EXTRA_ARGS --enable-live-delegation" + fi + if [ -n "$ACTIVE_OBJECTIVE" ]; then + EXTRA_ARGS="$EXTRA_ARGS --active-objective $ACTIVE_OBJECTIVE" + fi + if [ -n "$LANE_FILTER" ]; then + EXTRA_ARGS="$EXTRA_ARGS --lane-filter $LANE_FILTER" + fi + python repo_architect.py --allow-dirty --mode execution \ + --max-concurrent-delegated "$MAX_CONCURRENT_DELEGATED" \ + --stale-timeout-days "$STALE_TIMEOUT_DAYS" \ + $EXTRA_ARGS + + - name: Upload execution artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + name: repo-architect-execution-${{ github.run_id }} + path: .agent + if-no-files-found: warn + retention-days: 7 + + - name: Write workflow summary + if: always() + run: | + echo "## repo-architect execution run summary" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "- **mode**: 
\`execution\`" >> $GITHUB_STEP_SUMMARY + echo "- **live delegation**: \`${{ github.event.inputs.enable_live_delegation || 'false' }}\`" >> $GITHUB_STEP_SUMMARY + echo "- **active objective**: \`${{ github.event.inputs.active_objective || 'any' }}\`" >> $GITHUB_STEP_SUMMARY + echo "- **run**: [${{ github.run_id }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})" >> $GITHUB_STEP_SUMMARY diff --git a/.github/workflows/repo-architect-reconcile.yml b/.github/workflows/repo-architect-reconcile.yml new file mode 100644 index 0000000..24c69b6 --- /dev/null +++ b/.github/workflows/repo-architect-reconcile.yml @@ -0,0 +1,94 @@ +name: repo-architect-reconcile + +# Reconciliation lane: ingest PR outcomes back into durable work state. +# Runs every 4 hours on a schedule, or on workflow_dispatch. +# Uses a per-repository concurrency group to prevent overlapping reconciliation runs. + +on: + workflow_dispatch: + inputs: + stale_timeout_days: + description: 'Days before a delegated-but-PR-less item is marked stale' + required: false + default: '14' + type: string + reconciliation_window_days: + description: 'Days of PRs to consider during reconciliation' + required: false + default: '30' + type: string + dry_run: + description: 'Dry-run: read PR state but do not update lifecycle labels on issues' + required: false + default: 'false' + type: choice + options: + - 'false' + - 'true' + schedule: + # Every 4 hours + - cron: '57 */4 * * *' + +concurrency: + group: repo-architect-reconcile-${{ github.repository }} + cancel-in-progress: false + +permissions: + contents: read + issues: write + pull-requests: read + models: read + +jobs: + reconcile: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + + - name: Ensure artifact directories exist + run: mkdir -p .agent docs/repo_architect + + - name: Run repo 
architect (reconcile mode) + env: + GITHUB_TOKEN: ${{ github.token }} + GITHUB_REPO: ${{ github.repository }} + REPO_ARCHITECT_BRANCH_SUFFIX: ${{ github.run_id }}-${{ github.run_attempt }} + STALE_TIMEOUT_DAYS: ${{ github.event.inputs.stale_timeout_days || '14' }} + RECONCILIATION_WINDOW_DAYS: ${{ github.event.inputs.reconciliation_window_days || '30' }} + run: | + DRY_RUN="${{ github.event.inputs.dry_run || 'false' }}" + EXTRA_ARGS="" + if [ "$DRY_RUN" = "true" ]; then + EXTRA_ARGS="$EXTRA_ARGS --dry-run" + fi + python repo_architect.py --allow-dirty --mode reconcile \ + --stale-timeout-days "$STALE_TIMEOUT_DAYS" \ + --reconciliation-window-days "$RECONCILIATION_WINDOW_DAYS" \ + $EXTRA_ARGS + + - name: Upload reconciliation artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + name: repo-architect-reconcile-${{ github.run_id }} + path: .agent + if-no-files-found: warn + retention-days: 7 + + - name: Write workflow summary + if: always() + run: | + echo "## repo-architect reconciliation run summary" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "- **mode**: \`reconcile\`" >> $GITHUB_STEP_SUMMARY + echo "- **stale timeout**: \`${{ github.event.inputs.stale_timeout_days || '14' }}d\`" >> $GITHUB_STEP_SUMMARY + echo "- **reconciliation window**: \`${{ github.event.inputs.reconciliation_window_days || '30' }}d\`" >> $GITHUB_STEP_SUMMARY + echo "- **run**: [${{ github.run_id }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})" >> $GITHUB_STEP_SUMMARY diff --git a/.github/workflows/repo-architect.yml b/.github/workflows/repo-architect.yml index 16db5b1..c57b4b0 100644 --- a/.github/workflows/repo-architect.yml +++ b/.github/workflows/repo-architect.yml @@ -4,12 +4,14 @@ on: workflow_dispatch: inputs: mode: - description: 'Operating mode. ''issue'' (default safe governance mode). ''analyze'' is read-only. ''report''/''mutate''/''campaign'' are charter-validated mutation-capable modes (§9–§11).' 
+ description: 'Operating mode. ''issue'' (default safe governance mode). ''execution'' selects one ready issue and delegates to Copilot. ''reconcile'' ingests PR outcomes. ''analyze'' is read-only. ''report''/''mutate''/''campaign'' are charter-validated mutation-capable modes (§9–§11).' required: true default: 'issue' type: choice options: - issue + - execution + - reconcile - analyze - report - mutate @@ -22,6 +24,52 @@ on: options: - 'false' - 'true' + enable_live_delegation: + description: 'Execution mode: actually delegate to Copilot via GitHub API (false = dry-run report only)' + required: false + default: 'false' + type: choice + options: + - 'false' + - 'true' + active_objective: + description: 'Execution mode: restrict issue selection to a specific objective (leave blank for any)' + required: false + default: '' + type: choice + options: + - '' + - restore-parse-correctness + - eliminate-import-cycles + - converge-runtime-structure + - normalise-knowledge-substrate + - isolate-agent-boundaries + - reduce-architecture-score-risk + - add-consciousness-instrumentation + lane_filter: + description: 'Execution mode: restrict issue selection to a specific charter lane (leave blank for any)' + required: false + default: '' + type: string + max_concurrent_delegated: + description: 'Execution mode: max number of issues simultaneously in-flight' + required: false + default: '1' + type: choice + options: + - '1' + - '2' + - '3' + stale_timeout_days: + description: 'Reconciliation: days before a delegated-but-PR-less item is marked stale' + required: false + default: '14' + type: string + reconciliation_window_days: + description: 'Reconciliation: days of PRs to consider during reconciliation' + required: false + default: '30' + type: string max_issues: description: 'Issue mode: maximum issues to open/update per run' required: false @@ -84,6 +132,7 @@ on: default: 'parse_errors,import_cycles,entrypoint_consolidation,hygiene,report' type: string schedule: + # 
Planning: hourly gap detection and issue synthesis - cron: '17 * * * *' concurrency: diff --git a/docs/repo_architect/OPERATOR_GUIDE.md b/docs/repo_architect/OPERATOR_GUIDE.md index 2f913d3..0c333ad 100644 --- a/docs/repo_architect/OPERATOR_GUIDE.md +++ b/docs/repo_architect/OPERATOR_GUIDE.md @@ -2,7 +2,16 @@ ## Operating Model -`repo_architect.py` is an **architectural governance** tool that inspects the repository, diagnoses structural and operational gaps, and opens structured GitHub Issues containing Copilot-ready implementation prompts. +`repo_architect.py` is an **architectural governance** tool that inspects the repository, diagnoses structural and operational gaps, and runs a continuous closed-loop system for issue tracking, Copilot delegation, and PR reconciliation. + +### Four operating lanes + +``` +Planning lane: analyze → diagnose → dedupe → create/update issue +Execution lane: select ready issue → delegate to Copilot → track PR +Memory lane: ingest issue/PR state → update work state → feed next planning cycle +Scheduler lane: run automatically on a schedule with safe gating and no duplicate execution +``` ### Default safe mode: issue-first governance @@ -13,6 +22,24 @@ inspect repo → identify architectural gap → deduplicate against existing iss The GitHub Issue becomes the mutation surface. Copilot or a human becomes the code author. CI validates the resulting implementation PRs. +### Execution lane + +``` +load work state → reconcile open PRs → select one ready issue + → delegate to Copilot (label + assign + comment) → save work state +``` + +The execution lane selects at most one issue per run. Delegation is either dry-run (report only) or live (calls GitHub API). Live delegation requires `--enable-live-delegation` or `ENABLE_LIVE_DELEGATION=true`. 
+ +### Reconciliation lane + +``` +load work state → list recent PRs → match PRs to tracked issues + → update pr_state/merged/closed → update lifecycle labels → save work state +``` + +Reconciliation feeds the memory lane. After reconciliation, the planning lane can see which issues have open PRs, which have merged, and which are stale. + ### Charter-validated secondary modes: lane-based mutation ``` @@ -99,6 +126,8 @@ Operators who need autonomous lane-based mutation can use `--mode mutate` or `-- | Mode | Description | Charter basis | Default? | |---|---|---|---| | `issue` | **Default safe governance mode.** Detects gaps and opens/updates GitHub Issues with Copilot-ready prompts. | §14 Effective Gödel-Machine, §15 Self-Modification Doctrine, §20 Automation Policy | ✅ Yes | +| `execution` | Selects one ready issue and delegates it to Copilot (dry-run or live). | Execution lane — closed-loop extension | Scheduled | +| `reconcile` | Ingests PR outcomes back into durable work state. | Reconciliation lane — memory loop | Scheduled | | `analyze` | Build analysis and write `.agent/` artifacts. No GitHub API calls. | §20 Automation Policy | Read-only | | `report` | Refresh `docs/repo_architect/` documentation reports. | Lane 0 (Report generation) | Read-only | | `mutate` | Attempt one direct code mutation via charter-validated lanes. | §9–§10 Self-Modification Contract, Lanes 0–4 | Opt-in | @@ -112,12 +141,15 @@ The default mode (both CLI and scheduled workflow) is `issue`. ### What it does -1. Builds repository analysis (parse errors, import cycles, entrypoints, architecture score). -2. Calls GitHub Models for an architectural risk summary (if credentials are available). -3. Diagnoses concrete architectural gaps from the analysis. -4. Deduplicates: searches existing open issues for each gap using a deterministic fingerprint. -5. Creates a new GitHub Issue (or updates an existing one with a comment). -6. Emits a structured JSON result and workflow step summary. +1. 
Loads durable work state (memory lane) to avoid re-raising issues already actively in-progress. +2. Builds repository analysis (parse errors, import cycles, entrypoints, architecture score). +3. Calls GitHub Models for an architectural risk summary (if credentials are available). +4. Diagnoses concrete architectural gaps from the analysis. +5. Filters gaps already covered by active delegations in work state. +6. Deduplicates: searches existing open issues for each gap using a deterministic fingerprint. +7. Creates a new GitHub Issue (or updates an existing one with a comment). +8. Records newly created issues in durable work state for future planning passes. +9. Emits a structured JSON result and workflow step summary. ### Running it @@ -149,7 +181,7 @@ python repo_architect.py --mode issue --allow-dirty --max-issues 3 | Flag | Default | Description | |---|---|---| -| `--mode issue` | `issue` | Operating mode (issue/mutate/campaign/report/analyze) | +| `--mode issue` | `issue` | Operating mode | | `--dry-run` | `false` | Write issue bodies to disk only | | `--max-issues N` | `1` | Maximum issues to open/update per run | | `--issue-subsystem X` | all | Restrict to one subsystem | @@ -157,150 +189,266 @@ python repo_architect.py --mode issue --allow-dirty --max-issues 3 --- -## Lane-Based Mutation Modes (Charter §9–§10) +## Execution Lane -Modes `mutate` and `campaign` implement the narrow, validated self-modification lanes defined in the charter. They are retained as charter-sanctioned secondary modes. +### What it does -### Lane Priority Order +1. Loads durable work state. +2. Runs lightweight PR reconciliation to refresh issue states. +3. Selects at most one issue that is ready for delegation (eligible labels + not blocked/in-progress). +4. Delegates to Copilot: applies `in-progress` label, assigns to `@copilot`, posts delegation comment. +5. Records the delegation in work state. 
-| Priority | Lane | Charter ref | Behaviour | -|---|---|---|---| -| 1 | `parse_errors` | Lane 2 (Parse repair) | Model-assisted syntax fix. Skipped if no parse errors exist or model is unavailable. | -| 2 | `import_cycles` | Lane 3 (Circular dependency elimination) | Model-assisted import cycle break. Skipped if no cycles or model unavailable. | -| 3 | `entrypoint_consolidation` | Lane 4 (Entrypoint consolidation) | Annotates redundant entrypoints when ≥ 4 exist. Model-assisted. | -| 4 | `hygiene` | Lane 1 (Hygiene) | Remove explicitly `# DEBUG`-marked `print()` statements. No model required. | -| 5 | `report` | Lane 0 (Report generation) | Refresh architecture documentation. Fallback when no code mutation is possible. | +### Selection rules -### Running mutation modes +- Issue must have all of: `arch-gap`, `copilot-task`, `needs-implementation` +- Issue must NOT have: `blocked`, `superseded`, `in-progress`, `pr-open`, `merged` +- Fingerprint must not already be delegated +- At most one issue per charter lane at a time +- Respects `MAX_CONCURRENT_DELEGATED` limit (default: 1) +- Prefers highest priority first (critical > high > medium > low) + +### Running it ```bash -# Single mutation in lane-priority order -python repo_architect.py --mode mutate --allow-dirty +# Dry-run: report what would be delegated (default) +python repo_architect.py --mode execution --allow-dirty -# Restrict to specific lanes -python repo_architect.py --mode mutate --lane hygiene --allow-dirty +# Live delegation to Copilot via GitHub API +python repo_architect.py --mode execution --allow-dirty --enable-live-delegation -# Multi-slice campaign -python repo_architect.py --mode campaign --allow-dirty --max-slices 3 --lanes parse_errors,import_cycles,hygiene,report +# Restrict to a specific objective +python repo_architect.py --mode execution --allow-dirty --active-objective eliminate-import-cycles + +# Restrict to a specific lane +python repo_architect.py --mode execution --allow-dirty 
--lane-filter import_cycles ``` -### Validation policy (charter §9.3) +### Environment variables -Each lane runs validation before pushing: +| Variable | Default | Description | +|---|---|---| +| `ENABLE_LIVE_DELEGATION` | `false` | Enable live GitHub API delegation | +| `MAX_CONCURRENT_DELEGATED` | `1` | Max simultaneous in-flight delegations | +| `ACTIVE_OBJECTIVE` | (any) | Restrict selection to a specific objective key | +| `LANE_FILTER` | (any) | Restrict selection to a specific charter lane | +| `STALE_TIMEOUT_DAYS` | `14` | Days before a delegated-but-PR-less item is marked stale | -| Lane | Validation | +### Valid objective keys + +| Key | Description | |---|---| -| `parse_errors` | `ast.parse` on model-generated content | -| `import_cycles` | `ast.parse` + import smoke test (warn-only) | -| `entrypoint_consolidation` | `ast.parse` on model-generated content | -| `hygiene` | `python -m py_compile` on all touched Python files | -| `report` | Verify report files were written | +| `restore-parse-correctness` | Restore or preserve parse correctness (Lane 2) | +| `eliminate-import-cycles` | Eliminate import cycles (Lane 3) | +| `converge-runtime-structure` | Converge runtime entrypoint structure (Lane 4) | +| `normalise-knowledge-substrate` | Normalise knowledge substrate boundaries (Lane 8) | +| `isolate-agent-boundaries` | Isolate agent boundaries (Lane 7) | +| `reduce-architecture-score-risk` | Reduce architecture score risk (Lanes 0–9) | +| `add-consciousness-instrumentation` | Add consciousness instrumentation (Lane 9) | -Validation failures abort the mutation and **never push a broken branch**. +### How Copilot execution is triggered + +In live mode, the execution lane: + +1. Applies `in-progress` label and removes `ready-for-delegation`. +2. Assigns the issue to `@copilot` (GitHub Copilot coding agent username). +3. Posts a delegation comment with the active objective, lane, and fingerprint. 
+ +GitHub Copilot coding agent is automatically triggered when an issue is assigned to `@copilot`. It reads the issue body (including the Copilot implementation prompt) and opens a PR. The reconciliation lane then detects this PR and updates the work state. --- -## Issue Structure +## Reconciliation Lane -Each generated issue follows this template: +### What it does -| Section | Description | +1. Loads durable work state. +2. Fetches all recent PRs from the repository. +3. For each tracked issue, detects linked PRs (by `#issue_number` reference in PR body/title). +4. Updates item state: `merged`, `closed_unmerged`, `open`, `draft`, `stale`. +5. Updates lifecycle labels on the GitHub Issue. +6. Saves updated work state. + +### PR state classifications + +| Classification | Condition | |---|---| -| **Summary** | 1-2 sentence description of the gap | -| **Problem** | Detailed problem statement with evidence | -| **Why it matters** | Impact and urgency justification | -| **Scope** | Bounded implementation scope | -| **Suggested files** | Repo-relative file paths relevant to the fix | -| **Implementation notes** | Guidance for the implementer | -| **Copilot implementation prompt** | Ready-to-paste prompt for Copilot Chat / agent mode | -| **Acceptance criteria** | Checklist items to verify the fix | -| **Validation** | Shell commands to validate the implementation | -| **Out of scope** | Explicit exclusions | -| **Machine metadata** | Structured JSON: subsystem, priority, confidence, fingerprint, run_id, etc. 
| +| `merged` | PR has `merged_at` timestamp | +| `closed_unmerged` | PR state is `closed` and no `merged_at` | +| `draft` | PR is open and marked as draft | +| `open` | PR is open and not draft | +| `stale` | No PR found and item has been delegated for > `STALE_TIMEOUT_DAYS` days | -### Machine metadata fields +### Running it -```json -{ - "subsystem": "runtime", - "priority": "high", - "confidence": 0.95, - "mode": "issue", - "generated_at": "2026-01-01T00:00:00Z", - "run_id": "12345678-1", - "repo": "org/repo", - "issue_key": "import-cycles", - "fingerprint": "a1b2c3d4e5f6" -} +```bash +# Ingest PR outcomes +python repo_architect.py --mode reconcile --allow-dirty + +# With custom thresholds +python repo_architect.py --mode reconcile --allow-dirty --stale-timeout-days 7 --reconciliation-window-days 60 ``` ---- +### Environment variables -## Detected Gap Types +| Variable | Default | Description | +|---|---|---| +| `STALE_TIMEOUT_DAYS` | `14` | Days before a delegated-but-PR-less item is marked stale | +| `RECONCILIATION_WINDOW_DAYS` | `30` | Days of PRs to consider during reconciliation | -| Gap | Subsystem | Priority | Charter lane equivalent | -|---|---|---|---| -| Python parse errors | `runtime` | `critical` | Lane 2 (Parse repair) | -| Import cycles | `runtime` | `high` | Lane 3 (Circular dependency elimination) | -| Entrypoint fragmentation (≥4 backend entrypoints) | `runtime` | `medium` | Lane 4 (Entrypoint consolidation) | -| Dependency direction violations | `core` | `medium` | Lane 5 (Contract repair) | -| Agent boundary violations | `agents` | `medium` | Lane 7 (Agent boundary enforcement) | -| Architecture score < 70/100 | `reporting` | `high` / `medium` | Cross-lane | -| Workflow / documentation drift (model-assisted) | `workflow` | `medium` | Lane 0 (Report generation) | +### How issues and PRs feed the next planning cycle -Lanes 6 (Runtime extraction), 8 (Knowledge substrate normalisation), and 9 (Consciousness instrumentation) are represented in 
`CHARTER_LANE_MAP` and will be detected as gap signals become available in the analysis engine. +After reconciliation: +- Issues with `merged` PRs are marked `done` in work state → planner can generate follow-on work. +- Issues with `open` PRs are blocked from re-delegation → planner skips them. +- Issues with `stale` status are flagged → operator can review or unblock. +- Issues with `closed_unmerged` PRs are marked done (superseded) → planner can re-raise if still needed. --- -## Deduplication +## Durable Work State (Memory Lane) -Each gap has a deterministic 12-hex-character fingerprint derived from `subsystem:issue_key`. The fingerprint is embedded in the issue body as: +The work state is stored in `.agent/work_state.json` (gitignored, refreshed each run). -```html - +### Schema + +```json +{ + "version": "2.1.0", + "updated_at": "2026-01-01T00:00:00+00:00", + "items": [ + { + "fingerprint": "a1b2c3d4e5f6", + "objective": "eliminate-import-cycles", + "lane": "import_cycles", + "issue_number": 42, + "issue_state": "open", + "delegation_state": "delegated", + "assignee": "copilot", + "pr_number": 101, + "pr_url": "https://github.com/org/repo/pull/101", + "pr_state": "merged", + "merged": true, + "closed_unmerged": false, + "blocked": false, + "superseded": false, + "created_at": "2026-01-01T00:00:00+00:00", + "updated_at": "2026-01-02T00:00:00+00:00", + "run_id": "12345678-1", + "gap_title": "Eliminate import cycles in backend.core", + "gap_subsystem": "runtime" + } + ] +} ``` -On each run: -- If a matching **open** issue exists → add a re-scan comment (no new issue). -- If no matching issue exists → create a new one. 
+### Field reference + +| Field | Type | Description | +|---|---|---| +| `fingerprint` | string | 12-hex deterministic fingerprint from `issue_fingerprint()` | +| `objective` | string | Active objective at time of creation | +| `lane` | string | Charter lane name | +| `issue_number` | int\|null | GitHub Issue number | +| `issue_state` | string | `open` or `closed` | +| `delegation_state` | string | `pending`, `delegated`, `done`, `blocked`, or `superseded` | +| `assignee` | string\|null | GitHub username delegated to | +| `pr_number` | int\|null | Linked PR number | +| `pr_url` | string\|null | Linked PR URL | +| `pr_state` | string\|null | `open`, `draft`, `merged`, `closed_unmerged`, `stale`, or null | +| `merged` | bool | Whether the linked PR merged | +| `closed_unmerged` | bool | Whether the linked PR was closed without merging | +| `blocked` | bool | Whether the item is manually blocked | +| `superseded` | bool | Whether the item has been superseded | +| `created_at` | ISO-8601 | Creation timestamp | +| `updated_at` | ISO-8601 | Last update timestamp | +| `run_id` | string | Workflow run provenance | +| `gap_title` | string | Issue title | +| `gap_subsystem` | string | Detected subsystem | --- -## Labels and Lifecycle +## Label Lifecycle + +Labels transition deterministically through the following states: -### Base labels (always applied) +``` +[created] + └─ arch-gap + copilot-task + needs-implementation + └─ [optional] ready-for-delegation (added by planner on first synthesis) -- `arch-gap` — identifies this as an architecture governance issue -- `copilot-task` — ready for Copilot to implement -- `needs-implementation` — awaiting a code PR +[selected for execution] + └─ in-progress (removes ready-for-delegation) -### Subsystem labels (where applicable) +[PR opened] + └─ pr-open (removes in-progress) -`workflow`, `runtime`, `reporting`, `docs`, `model-routing`, `issue-orchestration`, `core`, `knowledge`, `agents`, `consciousness` +[PR merged] + └─ merged 
(removes pr-open) -### Priority labels (critical and high only) +[PR closed unmerged] + └─ superseded (removes pr-open) -`priority:critical`, `priority:high` +[stale: delegated > STALE_TIMEOUT_DAYS with no PR] + └─ blocked (flags for operator review) +``` -### Lifecycle labels (manually applied by maintainers) +### All labels used by repo-architect -- `ready-for-validation` — implementation PR exists, needs CI review -- `blocked` — blocked by a dependency or decision -- `superseded` — replaced by a more comprehensive issue +| Label | Category | Description | +|---|---|---| +| `arch-gap` | Base | Architecture governance issue | +| `copilot-task` | Base | Ready for Copilot to implement | +| `needs-implementation` | Base | Awaiting a code PR | +| `ready-for-delegation` | Lifecycle | Ready for execution selection | +| `in-progress` | Lifecycle | Currently delegated to Copilot | +| `pr-open` | Lifecycle | PR exists and is open | +| `merged` | Lifecycle | PR has merged | +| `blocked` | Lifecycle | Stale or manually blocked | +| `superseded` | Lifecycle | PR closed unmerged or issue replaced | +| `priority:critical` | Priority | Critical priority (applied automatically) | +| `priority:high` | Priority | High priority (applied automatically) | +| Subsystem labels | Subsystem | `runtime`, `core`, `agents`, etc. | --- ## Dry-Run Mode -In dry-run mode (`--dry-run` flag or `dry_run: 'true'` workflow input), the system writes issue bodies to `docs/repo_architect/issues/.md` instead of calling the GitHub Issues API. This is useful for: -- Local testing and preview -- CI pipelines without `issues: write` permission -- Auditing generated prompts before publishing +In dry-run mode (`--dry-run` flag, or `ENABLE_LIVE_DELEGATION=false` for execution mode), the system operates without GitHub API side-effects: + +- **Issue mode dry-run**: writes issue bodies to `docs/repo_architect/issues/.md` instead of calling the Issues API. 
+- **Execution mode dry-run** (default): reports which issue would be delegated but does not assign labels, assignees, or post comments. Work state is still updated with `delegation_state: pending`. +- **Reconcile mode dry-run**: reads PR state but does not update lifecycle labels on issues. + +--- + +## Scheduled Automation + +The workflow runs on three separate schedules: + +| Schedule | Cron | Job | Purpose | +|---|---|---|---| +| Planning | `17 * * * *` | `repo-architect-issue` | Hourly gap detection + issue synthesis | +| Execution | `37 */2 * * *` | `repo-architect-execution` | Every 2h: select + delegate one ready issue | +| Reconciliation | `57 */4 * * *` | `repo-architect-reconcile` | Every 4h: ingest PR outcomes into work state | + +### Concurrency guards + +- The issue job uses `group: repo-architect-${{ github.ref }}` with `cancel-in-progress: true` to prevent concurrent planning runs. +- The execution job uses `group: repo-architect-execution-${{ github.repository }}` with `cancel-in-progress: false` to avoid cancelling an in-flight delegation. +- The reconciliation job uses `group: repo-architect-reconcile-${{ github.repository }}` similarly. + +These groups ensure that: +- Two planning runs don't produce duplicate issues. +- An execution run doesn't fire twice and create duplicate delegations. +- A reconciliation run doesn't overlap and corrupt work state. --- ## How Copilot Consumes the Generated Prompt +### Manual (current capability) + 1. Maintainer opens the GitHub Issue created by this workflow. 2. In the issue body, find the **Copilot implementation prompt** section. 3. Copy the entire code block. @@ -309,16 +457,30 @@ In dry-run mode (`--dry-run` flag or `dry_run: 'true'` workflow input), the syst 6. CI validates the PR against the **Validation** commands in the issue. 7. Maintainer reviews and merges. +### Automated (via execution lane) + +1. Execution lane selects the issue and assigns it to `@copilot`. +2. 
GitHub Copilot coding agent is triggered automatically on assignment. +3. Copilot reads the issue body and opens a PR. +4. Reconciliation lane detects the PR and updates work state + lifecycle labels. +5. Next planning run sees the in-progress state and skips re-raising the issue. + --- ## Workflow Dispatch Inputs | Input | Default | Description | |---|---|---| -| `mode` | `issue` | Operating mode | -| `dry_run` | `false` | Dry-run without API calls | -| `max_issues` | `1` | Max issues per run | -| `issue_subsystem` | (all) | Target subsystem | +| `mode` | `issue` | Operating mode (`issue`, `execution`, `reconcile`, `analyze`, `report`, `mutate`, `campaign`) | +| `dry_run` | `false` | Issue mode dry-run without API calls | +| `enable_live_delegation` | `false` | Execution mode: actually delegate via GitHub API | +| `active_objective` | (any) | Execution mode: restrict to a specific objective | +| `lane_filter` | (any) | Execution mode: restrict to a specific charter lane | +| `max_concurrent_delegated` | `1` | Execution mode: max in-flight delegations | +| `stale_timeout_days` | `14` | Reconciliation: days before stale marking | +| `reconciliation_window_days` | `30` | Reconciliation: days of PRs to consider | +| `max_issues` | `1` | Issue mode: max issues per run | +| `issue_subsystem` | (all) | Issue mode: target subsystem | | `github_model` | (catalog) | Override preferred model | | `github_fallback_model` | (catalog) | Override fallback model | | `report_path` | `docs/repo_architect/runtime_inventory.md` | Report output path (analyze/report modes) | @@ -345,6 +507,29 @@ In dry-run mode (`--dry-run` flag or `dry_run: 'true'` workflow input), the syst | `artifact_files` | All artifact paths generated | | `charter` | Charter metadata | +### Execution mode output + +| Field | Description | +|---|---| +| `status` | `execution_cycle_complete` | +| `mode` | `execution` | +| `dry_run` | Whether dry-run mode was active | +| `selected_issue` | Dict with `number`, 
`title`, `url` (or null if nothing selected) | +| `delegation` | Delegation result dict | +| `reconcile` | Embedded reconciliation result | +| `summary` | Human-readable summary lines | + +### Reconciliation mode output + +| Field | Description | +|---|---| +| `status` | `reconcile_cycle_complete` | +| `mode` | `reconcile` | +| `updated` | Number of work items updated | +| `prs_found` | Number of PRs matched to tracked issues | +| `details` | Array of per-issue reconciliation detail objects | +| `summary` | Human-readable summary lines | + ### IssueAction fields | Field | Description | @@ -361,14 +546,116 @@ In dry-run mode (`--dry-run` flag or `dry_run: 'true'` workflow input), the syst | `gap_subsystem` | Subsystem of the detected gap | | `error` | Error message (error action only) | -### Example output +--- + +## Lane-Based Mutation Modes (Charter §9–§10) + +Modes `mutate` and `campaign` implement the narrow, validated self-modification lanes defined in the charter. They are retained as charter-sanctioned secondary modes. + +### Lane Priority Order + +| Priority | Lane | Charter ref | Behaviour | +|---|---|---|---| +| 1 | `parse_errors` | Lane 2 (Parse repair) | Model-assisted syntax fix. Skipped if no parse errors exist or model is unavailable. | +| 2 | `import_cycles` | Lane 3 (Circular dependency elimination) | Model-assisted import cycle break. Skipped if no cycles or model unavailable. | +| 3 | `entrypoint_consolidation` | Lane 4 (Entrypoint consolidation) | Annotates redundant entrypoints when ≥ 4 exist. Model-assisted. | +| 4 | `hygiene` | Lane 1 (Hygiene) | Remove explicitly `# DEBUG`-marked `print()` statements. No model required. | +| 5 | `report` | Lane 0 (Report generation) | Refresh architecture documentation. Fallback when no code mutation is possible. 
| + +### Running mutation modes + +```bash +# Single mutation in lane-priority order +python repo_architect.py --mode mutate --allow-dirty + +# Restrict to specific lanes +python repo_architect.py --mode mutate --lane hygiene --allow-dirty + +# Multi-slice campaign +python repo_architect.py --mode campaign --allow-dirty --max-slices 3 --lanes parse_errors,import_cycles,hygiene,report +``` + +### Validation policy (charter §9.3) + +Each lane runs validation before pushing: + +| Lane | Validation | +|---|---| +| `parse_errors` | `ast.parse` on model-generated content | +| `import_cycles` | `ast.parse` + import smoke test (warn-only) | +| `entrypoint_consolidation` | `ast.parse` on model-generated content | +| `hygiene` | `python -m py_compile` on all touched Python files | +| `report` | Verify report files were written | + +Validation failures abort the mutation and **never push a broken branch**. + +--- + +## Issue Structure + +Each generated issue follows this template: +| Section | Description | +|---|---| +| **Summary** | 1-2 sentence description of the gap | +| **Problem** | Detailed problem statement with evidence | +| **Why it matters** | Impact and urgency justification | +| **Scope** | Bounded implementation scope | +| **Suggested files** | Repo-relative file paths relevant to the fix | +| **Implementation notes** | Guidance for the implementer | +| **Copilot implementation prompt** | Ready-to-paste prompt for Copilot Chat / agent mode | +| **Acceptance criteria** | Checklist items to verify the fix | +| **Validation** | Shell commands to validate the implementation | +| **Out of scope** | Explicit exclusions | +| **Machine metadata** | Structured JSON: subsystem, priority, confidence, fingerprint, run_id, etc. 
|
+
+### Machine metadata fields
+
+```json
+{
+  "subsystem": "runtime",
+  "priority": "high",
+  "confidence": 0.95,
+  "mode": "issue",
+  "generated_at": "2026-01-01T00:00:00Z",
+  "run_id": "12345678-1",
+  "repo": "org/repo",
+  "issue_key": "import-cycles",
+  "fingerprint": "a1b2c3d4e5f6"
+}
 ```
-created issue #12 for workflow architectural drift
-updated existing issue #9 for reporting schema inconsistency
-dry-run generated issue body at docs/repo_architect/issues/a1b2c3d4e5f6.md
+
+---
+
+## Detected Gap Types
+
+| Gap | Subsystem | Priority | Charter lane equivalent |
+|---|---|---|---|
+| Python parse errors | `runtime` | `critical` | Lane 2 (Parse repair) |
+| Import cycles | `runtime` | `high` | Lane 3 (Circular dependency elimination) |
+| Entrypoint fragmentation (≥4 backend entrypoints) | `runtime` | `medium` | Lane 4 (Entrypoint consolidation) |
+| Dependency direction violations | `core` | `medium` | Lane 5 (Contract repair) |
+| Agent boundary violations | `agents` | `medium` | Lane 7 (Agent boundary enforcement) |
+| Architecture score < 70/100 | `reporting` | `high` / `medium` | Cross-lane |
+| Workflow / documentation drift (model-assisted) | `workflow` | `medium` | Lane 0 (Report generation) |
+
+Lanes 6 (Runtime extraction), 8 (Knowledge substrate normalisation), and 9 (Consciousness instrumentation) are represented in `CHARTER_LANE_MAP` and will be detected as gap signals become available in the analysis engine.
+
+---
+
+## Deduplication
+
+Each gap has a deterministic 12-hex-character fingerprint derived from `subsystem:issue_key`. The fingerprint is embedded in the issue body as:
+
+```html
+<!-- arch-gap-fingerprint: a1b2c3d4e5f6 -->
+```
+On each run:
+- If a matching **open** issue exists → add a re-scan comment (no new issue).
+- If no matching issue exists → create a new one.
+- If the fingerprint is already tracked as `delegated` in work state → the planner skips it (no duplicate issue).
+ --- ## Model Selection (preferred / fallback) @@ -395,7 +682,7 @@ The system tries a **preferred** model first and automatically retries with a ** python -m unittest tests.test_repo_architect -v ``` -The test suite covers: branch suffix generation, model fallback, `ast.parse` gate, campaign aggregation, output schema stability, lane priority, `entrypoint_consolidation`, lane scoping, `validate_change`, charter context, issue fingerprint generation, issue body rendering, deduplication behavior, label assignment, gap diagnosis, `run_issue_cycle` output schema, charter-validated mode notices, module-name normalization, and companion file existence. +The test suite covers: branch suffix generation, model fallback, `ast.parse` gate, campaign aggregation, output schema stability, lane priority, `entrypoint_consolidation`, lane scoping, `validate_change`, charter context, issue fingerprint generation, issue body rendering, deduplication behavior, label assignment, gap diagnosis, `run_issue_cycle` output schema, charter-validated mode notices, module-name normalization, companion file existence, **work state ingestion, issue execution selection, delegation dry-run behavior, PR reconciliation, lifecycle label transitions, planner skip-in-progress logic, and new operator control validation**. --- @@ -410,3 +697,4 @@ The implementation charter (§15) requires machine-readable policy files that en | [`dependency_contract.json`](dependency_contract.json) | §6 | Layer order, allowed dependency direction, hard prohibitions, circular import policy, ownership hints | Agents should consume these files before proposing code changes. The constants `CHARTER_COMPANION_FILES`, `CHARTER_PRIORITY_ORDER`, and `AGENT_INSTRUCTION_CONTRACT` in `repo_architect.py` encode the same data as Python tuples for runtime use. 
+ diff --git a/repo_architect.py b/repo_architect.py index 3bfd9ee..b67fc6b 100644 --- a/repo_architect.py +++ b/repo_architect.py @@ -156,6 +156,38 @@ # Maximum number of violations shown in issue body for Lane 5/7 gaps _MAX_VIOLATIONS_DISPLAY = 5 +# --------------------------------------------------------------------------- +# Closed-loop execution / memory lane constants +# --------------------------------------------------------------------------- +# Durable work-state file stored in .agent/ (gitignored alongside other artifacts) +WORK_STATE_FILE = "work_state.json" +# New operating modes for the execution and reconciliation lanes +EXECUTION_MODE = "execution" +RECONCILE_MODE = "reconcile" +# Lifecycle labels — document and drive issue → PR state transitions +LIFECYCLE_LABELS: Tuple[str, ...] = ( + "ready-for-delegation", + "in-progress", + "pr-open", + "merged", + "blocked", + "superseded", +) +# Labels required for an issue to be eligible for execution selection +EXECUTION_ELIGIBLE_LABELS: Tuple[str, ...] 
= ("arch-gap", "copilot-task", "needs-implementation") +# GitHub Copilot coding agent assignee username +COPILOT_AGENT_ASSIGNEE = "copilot" +# Canonical architectural objectives aligned with charter §14 priority order +OBJECTIVE_LABELS: Dict[str, str] = { + "restore-parse-correctness": "Restore or preserve parse correctness (Lane 2)", + "eliminate-import-cycles": "Eliminate import cycles (Lane 3)", + "converge-runtime-structure": "Converge runtime entrypoint structure (Lane 4)", + "normalise-knowledge-substrate": "Normalise knowledge substrate boundaries (Lane 8)", + "isolate-agent-boundaries": "Isolate agent boundaries (Lane 7)", + "reduce-architecture-score-risk": "Reduce architecture score risk (Lanes 0–9)", + "add-consciousness-instrumentation": "Add consciousness instrumentation (Lane 9)", +} + class RepoArchitectError(Exception): pass @@ -195,6 +227,14 @@ class Config: dry_run: bool = False # write issue bodies to disk but do not call GitHub API max_issues: int = 1 # maximum issues to open/update per run in issue mode issue_subsystem: Optional[str] = None # target a specific subsystem (None = all) + # Closed-loop execution / reconciliation options + work_state_path: Optional[pathlib.Path] = None # path to durable work state JSON; defaults to agent_dir/WORK_STATE_FILE + enable_live_delegation: bool = False # False = dry-run only; True = actually assign/label on GitHub + max_concurrent_delegated: int = 1 # max number of issues simultaneously delegated + active_objective: Optional[str] = None # restrict execution selection to this objective key + lane_filter: Optional[str] = None # restrict execution selection to this lane name + stale_timeout_days: int = 14 # days before a delegated-but-PR-less item is marked stale + reconciliation_window_days: int = 30 # days of PRs to consider during reconciliation @dataclasses.dataclass @@ -255,6 +295,34 @@ class IssueAction: labels_confirmed: Optional[List[str]] = None # labels actually *confirmed* by GitHub API response; 
None when no API call was made (dry-run/error) +@dataclasses.dataclass +class WorkItem: + """Durable record of a single unit of work tracked through planning → execution → PR → reconciliation. + + Stored in .agent/work_state.json (gitignored). The work state is the memory lane: + it feeds back into future planning passes to prevent duplicate/overlapping issues. + """ + fingerprint: str # 12-hex deterministic fingerprint (from issue_fingerprint()) + objective: str # active objective at time of creation (e.g. "eliminate-import-cycles") + lane: str # charter lane name (e.g. "import_cycles") + issue_number: Optional[int] + issue_state: str # "open" | "closed" + delegation_state: str # "pending" | "delegated" | "done" | "blocked" | "superseded" + assignee: Optional[str] # GitHub username delegated to (e.g. "copilot") + pr_number: Optional[int] + pr_url: Optional[str] + pr_state: Optional[str] # "open" | "draft" | "merged" | "closed_unmerged" | "stale" | None + merged: bool + closed_unmerged: bool + blocked: bool + superseded: bool + created_at: str # ISO-8601 UTC + updated_at: str # ISO-8601 UTC + run_id: str # workflow run provenance + gap_title: str + gap_subsystem: str + + def log(message: str, *, data: Optional[Dict[str, Any]] = None, json_mode: bool = False) -> None: if json_mode: payload: Dict[str, Any] = {"ts": int(time.time()), "message": message} @@ -1389,13 +1457,19 @@ def run_issue_cycle(config: Config) -> Dict[str, Any]: """Execute one issue-synthesis cycle. Steps: - 1. Build analysis and model enrichment (same as analyze/report modes). - 2. Diagnose architectural gaps. - 3. For each gap (up to max_issues): synthesize a GitHub Issue or dry-run artifact. - 4. Emit a structured JSON result and write a step summary. + 1. Load work state (memory lane) to avoid re-raising in-progress issues. + 2. Build analysis and model enrichment (same as analyze/report modes). + 3. Diagnose architectural gaps. + 4. 
For each gap (up to max_issues): synthesize a GitHub Issue or dry-run artifact. + 5. Record new issues into work state so future passes see them. + 6. Emit a structured JSON result and write a step summary. """ ensure_agent_dir(config.agent_dir) state = load_state(config) + # Load durable work state (memory lane) — used to suppress in-progress objectives + work_state = load_work_state(config) + active_fps = _active_fingerprints_in_work_state(work_state) + analysis = build_analysis(config.git_root) charter_context = load_charter_context(config.git_root) analysis["charter_context"] = charter_context @@ -1412,6 +1486,15 @@ def run_issue_cycle(config: Config) -> Dict[str, Any]: ) gaps = diagnose_gaps(config, analysis, model_meta) + # Filter out gaps whose fingerprint is already actively in-progress/delegated + if active_fps: + filtered_gaps = [ + g for g in gaps + if issue_fingerprint(g.subsystem, g.issue_key) not in active_fps + ] + if filtered_gaps: + gaps = filtered_gaps + # If all gaps are filtered, still proceed with original list so planner isn't blocked selected_gaps = gaps[: config.max_issues] issue_actions: List[Dict[str, Any]] = [] @@ -1490,6 +1573,10 @@ def run_issue_cycle(config: Config) -> Dict[str, Any]: persist_manifest(config, artifact_files) write_step_summary(config, result) + # Record new issues into durable work state (memory lane) + ingest_issue_actions_to_work_state(config, work_state, issue_actions, run_id) + save_work_state(config, work_state) + state["runs"] = int(state.get("runs", 0)) + 1 state["last_run_epoch"] = int(time.time()) state["last_outcome"] = result["status"] @@ -2002,6 +2089,47 @@ def save_state(config: Config, state: Dict[str, Any]) -> None: atomic_write_json(config.state_path, state) +def _work_state_path(config: Config) -> pathlib.Path: + """Return the resolved path to the durable work state JSON file.""" + if config.work_state_path is not None: + return config.work_state_path + return config.agent_dir / WORK_STATE_FILE + + 
+def load_work_state(config: Config) -> Dict[str, Any]: + """Load the durable work-state artifact (memory lane). + + Returns a dict with schema:: + + { + "version": str, + "updated_at": str | null, + "items": [WorkItem-dict, ...] + } + """ + return read_json( + _work_state_path(config), + {"version": VERSION, "updated_at": None, "items": []}, + ) + + +def save_work_state(config: Config, work_state: Dict[str, Any]) -> None: + """Persist work state to disk atomically.""" + work_state["updated_at"] = dt.datetime.now(dt.timezone.utc).isoformat() + atomic_write_json(_work_state_path(config), work_state) + + +def upsert_work_item(work_state: Dict[str, Any], item: WorkItem) -> None: + """Insert or update a WorkItem in the work state, keyed by fingerprint.""" + items: List[Dict[str, Any]] = work_state.setdefault("items", []) + item_dict = dataclasses.asdict(item) + for i, existing in enumerate(items): + if existing.get("fingerprint") == item.fingerprint: + items[i] = item_dict + return + items.append(item_dict) + + def persist_analysis(config: Config, analysis: Dict[str, Any]) -> None: atomic_write_json(config.analysis_path, analysis) atomic_write_json(config.graph_path, analysis.get("local_import_graph", {})) @@ -2015,6 +2143,662 @@ def baseline_dirty_guard(config: Config) -> None: raise RepoArchitectError("Repository has uncommitted changes. 
Re-run with --allow-dirty if you really want mutation.") +# --------------------------------------------------------------------------- +# Execution lane: issue selection, Copilot delegation, PR reconciliation +# --------------------------------------------------------------------------- + +def _list_github_issues_by_labels( + config: Config, labels: Sequence[str], state: str = "open" +) -> List[Dict[str, Any]]: + """List GitHub issues that carry ALL of the given labels.""" + if not config.github_token or not config.github_repo: + return [] + try: + params = urllib.parse.urlencode({ + "labels": ",".join(labels), + "state": state, + "per_page": "50", + }) + result = github_request(config.github_token, f"/repos/{config.github_repo}/issues?{params}") + return result if isinstance(result, list) else [] + except RepoArchitectError: + return [] + + +def _extract_fingerprint_from_body(body: str) -> Optional[str]: + """Extract the 12-hex arch-gap fingerprint marker from an issue body.""" + m = re.search(r"arch-gap-fingerprint:\s*([0-9a-f]{12})", body) + return m.group(1) if m else None + + +def _extract_lane_from_body(body: str) -> Optional[str]: + """Attempt to extract the charter lane name from an issue body.""" + m = re.search(r"(?i)Lane[:\s]+([A-Za-z][A-Za-z0-9_-]*)", body) + return m.group(1).lower() if m else None + + +def select_ready_issue( + config: Config, work_state: Dict[str, Any] +) -> Optional[Dict[str, Any]]: + """Select one ready issue for delegation to Copilot. 
+ + Selection rules (all must pass): + - Issue has labels: arch-gap, copilot-task, needs-implementation + - Issue does NOT have: blocked, superseded, in-progress, pr-open, merged + - Not already tracked as delegated/in-progress in work state + - Fingerprint not already delegated + - At most one issue per lane at a time + - Respects MAX_CONCURRENT_DELEGATED + - Prefers highest priority first (critical > high > medium > low) + - Respects active_objective and lane_filter preferences if set + + Returns the GitHub issue dict for the selected issue, or None. + """ + if not config.github_token or not config.github_repo: + return None + + items: List[Dict[str, Any]] = work_state.get("items", []) + + # Count currently in-flight items (delegated, not yet done) + in_flight = [ + it for it in items + if it.get("delegation_state") == "delegated" + and not it.get("merged") + and not it.get("closed_unmerged") + ] + if len(in_flight) >= config.max_concurrent_delegated: + return None + + # Build blocked sets from work state + blocked_fingerprints: Set[str] = set() + blocked_issue_numbers: Set[int] = set() + blocked_lanes: Set[str] = set() + for it in in_flight: + if it.get("fingerprint"): + blocked_fingerprints.add(it["fingerprint"]) + if it.get("issue_number"): + blocked_issue_numbers.add(int(it["issue_number"])) + if it.get("lane"): + blocked_lanes.add(it["lane"]) + + # Also block superseded/blocked items by fingerprint + for it in items: + if it.get("blocked") or it.get("superseded"): + if it.get("fingerprint"): + blocked_fingerprints.add(it["fingerprint"]) + if it.get("issue_number"): + blocked_issue_numbers.add(int(it["issue_number"])) + + # Fetch eligible issues from GitHub + candidate_issues = _list_github_issues_by_labels( + config, list(EXECUTION_ELIGIBLE_LABELS), state="open" + ) + + filtered: List[Tuple[Dict[str, Any], Optional[str], Optional[str]]] = [] + blocking_lifecycle = {"blocked", "superseded", "in-progress", "pr-open", "merged"} + + for issue in 
candidate_issues: + issue_labels: Set[str] = { + lbl["name"] + for lbl in issue.get("labels", []) + if isinstance(lbl, dict) + } + # Skip lifecycle-blocked issues + if issue_labels & blocking_lifecycle: + continue + + issue_num = issue.get("number") + if issue_num and int(issue_num) in blocked_issue_numbers: + continue + + body = issue.get("body") or "" + fp = _extract_fingerprint_from_body(body) + if fp and fp in blocked_fingerprints: + continue + + lane = _extract_lane_from_body(body) + + # One issue per lane at a time + if lane and lane in blocked_lanes: + continue + + # Lane filter preference (soft — only skip if we have other options) + if config.lane_filter and lane and lane != config.lane_filter: + continue + + filtered.append((issue, fp, lane)) + + if not filtered: + return None + + # Sort by priority + _prank = {p: i for i, p in enumerate(ISSUE_PRIORITY_LEVELS)} + + def _priority(entry: Tuple[Dict[str, Any], Optional[str], Optional[str]]) -> int: + issue, _, _ = entry + lbls = {lbl["name"] for lbl in issue.get("labels", []) if isinstance(lbl, dict)} + for lbl in lbls: + if lbl.startswith("priority:"): + return _prank.get(lbl.split(":", 1)[1], 99) + return 99 + + filtered.sort(key=_priority) + best_issue, _, _ = filtered[0] + return best_issue + + +def delegate_to_copilot( + config: Config, + issue: Dict[str, Any], + work_state: Dict[str, Any], + run_id: str, +) -> Dict[str, Any]: + """Delegate an issue to GitHub Copilot coding agent. + + Dry-run mode (config.enable_live_delegation is False): + - Reports what would happen; no GitHub API side effects. + + Live mode (config.enable_live_delegation is True): + - Adds 'in-progress' label, removes 'ready-for-delegation'. + - Assigns the issue to COPILOT_AGENT_ASSIGNEE ("copilot"). + - Posts a delegation comment. + + Always records the delegation event in work_state. 
+ """ + dry_run = not config.enable_live_delegation + issue_number = issue.get("number") + issue_title = issue.get("title", "") + issue_url = issue.get("html_url", "") + body = issue.get("body") or "" + + fp = _extract_fingerprint_from_body(body) or f"unknown-{issue_number}" + lane = _extract_lane_from_body(body) or "unknown" + issue_labels: Set[str] = { + lbl["name"] for lbl in issue.get("labels", []) if isinstance(lbl, dict) + } + subsystem = next((s for s in SUBSYSTEM_LABELS if s in issue_labels), "runtime") + now = dt.datetime.now(dt.timezone.utc).isoformat() + + result: Dict[str, Any] = { + "action": "dry_run" if dry_run else "delegated", + "issue_number": issue_number, + "issue_url": issue_url, + "issue_title": issue_title, + "fingerprint": fp, + "assignee": COPILOT_AGENT_ASSIGNEE if not dry_run else None, + "labels_added": ["in-progress"], + "labels_removed": ["ready-for-delegation"], + "dry_run": dry_run, + } + + if dry_run: + log( + f"[dry-run] Would delegate issue #{issue_number} " + f"to @{COPILOT_AGENT_ASSIGNEE}: {issue_title}", + json_mode=config.log_json, + ) + else: + if not config.github_token or not config.github_repo: + result["action"] = "error" + result["error"] = "Missing GITHUB_TOKEN or GITHUB_REPO for live delegation." + else: + errors: List[str] = [] + # 1. Update lifecycle labels + new_labels = (issue_labels - {"ready-for-delegation"}) | {"in-progress"} + try: + ensure_github_labels(config, sorted(new_labels)) + set_github_issue_labels(config, issue_number, sorted(new_labels)) + except RepoArchitectError as exc: + errors.append(f"label update: {exc}") + # 2. Assign to Copilot + try: + github_request( + config.github_token, + f"/repos/{config.github_repo}/issues/{issue_number}/assignees", + method="POST", + payload={"assignees": [COPILOT_AGENT_ASSIGNEE]}, + ) + except RepoArchitectError as exc: + errors.append(f"assignment: {exc}") + # 3. 
Post delegation comment + comment = ( + f"**repo-architect delegation** (run `{run_id}`): " + f"this issue has been selected for execution and assigned to " + f"`@{COPILOT_AGENT_ASSIGNEE}`.\n\n" + f"**Active objective**: `{config.active_objective or 'general'}`\n" + f"**Lane**: `{lane}`\n" + f"**Fingerprint**: `{fp}`\n\n" + f"When a PR is opened, repo-architect reconciliation will ingest its state " + f"and update this issue's lifecycle labels." + ) + try: + update_github_issue_api(config, issue_number, comment) + except RepoArchitectError as exc: + errors.append(f"comment: {exc}") + if errors: + result["action"] = "partial" + result["errors"] = errors + result["assignee"] = COPILOT_AGENT_ASSIGNEE + + # Record in work state + delegation_state = "pending" if dry_run else "delegated" + existing_item_dict: Optional[Dict[str, Any]] = None + for it in work_state.get("items", []): + if it.get("fingerprint") == fp: + existing_item_dict = it + break + + if existing_item_dict: + work_item = WorkItem( + fingerprint=fp, + objective=existing_item_dict.get("objective") or config.active_objective or "", + lane=existing_item_dict.get("lane") or lane, + issue_number=issue_number, + issue_state=existing_item_dict.get("issue_state") or "open", + delegation_state=delegation_state, + assignee=COPILOT_AGENT_ASSIGNEE if not dry_run else existing_item_dict.get("assignee"), + pr_number=existing_item_dict.get("pr_number"), + pr_url=existing_item_dict.get("pr_url"), + pr_state=existing_item_dict.get("pr_state"), + merged=bool(existing_item_dict.get("merged")), + closed_unmerged=bool(existing_item_dict.get("closed_unmerged")), + blocked=bool(existing_item_dict.get("blocked")), + superseded=bool(existing_item_dict.get("superseded")), + created_at=existing_item_dict.get("created_at") or now, + updated_at=now, + run_id=run_id, + gap_title=issue_title, + gap_subsystem=subsystem, + ) + else: + work_item = WorkItem( + fingerprint=fp, + objective=config.active_objective or "", + lane=lane, + 
issue_number=issue_number, + issue_state="open", + delegation_state=delegation_state, + assignee=COPILOT_AGENT_ASSIGNEE if not dry_run else None, + pr_number=None, + pr_url=None, + pr_state=None, + merged=False, + closed_unmerged=False, + blocked=False, + superseded=False, + created_at=now, + updated_at=now, + run_id=run_id, + gap_title=issue_title, + gap_subsystem=subsystem, + ) + upsert_work_item(work_state, work_item) + return result + + +def _list_prs_for_repo( + config: Config, state: str = "all", per_page: int = 50 +) -> List[Dict[str, Any]]: + """List pull requests from the repository.""" + if not config.github_token or not config.github_repo: + return [] + try: + params = urllib.parse.urlencode({"state": state, "per_page": str(per_page)}) + result = github_request( + config.github_token, + f"/repos/{config.github_repo}/pulls?{params}", + ) + return result if isinstance(result, list) else [] + except RepoArchitectError: + return [] + + +def _classify_pr(pr: Dict[str, Any]) -> str: + """Classify a PR into a lifecycle state string.""" + if pr.get("merged_at"): + return "merged" + state = pr.get("state", "open") + if state == "closed": + return "closed_unmerged" + if pr.get("draft"): + return "draft" + return "open" + + +def _pr_mentions_issue(pr: Dict[str, Any], issue_number: int) -> bool: + """Return True if the PR body or title references the given issue number.""" + body = (pr.get("body") or "").lower() + title = (pr.get("title") or "").lower() + needle = f"#{issue_number}" + if needle in body or needle in title: + return True + # Also check for "closes/fixes/resolves NNN" patterns without # + for pat in (f"closes {issue_number}", f"fixes {issue_number}", f"resolves {issue_number}"): + if pat in body: + return True + return False + + +def _update_issue_lifecycle_labels_for_pr( + config: Config, issue_number: int, pr_class: str +) -> None: + """Transition lifecycle labels on an issue based on the detected PR state.""" + if not config.github_token or not 
config.github_repo: + return + try: + issue_data = github_request( + config.github_token, + f"/repos/{config.github_repo}/issues/{issue_number}", + ) + except RepoArchitectError: + return + current = {lbl["name"] for lbl in issue_data.get("labels", []) if isinstance(lbl, dict)} + new_labels = current - set(LIFECYCLE_LABELS) + if pr_class == "merged": + new_labels.add("merged") + elif pr_class == "closed_unmerged": + new_labels.add("superseded") + elif pr_class in ("open", "draft"): + new_labels.add("pr-open") + elif pr_class == "stale": + new_labels.add("blocked") + try: + ensure_github_labels(config, sorted(new_labels)) + set_github_issue_labels(config, issue_number, sorted(new_labels)) + except RepoArchitectError: + pass + + +def reconcile_pr_state( + config: Config, work_state: Dict[str, Any] +) -> Dict[str, Any]: + """Ingest PR state back into work state for all tracked work items. + + For each tracked item that is not yet finished (merged / closed_unmerged), + detect linked PRs and update item state + lifecycle labels accordingly. + + Returns a summary dict with ``status``, ``updated``, ``prs_found``, and ``details``. 
+ """ + items: List[Dict[str, Any]] = work_state.get("items", []) + if not items: + return {"status": "reconcile_complete", "updated": 0, "prs_found": 0, "details": []} + + # Fetch enough PRs to cover the reconciliation window (100 = GitHub API max per page) + recent_prs = _list_prs_for_repo(config, state="all", per_page=100) + stale_cutoff = dt.datetime.now(dt.timezone.utc) - dt.timedelta(days=config.stale_timeout_days) + window_cutoff = dt.datetime.now(dt.timezone.utc) - dt.timedelta(days=config.reconciliation_window_days) + # Filter PRs to the reconciliation window + filtered_prs: List[Dict[str, Any]] = [] + for pr in recent_prs: + created = pr.get("created_at") or "" + try: + pr_created = dt.datetime.fromisoformat(created.replace("Z", "+00:00")) + if pr_created >= window_cutoff: + filtered_prs.append(pr) + except (ValueError, TypeError): + filtered_prs.append(pr) # keep PRs with unparseable dates + recent_prs = filtered_prs + now = dt.datetime.now(dt.timezone.utc).isoformat() + + updated = 0 + prs_found = 0 + details: List[Dict[str, Any]] = [] + + for i, item in enumerate(items): + issue_number = item.get("issue_number") + if not issue_number: + continue + # Already finished + if item.get("merged") or item.get("closed_unmerged"): + continue + + matching = [pr for pr in recent_prs if _pr_mentions_issue(pr, int(issue_number))] + if not matching: + # Check for stale (delegated with no PR for too long) + updated_str = item.get("updated_at") + if item.get("delegation_state") == "delegated" and updated_str: + try: + updated_at = dt.datetime.fromisoformat(updated_str.replace("Z", "+00:00")) + if updated_at < stale_cutoff and item.get("pr_state") != "stale": + new_item = dict(item) + new_item["pr_state"] = "stale" + new_item["updated_at"] = now + items[i] = new_item + updated += 1 + details.append({"issue": issue_number, "pr_state": "stale"}) + if not config.dry_run: + _update_issue_lifecycle_labels_for_pr( + config, int(issue_number), "stale" + ) + except (ValueError, 
TypeError): + pass + continue + + prs_found += len(matching) + merged_pr = next((pr for pr in matching if pr.get("merged_at")), None) + open_pr = next( + (pr for pr in matching if pr.get("state") == "open" and not pr.get("merged_at")), + None, + ) + closed_pr = next( + (pr for pr in matching if pr.get("state") == "closed" and not pr.get("merged_at")), + None, + ) + best_pr = merged_pr or open_pr or closed_pr or matching[0] + pr_class = _classify_pr(best_pr) + + new_item = dict(item) + new_item["pr_number"] = best_pr.get("number") + new_item["pr_url"] = best_pr.get("html_url") + new_item["pr_state"] = pr_class + new_item["updated_at"] = now + if pr_class == "merged": + new_item["merged"] = True + new_item["delegation_state"] = "done" + elif pr_class == "closed_unmerged": + new_item["closed_unmerged"] = True + new_item["delegation_state"] = "done" + elif pr_class in ("open", "draft"): + new_item["delegation_state"] = "delegated" + + if new_item != item: + items[i] = new_item + updated += 1 + details.append({ + "issue": issue_number, + "pr_number": best_pr.get("number"), + "pr_state": pr_class, + "old_delegation": item.get("delegation_state"), + "new_delegation": new_item.get("delegation_state"), + }) + if not config.dry_run: + _update_issue_lifecycle_labels_for_pr(config, int(issue_number), pr_class) + + work_state["items"] = items + return { + "status": "reconcile_complete", + "updated": updated, + "prs_found": prs_found, + "details": details, + } + + +def ingest_issue_actions_to_work_state( + config: Config, + work_state: Dict[str, Any], + issue_actions: List[Dict[str, Any]], + run_id: str, +) -> None: + """Record newly created/updated issues into the work state (memory lane). + + Called at the end of run_issue_cycle() so future planning passes can see + what has already been submitted. 
+ """ + now = dt.datetime.now(dt.timezone.utc).isoformat() + for action in issue_actions: + if action.get("action") not in ("created", "updated", "dry_run"): + continue + fp = action.get("fingerprint") + if not fp: + continue + issue_number = action.get("issue_number") + gap_title = action.get("gap_title") or "" + gap_subsystem = action.get("gap_subsystem") or "runtime" + + # Find existing item or create new + existing: Optional[Dict[str, Any]] = None + for it in work_state.get("items", []): + if it.get("fingerprint") == fp: + existing = it + break + + if existing: + # Refresh issue_number if it was just created + new_it = dict(existing) + if issue_number and not existing.get("issue_number"): + new_it["issue_number"] = issue_number + new_it["issue_state"] = "open" + new_it["updated_at"] = now + new_it["run_id"] = run_id + upsert_work_item(work_state, WorkItem(**new_it)) + else: + work_item = WorkItem( + fingerprint=fp, + objective=config.active_objective or "", + lane="unknown", + issue_number=issue_number, + issue_state="open", + delegation_state="pending", + assignee=None, + pr_number=None, + pr_url=None, + pr_state=None, + merged=False, + closed_unmerged=False, + blocked=False, + superseded=False, + created_at=now, + updated_at=now, + run_id=run_id, + gap_title=gap_title, + gap_subsystem=gap_subsystem, + ) + upsert_work_item(work_state, work_item) + + +def _active_fingerprints_in_work_state(work_state: Dict[str, Any]) -> Set[str]: + """Return fingerprints of issues that are currently in-progress or delegated. + + Used by the planner to avoid re-raising the same issue when one is already active. + """ + return { + it["fingerprint"] + for it in work_state.get("items", []) + if it.get("fingerprint") + and it.get("delegation_state") in ("pending", "delegated") + and not it.get("merged") + and not it.get("closed_unmerged") + } + + +def run_execution_cycle(config: Config) -> Dict[str, Any]: + """Execute one execution-lane pass: select + delegate one ready issue. 
+ + Steps: + 1. Load work state. + 2. Reconcile PR state (so selection sees current issue states). + 3. Select one ready issue. + 4. Delegate it (dry-run or live depending on config.enable_live_delegation). + 5. Save updated work state. + 6. Return structured result. + """ + ensure_agent_dir(config.agent_dir) + work_state = load_work_state(config) + + # Run lightweight reconciliation first so selection state is fresh + reconcile_result = reconcile_pr_state(config, work_state) + + run_id = ( + os.environ.get("REPO_ARCHITECT_BRANCH_SUFFIX") + or os.environ.get("GITHUB_RUN_ID") + or dt.datetime.now(dt.timezone.utc).strftime("%Y%m%d%H%M%S") + ) + + selected = select_ready_issue(config, work_state) + if selected is None: + save_work_state(config, work_state) + return { + "status": "execution_cycle_complete", + "mode": EXECUTION_MODE, + "dry_run": not config.enable_live_delegation, + "selected_issue": None, + "delegation": None, + "reconcile": reconcile_result, + "message": "No ready issues available for delegation.", + } + + delegation_result = delegate_to_copilot(config, selected, work_state, run_id) + save_work_state(config, work_state) + + summary_line = ( + f"[dry-run] " if not config.enable_live_delegation else "" + ) + ( + f"delegated issue #{selected.get('number')} " + f"— {selected.get('title', '')}" + ) + log(summary_line, json_mode=config.log_json) + + result: Dict[str, Any] = { + "status": "execution_cycle_complete", + "mode": EXECUTION_MODE, + "dry_run": not config.enable_live_delegation, + "selected_issue": { + "number": selected.get("number"), + "title": selected.get("title"), + "url": selected.get("html_url"), + }, + "delegation": delegation_result, + "reconcile": reconcile_result, + "summary": [summary_line], + } + write_step_summary(config, result) + return result + + +def run_reconciliation_cycle(config: Config) -> Dict[str, Any]: + """Execute one reconciliation-lane pass: ingest PR outcomes into work state. + + Steps: + 1. Load work state. + 2. 
Fetch all recent PRs. + 3. Update item states and lifecycle labels. + 4. Save updated work state. + 5. Return structured result. + """ + ensure_agent_dir(config.agent_dir) + work_state = load_work_state(config) + reconcile_result = reconcile_pr_state(config, work_state) + save_work_state(config, work_state) + + summary = ( + f"reconcile: {reconcile_result['updated']} items updated, " + f"{reconcile_result['prs_found']} PRs found" + ) + log(summary, json_mode=config.log_json) + + result: Dict[str, Any] = { + "status": "reconcile_cycle_complete", + "mode": RECONCILE_MODE, + "dry_run": config.dry_run, + "updated": reconcile_result.get("updated", 0), + "prs_found": reconcile_result.get("prs_found", 0), + "details": reconcile_result.get("details", []), + "summary": [summary], + } + write_step_summary(config, result) + return result + + def remove_marked_debug_prints(root: pathlib.Path, analysis: Dict[str, Any], budget: int) -> Optional[PatchPlan]: if budget <= 0: return None @@ -2779,9 +3563,13 @@ def persist_manifest(config: Config, artifact_files: List[str]) -> None: def run_cycle(config: Config) -> Dict[str, Any]: - # Route issue mode to the dedicated issue cycle function + # Route modes to dedicated cycle functions if config.mode == ISSUE_MODE: return run_issue_cycle(config) + if config.mode == EXECUTION_MODE: + return run_execution_cycle(config) + if config.mode == RECONCILE_MODE: + return run_reconciliation_cycle(config) ensure_agent_dir(config.agent_dir) state = load_state(config) @@ -3115,6 +3903,76 @@ def build_config(args: argparse.Namespace) -> Config: f"Invalid REPO_ARCHITECT_SUBSYSTEM={issue_subsystem!r}. 
" f"Expected one of: {', '.join(SUBSYSTEM_LABELS)}" ) + # Closed-loop execution / reconciliation options + active_objective = ( + getattr(args, "active_objective", None) + or os.environ.get("ACTIVE_OBJECTIVE") + or os.environ.get("REPO_ARCHITECT_ACTIVE_OBJECTIVE") + ) + if active_objective and active_objective not in OBJECTIVE_LABELS: + raise RepoArchitectError( + f"Invalid active_objective={active_objective!r}. " + f"Expected one of: {', '.join(OBJECTIVE_LABELS)}" + ) + lane_filter = ( + getattr(args, "lane_filter", None) + or os.environ.get("LANE_FILTER") + or os.environ.get("REPO_ARCHITECT_LANE_FILTER") + ) + # ENABLE_LIVE_DELEGATION accepts "true"/"false" (canonical from workflow) or "1"/"0"/"yes"/"no" + # for shell/env compatibility. Only "true"/"1"/"yes" enables live delegation. + enable_live_delegation_raw = ( + os.environ.get("ENABLE_LIVE_DELEGATION", "").strip().lower() + or ("true" if getattr(args, "enable_live_delegation", False) else "false") + ) + _delegation_truthy = frozenset({"true", "1", "yes"}) + _delegation_falsy = frozenset({"false", "0", "no", ""}) + if enable_live_delegation_raw not in _delegation_truthy | _delegation_falsy: + raise RepoArchitectError( + f"Invalid ENABLE_LIVE_DELEGATION={enable_live_delegation_raw!r}. " + f"Expected true or false." + ) + enable_live_delegation = enable_live_delegation_raw in _delegation_truthy + + max_concurrent_raw = ( + os.environ.get("MAX_CONCURRENT_DELEGATED", "") + or str(getattr(args, "max_concurrent_delegated", 1) or 1) + ) + try: + max_concurrent_delegated = int(max_concurrent_raw) + if max_concurrent_delegated < 1: + raise ValueError("must be >= 1") + except (ValueError, TypeError): + raise RepoArchitectError( + f"Invalid MAX_CONCURRENT_DELEGATED={max_concurrent_raw!r}. Expected positive integer." 
+ ) + + stale_raw = ( + os.environ.get("STALE_TIMEOUT_DAYS", "") + or str(getattr(args, "stale_timeout_days", 14) or 14) + ) + try: + stale_timeout_days = int(stale_raw) + if stale_timeout_days < 1: + raise ValueError("must be >= 1") + except (ValueError, TypeError): + raise RepoArchitectError( + f"Invalid STALE_TIMEOUT_DAYS={stale_raw!r}. Expected positive integer." + ) + + reconciliation_raw = ( + os.environ.get("RECONCILIATION_WINDOW_DAYS", "") + or str(getattr(args, "reconciliation_window_days", 30) or 30) + ) + try: + reconciliation_window_days = int(reconciliation_raw) + if reconciliation_window_days < 1: + raise ValueError("must be >= 1") + except (ValueError, TypeError): + raise RepoArchitectError( + f"Invalid RECONCILIATION_WINDOW_DAYS={reconciliation_raw!r}. Expected positive integer." + ) + return Config( git_root=git_root, agent_dir=agent_dir, @@ -3144,6 +4002,12 @@ def build_config(args: argparse.Namespace) -> Config: dry_run=args.dry_run, max_issues=args.max_issues, issue_subsystem=issue_subsystem, + enable_live_delegation=enable_live_delegation, + max_concurrent_delegated=max_concurrent_delegated, + active_objective=active_objective, + lane_filter=lane_filter, + stale_timeout_days=stale_timeout_days, + reconciliation_window_days=reconciliation_window_days, ) @@ -3155,11 +4019,13 @@ def parse_args(argv: Optional[Sequence[str]] = None) -> argparse.Namespace: ) p.add_argument( "--mode", - choices=["analyze", "report", "issue", "mutate", "campaign"], + choices=["analyze", "report", "issue", "mutate", "campaign", "execution", "reconcile"], default="issue", help=( "Operating mode. 'issue' (default) is the safe governance mode: detects architectural gaps and " - "opens/updates GitHub Issues. 'analyze'/'report' are read-only. " + "opens/updates GitHub Issues. 'execution' selects one ready issue and delegates it to Copilot. " + "'reconcile' ingests PR outcomes back into work state. " + "'analyze'/'report' are read-only. 
" "'mutate'/'campaign' are charter-validated secondary modes that perform " "narrow, validated code mutations per GODELOS_REPO_IMPLEMENTATION_CHARTER §9–§10." ), @@ -3191,6 +4057,20 @@ def parse_args(argv: Optional[Sequence[str]] = None) -> argparse.Namespace: p.add_argument("--issue-subsystem", default=None, choices=SUBSYSTEM_LABELS, help="Issue mode: restrict gap detection to a specific subsystem.") + # Closed-loop execution / reconciliation operator controls + p.add_argument("--enable-live-delegation", action="store_true", + help="Execution mode: actually delegate to Copilot via GitHub API (default: dry-run only).") + p.add_argument("--max-concurrent-delegated", type=int, default=1, + help="Execution mode: max number of issues simultaneously in-flight (default: 1).") + p.add_argument("--active-objective", default=None, + help=f"Execution mode: restrict selection to a specific objective. " + f"Valid values: {', '.join(OBJECTIVE_LABELS)}.") + p.add_argument("--lane-filter", default=None, + help="Execution mode: restrict issue selection to a specific charter lane name.") + p.add_argument("--stale-timeout-days", type=int, default=14, + help="Reconciliation: days before a delegated-but-PR-less item is marked stale (default: 14).") + p.add_argument("--reconciliation-window-days", type=int, default=30, + help="Reconciliation: days of PRs to consider during reconciliation (default: 30).") return p.parse_args(argv) @@ -3248,6 +4128,22 @@ def main(argv: Optional[Sequence[str]] = None) -> int: print(json.dumps(result, indent=2, sort_keys=True)) return 0 + # Execution lane — select one ready issue and delegate to Copilot + if config.mode == EXECUTION_MODE: + result = run_execution_cycle(config) + for line in result.get("summary", []): + log(line, json_mode=config.log_json) + print(json.dumps(result, indent=2, sort_keys=True)) + return 0 + + # Reconciliation lane — ingest PR outcomes back into work state + if config.mode == RECONCILE_MODE: + result = 
run_reconciliation_cycle(config) + for line in result.get("summary", []): + log(line, json_mode=config.log_json) + print(json.dumps(result, indent=2, sort_keys=True)) + return 0 + # Charter-validated campaign mode (secondary — requires explicit opt-in) if config.mode == "campaign": log( diff --git a/tests/test_repo_architect.py b/tests/test_repo_architect.py index 686a99a..ca5ac1a 100644 --- a/tests/test_repo_architect.py +++ b/tests/test_repo_architect.py @@ -12,6 +12,7 @@ import ast import dataclasses +import datetime as dt import json import os import pathlib @@ -2473,5 +2474,631 @@ def test_cross_agent_still_flagged(self) -> None: "Cross-agent import should still trigger agent-boundary gap") +# --------------------------------------------------------------------------- +# 35. Work state ingestion +# --------------------------------------------------------------------------- + +class TestWorkStateIngestion(unittest.TestCase): + """Verify load/save/upsert of durable work state.""" + + def _make_work_item(self, fp: str = "aabbccddeeff", **kwargs: Any) -> ra.WorkItem: + now = "2025-01-01T00:00:00+00:00" + defaults: Dict[str, Any] = dict( + fingerprint=fp, objective="eliminate-import-cycles", + lane="import_cycles", issue_number=42, issue_state="open", + delegation_state="pending", assignee=None, + pr_number=None, pr_url=None, pr_state=None, + merged=False, closed_unmerged=False, blocked=False, superseded=False, + created_at=now, updated_at=now, run_id="run123", + gap_title="Test gap", gap_subsystem="runtime", + ) + defaults.update(kwargs) + return ra.WorkItem(**defaults) + + def test_load_empty_work_state(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + root = _make_git_root(tmp) + config = _make_config(root) + ws = ra.load_work_state(config) + self.assertEqual(ws["items"], []) + self.assertEqual(ws["version"], ra.VERSION) + + def test_upsert_inserts_new_item(self) -> None: + ws: Dict[str, Any] = {"version": ra.VERSION, "updated_at": None, "items": []} 
+ item = self._make_work_item() + ra.upsert_work_item(ws, item) + self.assertEqual(len(ws["items"]), 1) + self.assertEqual(ws["items"][0]["fingerprint"], "aabbccddeeff") + + def test_upsert_updates_existing_item(self) -> None: + ws: Dict[str, Any] = {"version": ra.VERSION, "updated_at": None, "items": []} + item = self._make_work_item() + ra.upsert_work_item(ws, item) + updated = self._make_work_item(delegation_state="delegated") + ra.upsert_work_item(ws, updated) + self.assertEqual(len(ws["items"]), 1) + self.assertEqual(ws["items"][0]["delegation_state"], "delegated") + + def test_save_and_reload_work_state(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + root = _make_git_root(tmp) + config = _make_config(root) + ws: Dict[str, Any] = ra.load_work_state(config) + item = self._make_work_item() + ra.upsert_work_item(ws, item) + ra.save_work_state(config, ws) + reloaded = ra.load_work_state(config) + self.assertEqual(len(reloaded["items"]), 1) + self.assertEqual(reloaded["items"][0]["fingerprint"], "aabbccddeeff") + + def test_ingest_issue_actions_records_created_issue(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + root = _make_git_root(tmp) + config = _make_config(root) + ws: Dict[str, Any] = ra.load_work_state(config) + actions = [{ + "action": "created", + "issue_number": 99, + "issue_url": "https://github.com/x/y/issues/99", + "fingerprint": "aabbccddeeff", + "gap_title": "Import cycles in backend.core", + "gap_subsystem": "runtime", + "labels_applied": [], + "dedupe_result": "new", + "dry_run_path": None, + "error": None, + "labels_confirmed": None, + }] + ra.ingest_issue_actions_to_work_state(config, ws, actions, "run1") + self.assertEqual(len(ws["items"]), 1) + self.assertEqual(ws["items"][0]["issue_number"], 99) + + def test_ingest_skips_error_actions(self) -> None: + ws: Dict[str, Any] = {"version": ra.VERSION, "updated_at": None, "items": []} + actions = [{"action": "error", "fingerprint": "aabbccddeeff", + "gap_title": "x", 
"gap_subsystem": "runtime", + "issue_number": None, "issue_url": None, "labels_applied": [], + "dedupe_result": "create_failed", "dry_run_path": None, + "error": "some error", "labels_confirmed": None}] + config = _make_config() + ra.ingest_issue_actions_to_work_state(config, ws, actions, "run1") + self.assertEqual(len(ws["items"]), 0) + + +# --------------------------------------------------------------------------- +# 36. select_ready_issue — deterministic selection logic +# --------------------------------------------------------------------------- + +class TestSelectReadyIssue(unittest.TestCase): + """Verify deterministic selection with dedup and in-progress guards.""" + + def _make_config_exec(self, **overrides: Any) -> ra.Config: + cfg = _make_config(mode="execution") + return dataclasses.replace(cfg, **overrides) + + def _make_issue(self, number: int = 1, labels: Optional[List[str]] = None, + body: str = "", title: str = "Fix thing") -> Dict[str, Any]: + if labels is None: + labels = ["arch-gap", "copilot-task", "needs-implementation"] + return { + "number": number, + "title": title, + "html_url": f"https://github.com/x/y/issues/{number}", + "body": body or f"", + "labels": [{"name": lbl} for lbl in labels], + "state": "open", + } + + def test_no_credentials_returns_none(self) -> None: + config = self._make_config_exec() + ws: Dict[str, Any] = {"items": []} + result = ra.select_ready_issue(config, ws) + self.assertIsNone(result) + + def test_max_concurrent_blocks_selection(self) -> None: + """If we've already hit MAX_CONCURRENT_DELEGATED, select_ready_issue returns None.""" + config = self._make_config_exec( + github_token="tok", github_repo="x/y", max_concurrent_delegated=1 + ) + ws: Dict[str, Any] = { + "items": [{ + "fingerprint": "aabb001122cc", + "delegation_state": "delegated", + "merged": False, "closed_unmerged": False, + "lane": "import_cycles", "issue_number": 5, + }] + } + result = ra.select_ready_issue(config, ws) + self.assertIsNone(result, 
"Should not select when concurrency cap is hit") + + def test_blocked_fingerprint_excluded(self) -> None: + """Issues whose fingerprint is already delegated must be excluded from selection.""" + fp = "aabbccddeeff" + issue = self._make_issue(body=f"") + ws: Dict[str, Any] = { + "items": [{ + "fingerprint": fp, + "delegation_state": "delegated", + "merged": False, "closed_unmerged": False, + "lane": "import_cycles", "issue_number": 1, + }] + } + # Manually test the filtering logic by checking blocked set + config = self._make_config_exec( + github_token="tok", github_repo="x/y", max_concurrent_delegated=5 + ) + # The issue's fingerprint is in-flight, so it should be blocked + in_flight = [it for it in ws["items"] if it.get("delegation_state") == "delegated" + and not it.get("merged") and not it.get("closed_unmerged")] + blocked_fps = {it["fingerprint"] for it in in_flight} + body = issue.get("body") or "" + extracted = ra._extract_fingerprint_from_body(body) + self.assertIn(extracted, blocked_fps, "Fingerprint should be in blocked set") + + def test_lifecycle_labels_exclude_issue(self) -> None: + """Issues with blocking lifecycle labels must be excluded.""" + config = self._make_config_exec( + github_token="tok", github_repo="x/y" + ) + blocking = {"blocked", "superseded", "in-progress", "pr-open", "merged"} + issue_labels = {"arch-gap", "copilot-task", "needs-implementation", "in-progress"} + self.assertTrue(issue_labels & blocking, "in-progress should block selection") + + def test_priority_ordering(self) -> None: + """Priority critical < high < medium < low in rank (lower index = higher priority).""" + rank = {p: i for i, p in enumerate(ra.ISSUE_PRIORITY_LEVELS)} + self.assertLess(rank["critical"], rank["high"]) + self.assertLess(rank["high"], rank["medium"]) + self.assertLess(rank["medium"], rank["low"]) + + def test_extract_fingerprint_from_body(self) -> None: + body = "Some text\n\nMore text" + fp = ra._extract_fingerprint_from_body(body) + 
self.assertEqual(fp, "abc123def456") + + def test_extract_fingerprint_not_found(self) -> None: + self.assertIsNone(ra._extract_fingerprint_from_body("no fingerprint here")) + + def test_extract_lane_from_body(self) -> None: + body = "This fixes issues in Lane: import_cycles for the repo." + lane = ra._extract_lane_from_body(body) + self.assertEqual(lane, "import_cycles") + + def test_active_fingerprints_in_work_state(self) -> None: + ws: Dict[str, Any] = { + "items": [ + {"fingerprint": "aaa", "delegation_state": "delegated", + "merged": False, "closed_unmerged": False}, + {"fingerprint": "bbb", "delegation_state": "pending", + "merged": False, "closed_unmerged": False}, + {"fingerprint": "ccc", "delegation_state": "done", + "merged": True, "closed_unmerged": False}, + {"fingerprint": "ddd", "delegation_state": "delegated", + "merged": True, "closed_unmerged": False}, + ] + } + active = ra._active_fingerprints_in_work_state(ws) + self.assertIn("aaa", active) + self.assertIn("bbb", active) + self.assertNotIn("ccc", active) + self.assertNotIn("ddd", active) + + +# --------------------------------------------------------------------------- +# 37. 
Delegation dry-run behavior +# --------------------------------------------------------------------------- + +class TestDelegationDryRun(unittest.TestCase): + """Verify delegation dry-run behavior (no real API calls).""" + + def _make_issue(self, number: int = 10) -> Dict[str, Any]: + return { + "number": number, + "title": "Fix import cycles in backend.core", + "html_url": f"https://github.com/x/y/issues/{number}", + "body": "\nLane: import_cycles", + "labels": [ + {"name": "arch-gap"}, {"name": "copilot-task"}, + {"name": "needs-implementation"}, {"name": "runtime"}, + ], + "state": "open", + } + + def test_dry_run_does_not_mutate_state_action(self) -> None: + """Dry-run delegation records 'pending' delegation_state and 'dry_run' action.""" + config = _make_config(mode="execution") + # enable_live_delegation defaults to False → dry_run=True + self.assertFalse(config.enable_live_delegation) + ws: Dict[str, Any] = {"items": []} + issue = self._make_issue() + result = ra.delegate_to_copilot(config, issue, ws, run_id="run-dryrun") + self.assertEqual(result["action"], "dry_run") + self.assertTrue(result["dry_run"]) + self.assertIsNone(result["assignee"]) + # Work state should be updated with pending state + self.assertEqual(len(ws["items"]), 1) + self.assertEqual(ws["items"][0]["delegation_state"], "pending") + self.assertEqual(ws["items"][0]["fingerprint"], "aabbccddeeff") + + def test_dry_run_no_credentials_still_records(self) -> None: + """Dry-run works even without GitHub credentials.""" + config = _make_config(mode="execution") + self.assertIsNone(config.github_token) + ws: Dict[str, Any] = {"items": []} + issue = self._make_issue(number=7) + result = ra.delegate_to_copilot(config, issue, ws, run_id="run-nodeps") + self.assertEqual(result["action"], "dry_run") + self.assertEqual(len(ws["items"]), 1) + + def test_live_mode_missing_credentials_returns_error(self) -> None: + """Live delegation with no GitHub credentials returns error action.""" + config = 
_make_config(mode="execution") + config = dataclasses.replace(config, enable_live_delegation=True) + ws: Dict[str, Any] = {"items": []} + issue = self._make_issue() + result = ra.delegate_to_copilot(config, issue, ws, run_id="run-live") + self.assertEqual(result["action"], "error") + self.assertIn("GITHUB_TOKEN", result.get("error", "")) + + def test_upsert_updates_existing_work_item(self) -> None: + """A second delegation call updates the existing work item rather than inserting.""" + config = _make_config(mode="execution") + ws: Dict[str, Any] = {"items": []} + issue = self._make_issue() + ra.delegate_to_copilot(config, issue, ws, run_id="run1") + ra.delegate_to_copilot(config, issue, ws, run_id="run2") + self.assertEqual(len(ws["items"]), 1, "Should upsert, not append duplicate") + + def test_run_execution_cycle_no_credentials_returns_no_selection(self) -> None: + """run_execution_cycle without credentials returns no selected issue.""" + with tempfile.TemporaryDirectory() as tmp: + root = _make_git_root(tmp) + config = _make_config(root, mode="execution") + result = ra.run_execution_cycle(config) + self.assertEqual(result["status"], "execution_cycle_complete") + self.assertIsNone(result["selected_issue"]) + + +# --------------------------------------------------------------------------- +# 38. 
PR reconciliation +# --------------------------------------------------------------------------- + +class TestPRReconciliation(unittest.TestCase): + """Verify PR state ingestion and work state reconciliation.""" + + def _make_pr(self, number: int, state: str = "open", + body: str = "", merged_at: Optional[str] = None, + draft: bool = False) -> Dict[str, Any]: + return { + "number": number, + "title": f"PR #{number}", + "html_url": f"https://github.com/x/y/pull/{number}", + "state": state, + "draft": draft, + "body": body, + "merged_at": merged_at, + } + + def test_classify_pr_merged(self) -> None: + pr = self._make_pr(1, state="closed", merged_at="2025-01-01T00:00:00Z") + self.assertEqual(ra._classify_pr(pr), "merged") + + def test_classify_pr_closed_unmerged(self) -> None: + pr = self._make_pr(2, state="closed", merged_at=None) + self.assertEqual(ra._classify_pr(pr), "closed_unmerged") + + def test_classify_pr_draft(self) -> None: + pr = self._make_pr(3, state="open", draft=True) + self.assertEqual(ra._classify_pr(pr), "draft") + + def test_classify_pr_open(self) -> None: + pr = self._make_pr(4, state="open", draft=False) + self.assertEqual(ra._classify_pr(pr), "open") + + def test_pr_mentions_issue_hash(self) -> None: + pr = self._make_pr(10, body="Fixes #42 in this PR.") + self.assertTrue(ra._pr_mentions_issue(pr, 42)) + self.assertFalse(ra._pr_mentions_issue(pr, 99)) + + def test_pr_mentions_issue_closes(self) -> None: + pr = self._make_pr(11, body="closes 55") + self.assertTrue(ra._pr_mentions_issue(pr, 55)) + + def test_reconcile_empty_work_state(self) -> None: + config = _make_config(mode="reconcile") + ws: Dict[str, Any] = {"items": []} + result = ra.reconcile_pr_state(config, ws) + self.assertEqual(result["status"], "reconcile_complete") + self.assertEqual(result["updated"], 0) + + def test_reconcile_marks_merged(self) -> None: + """An item linked to a merged PR should get merged=True and delegation_state=done.""" + config = _make_config(mode="reconcile") 
+ ws: Dict[str, Any] = { + "items": [{ + "fingerprint": "aabb", "issue_number": 42, "lane": "import_cycles", + "delegation_state": "delegated", "merged": False, "closed_unmerged": False, + "blocked": False, "superseded": False, + "pr_number": None, "pr_url": None, "pr_state": None, + "updated_at": "2025-01-01T00:00:00+00:00", + "objective": "", "assignee": None, + "created_at": "2025-01-01T00:00:00+00:00", "run_id": "r1", + "gap_title": "Fix cycles", "gap_subsystem": "runtime", + "issue_state": "open", + }] + } + merged_pr = self._make_pr(100, state="closed", body="Fixes #42", merged_at="2025-01-02T00:00:00Z") + # Patch _list_prs_for_repo to return the merged PR + with patch.object(ra, "_list_prs_for_repo", return_value=[merged_pr]): + result = ra.reconcile_pr_state(config, ws) + self.assertEqual(result["updated"], 1) + self.assertTrue(ws["items"][0]["merged"]) + self.assertEqual(ws["items"][0]["delegation_state"], "done") + + def test_reconcile_marks_stale(self) -> None: + """A delegated item with no PR and old updated_at should be marked stale.""" + config = _make_config(mode="reconcile") + old_date = (dt.datetime.now(dt.timezone.utc) - dt.timedelta(days=30)).isoformat() + ws: Dict[str, Any] = { + "items": [{ + "fingerprint": "ccdd", "issue_number": 77, "lane": "parse_errors", + "delegation_state": "delegated", "merged": False, "closed_unmerged": False, + "blocked": False, "superseded": False, + "pr_number": None, "pr_url": None, "pr_state": None, + "updated_at": old_date, + "objective": "", "assignee": None, + "created_at": old_date, "run_id": "r2", + "gap_title": "Fix parse errors", "gap_subsystem": "runtime", + "issue_state": "open", + }] + } + with patch.object(ra, "_list_prs_for_repo", return_value=[]): + result = ra.reconcile_pr_state(config, ws) + self.assertEqual(result["updated"], 1) + self.assertEqual(ws["items"][0]["pr_state"], "stale") + + def test_reconcile_stale_respects_stale_timeout_days(self) -> None: + """stale_timeout_days parameter controls 
when an item is marked stale.""" + config = _make_config(mode="reconcile") + # Item updated 5 days ago — should NOT be stale with default 14-day timeout + recent_date = (dt.datetime.now(dt.timezone.utc) - dt.timedelta(days=5)).isoformat() + ws: Dict[str, Any] = { + "items": [{ + "fingerprint": "eeff00", "issue_number": 88, "lane": "hygiene", + "delegation_state": "delegated", "merged": False, "closed_unmerged": False, + "blocked": False, "superseded": False, + "pr_number": None, "pr_url": None, "pr_state": None, + "updated_at": recent_date, + "objective": "", "assignee": None, + "created_at": recent_date, "run_id": "r4", + "gap_title": "Hygiene", "gap_subsystem": "runtime", + "issue_state": "open", + }] + } + with patch.object(ra, "_list_prs_for_repo", return_value=[]): + result = ra.reconcile_pr_state(config, ws) + self.assertEqual(result["updated"], 0, "Item updated 5 days ago should not be stale with 14-day timeout") + + # Now test with a 3-day timeout: item should be stale + config_strict = dataclasses.replace(config, stale_timeout_days=3) + ws2: Dict[str, Any] = { + "items": [{ + "fingerprint": "eeff00", "issue_number": 88, "lane": "hygiene", + "delegation_state": "delegated", "merged": False, "closed_unmerged": False, + "blocked": False, "superseded": False, + "pr_number": None, "pr_url": None, "pr_state": None, + "updated_at": recent_date, + "objective": "", "assignee": None, + "created_at": recent_date, "run_id": "r4", + "gap_title": "Hygiene", "gap_subsystem": "runtime", + "issue_state": "open", + }] + } + with patch.object(ra, "_list_prs_for_repo", return_value=[]): + result2 = ra.reconcile_pr_state(config_strict, ws2) + self.assertEqual(result2["updated"], 1, "Item updated 5 days ago should be stale with 3-day timeout") + self.assertEqual(ws2["items"][0]["pr_state"], "stale") + + def test_reconcile_skips_finished_items(self) -> None: + """Items already merged or closed_unmerged are skipped during reconciliation.""" + config = 
_make_config(mode="reconcile") + ws: Dict[str, Any] = { + "items": [{ + "fingerprint": "eeff", "issue_number": 10, + "delegation_state": "done", "merged": True, "closed_unmerged": False, + "pr_state": "merged", "updated_at": "2025-01-01T00:00:00+00:00", + "objective": "", "lane": "hygiene", "assignee": None, + "pr_number": 5, "pr_url": None, "blocked": False, "superseded": False, + "created_at": "2025-01-01T00:00:00+00:00", "run_id": "r3", + "gap_title": "Hygiene", "gap_subsystem": "runtime", "issue_state": "closed", + }] + } + with patch.object(ra, "_list_prs_for_repo", return_value=[]): + result = ra.reconcile_pr_state(config, ws) + self.assertEqual(result["updated"], 0, "Already-merged items should not be re-updated") + + def test_run_reconciliation_cycle_no_items(self) -> None: + """run_reconciliation_cycle with empty work state returns complete status.""" + with tempfile.TemporaryDirectory() as tmp: + root = _make_git_root(tmp) + config = _make_config(root, mode="reconcile") + with patch.object(ra, "_list_prs_for_repo", return_value=[]): + result = ra.run_reconciliation_cycle(config) + self.assertEqual(result["status"], "reconcile_cycle_complete") + self.assertEqual(result["updated"], 0) +# --------------------------------------------------------------------------- + +class TestLifecycleLabelTransitions(unittest.TestCase): + """Verify lifecycle label sets are correct and transitions are deterministic.""" + + def test_lifecycle_labels_present(self) -> None: + expected = {"ready-for-delegation", "in-progress", "pr-open", "merged", "blocked", "superseded"} + self.assertEqual(set(ra.LIFECYCLE_LABELS), expected) + + def test_execution_eligible_labels_present(self) -> None: + self.assertIn("arch-gap", ra.EXECUTION_ELIGIBLE_LABELS) + self.assertIn("copilot-task", ra.EXECUTION_ELIGIBLE_LABELS) + self.assertIn("needs-implementation", ra.EXECUTION_ELIGIBLE_LABELS) + + def test_pr_open_lifecycle_added_on_open_pr(self) -> None: + """When a PR is open, the lifecycle labels 
should include pr-open.""" + current = {"arch-gap", "copilot-task", "needs-implementation", "in-progress"} + new_labels = current - set(ra.LIFECYCLE_LABELS) + new_labels.add("pr-open") + self.assertIn("pr-open", new_labels) + self.assertNotIn("in-progress", new_labels) + + def test_merged_lifecycle_added_on_merged_pr(self) -> None: + """When a PR merges, the lifecycle labels should include merged.""" + current = {"arch-gap", "copilot-task", "needs-implementation", "pr-open"} + new_labels = current - set(ra.LIFECYCLE_LABELS) + new_labels.add("merged") + self.assertIn("merged", new_labels) + self.assertNotIn("pr-open", new_labels) + + def test_copilot_assignee_constant(self) -> None: + self.assertEqual(ra.COPILOT_AGENT_ASSIGNEE, "copilot") + + +# --------------------------------------------------------------------------- +# 40. Planner skips already-in-progress objectives +# --------------------------------------------------------------------------- + +class TestPlannerSkipsInProgress(unittest.TestCase): + """Verify the planner filters out gaps already actively in work state.""" + + def _model_meta(self) -> Dict[str, Any]: + return { + "used": False, "summary": None, "requested_model": None, "actual_model": None, + "primary_model": None, "fallback_model": None, "model_used": None, + "fallback_used": False, "fallback_reason": None, + "fallback_occurred": False, "enabled": False, + } + + def test_active_fingerprints_blocks_planning(self) -> None: + """Fingerprints already in work state (delegated) should be considered active.""" + with tempfile.TemporaryDirectory() as tmp: + root = _make_git_root(tmp) + config = _make_config(root) + analysis: Dict[str, Any] = { + "parse_error_files": ["backend/broken.py"], + "cycles": [], + "entrypoint_clusters": {}, + "entrypoint_paths": [], + "architecture_score": 80, + "score_factors": {}, + "local_import_graph": {}, + } + gaps = ra.diagnose_gaps(config, analysis, self._model_meta()) + # Find the parse-errors gap fingerprint + 
parse_gaps = [g for g in gaps if g.issue_key == "parse-errors"] + if parse_gaps: + gap = parse_gaps[0] + fp = ra.issue_fingerprint(gap.subsystem, gap.issue_key) + ws: Dict[str, Any] = { + "items": [{ + "fingerprint": fp, + "delegation_state": "delegated", + "merged": False, "closed_unmerged": False, + }] + } + active = ra._active_fingerprints_in_work_state(ws) + self.assertIn(fp, active, "Delegated fingerprint should be in active set") + + +# --------------------------------------------------------------------------- +# 41. Config validation for new operator controls +# --------------------------------------------------------------------------- + +class TestConfigValidationNewControls(unittest.TestCase): + """Verify build_config validates new operator control env vars.""" + + def setUp(self) -> None: + self._orig_cwd = os.getcwd() + + def tearDown(self) -> None: + os.chdir(self._orig_cwd) + + def test_invalid_active_objective_raises(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + root = _make_git_root(tmp) + os.chdir(root) + with patch.dict(os.environ, {"ACTIVE_OBJECTIVE": "not-a-valid-objective"}, clear=False): + args = ra.parse_args(["--mode", "execution"]) + with self.assertRaises(ra.RepoArchitectError) as ctx: + ra.build_config(args) + self.assertIn("active_objective", str(ctx.exception)) + + def test_valid_active_objective_accepted(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + root = _make_git_root(tmp) + os.chdir(root) + with patch.dict(os.environ, {"ACTIVE_OBJECTIVE": "eliminate-import-cycles"}, clear=False): + args = ra.parse_args(["--mode", "execution"]) + config = ra.build_config(args) + self.assertEqual(config.active_objective, "eliminate-import-cycles") + + def test_invalid_max_concurrent_delegated_raises(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + root = _make_git_root(tmp) + os.chdir(root) + with patch.dict(os.environ, {"MAX_CONCURRENT_DELEGATED": "abc"}, clear=False): + args = ra.parse_args(["--mode", 
"execution"]) + with self.assertRaises(ra.RepoArchitectError) as ctx: + ra.build_config(args) + self.assertIn("MAX_CONCURRENT_DELEGATED", str(ctx.exception)) + + def test_invalid_stale_timeout_raises(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + root = _make_git_root(tmp) + os.chdir(root) + with patch.dict(os.environ, {"STALE_TIMEOUT_DAYS": "0"}, clear=False): + args = ra.parse_args(["--mode", "execution"]) + with self.assertRaises(ra.RepoArchitectError) as ctx: + ra.build_config(args) + self.assertIn("STALE_TIMEOUT_DAYS", str(ctx.exception)) + + def test_defaults_without_env_vars(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + root = _make_git_root(tmp) + os.chdir(root) + # Remove any env vars that might interfere + env_clean = { + k: v for k, v in os.environ.items() + if k not in ( + "ACTIVE_OBJECTIVE", "REPO_ARCHITECT_ACTIVE_OBJECTIVE", + "MAX_CONCURRENT_DELEGATED", "STALE_TIMEOUT_DAYS", + "RECONCILIATION_WINDOW_DAYS", "ENABLE_LIVE_DELEGATION", + ) + } + with patch.dict(os.environ, env_clean, clear=True): + args = ra.parse_args(["--mode", "execution"]) + config = ra.build_config(args) + self.assertEqual(config.max_concurrent_delegated, 1) + self.assertEqual(config.stale_timeout_days, 14) + self.assertEqual(config.reconciliation_window_days, 30) + self.assertFalse(config.enable_live_delegation) + + def test_objective_labels_have_valid_keys(self) -> None: + """All objective label keys must be non-empty strings.""" + for key, desc in ra.OBJECTIVE_LABELS.items(): + self.assertIsInstance(key, str) + self.assertTrue(key) + self.assertIsInstance(desc, str) + self.assertTrue(desc) + + def test_execution_mode_in_parse_args(self) -> None: + args = ra.parse_args(["--mode", "execution"]) + self.assertEqual(args.mode, "execution") + + def test_reconcile_mode_in_parse_args(self) -> None: + args = ra.parse_args(["--mode", "reconcile"]) + self.assertEqual(args.mode, "reconcile") + + def test_enable_live_delegation_flag(self) -> None: + args = 
ra.parse_args(["--mode", "execution", "--enable-live-delegation"]) + self.assertTrue(args.enable_live_delegation) + + if __name__ == "__main__": unittest.main() From 6ecfaaad293a83f517f43b3e855a71c5d3d6f957 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 16 Mar 2026 16:03:33 +0000 Subject: [PATCH 3/8] fix(repo-architect): harden delegation proof, PR linkage evidence, and factual lifecycle states Co-authored-by: Steake <530040+Steake@users.noreply.github.com> --- docs/repo_architect/OPERATOR_GUIDE.md | 106 ++++-- repo_architect.py | 526 ++++++++++++++++++++------ tests/test_repo_architect.py | 188 +++++++-- 3 files changed, 653 insertions(+), 167 deletions(-) diff --git a/docs/repo_architect/OPERATOR_GUIDE.md b/docs/repo_architect/OPERATOR_GUIDE.md index 0c333ad..40d7e1b 100644 --- a/docs/repo_architect/OPERATOR_GUIDE.md +++ b/docs/repo_architect/OPERATOR_GUIDE.md @@ -195,14 +195,14 @@ python repo_architect.py --mode issue --allow-dirty --max-issues 3 1. Loads durable work state. 2. Runs lightweight PR reconciliation to refresh issue states. -3. Selects at most one issue that is ready for delegation (eligible labels + not blocked/in-progress). -4. Delegates to Copilot: applies `in-progress` label, assigns to `@copilot`, posts delegation comment. -5. Records the delegation in work state. +3. Selects at most one issue that is ready for delegation (eligible labels + not already in factual terminal/in-progress states). +4. Requests delegation to Copilot by assignment + machine-linkage comment. +5. Records delegation events with explicit outcomes in work state (`delegation-requested`, `delegation-confirmed`, `delegation-failed`, `delegation-unconfirmed`). 
### Selection rules - Issue must have all of: `arch-gap`, `copilot-task`, `needs-implementation` -- Issue must NOT have: `blocked`, `superseded`, `in-progress`, `pr-open`, `merged` +- Issue must NOT have: `delegation-requested`, `in-progress`, `pr-open`, `pr-draft`, `merged`, `closed-unmerged`, `failed-delegation`, `blocked-by-dependency`, `superseded-by-issue`, `superseded-by-pr` - Fingerprint must not already be delegated - At most one issue per charter lane at a time - Respects `MAX_CONCURRENT_DELEGATED` limit (default: 1) @@ -250,11 +250,14 @@ python repo_architect.py --mode execution --allow-dirty --lane-filter import_cyc In live mode, the execution lane: -1. Applies `in-progress` label and removes `ready-for-delegation`. -2. Assigns the issue to `@copilot` (GitHub Copilot coding agent username). -3. Posts a delegation comment with the active objective, lane, and fingerprint. +1. Applies factual lifecycle labels: `delegation-requested` + `in-progress`. +2. Assigns the issue to `@copilot+gpt-5.3-codex`. +3. Posts a delegation comment containing a machine linkage block (`repo-architect-linkage`) and fingerprint marker. +4. Records per-mechanism evidence from GitHub API responses: + - assignment evidence (assignee list confirmation) + - comment evidence (comment id/url confirmation) -GitHub Copilot coding agent is automatically triggered when an issue is assigned to `@copilot`. It reads the issue body (including the Copilot implementation prompt) and opens a PR. The reconciliation lane then detects this PR and updates the work state. +Delegation is only considered **confirmed** when at least one reliable mechanism is confirmed by API response. Label changes alone are not treated as proof of execution. --- @@ -264,7 +267,12 @@ GitHub Copilot coding agent is automatically triggered when an issue is assigned 1. Loads durable work state. 2. Fetches all recent PRs from the repository. -3. 
For each tracked issue, detects linked PRs (by `#issue_number` reference in PR body/title). +3. For each tracked issue, detects linked PRs using evidence order: + 1) explicit fingerprint marker in PR body + 2) explicit `repo-architect-linkage` block in PR body + 3) branch naming convention tied to issue/fingerprint + 4) closing keywords / linked references + 5) fallback `#issue_number` mention 4. Updates item state: `merged`, `closed_unmerged`, `open`, `draft`, `stale`. 5. Updates lifecycle labels on the GitHub Issue. 6. Saves updated work state. @@ -299,10 +307,11 @@ python repo_architect.py --mode reconcile --allow-dirty --stale-timeout-days 7 - ### How issues and PRs feed the next planning cycle After reconciliation: -- Issues with `merged` PRs are marked `done` in work state → planner can generate follow-on work. -- Issues with `open` PRs are blocked from re-delegation → planner skips them. -- Issues with `stale` status are flagged → operator can review or unblock. -- Issues with `closed_unmerged` PRs are marked done (superseded) → planner can re-raise if still needed. +- Issues with `merged` PRs are marked factually merged. +- Issues with `open`/`draft` PRs are marked `pr-open` / `pr-draft`. +- Issues with no match past timeout are marked `stale` (not implicitly blocked-by-dependency). +- Issues with `closed_unmerged` PRs remain factual `closed-unmerged` (not automatically superseded). +- Planning may infer priority/deprioritisation, but factual and inferred states are stored separately. 
--- @@ -316,6 +325,15 @@ The work state is stored in `.agent/work_state.json` (gitignored, refreshed each { "version": "2.1.0", "updated_at": "2026-01-01T00:00:00+00:00", + "delegation_events": [ + { + "ts": "2026-01-01T00:01:02+00:00", + "issue_number": 42, + "fingerprint": "a1b2c3d4e5f6", + "mechanism": "assignment+comment", + "outcome": "delegation-confirmed" + } + ], "items": [ { "fingerprint": "a1b2c3d4e5f6", @@ -323,8 +341,8 @@ The work state is stored in `.agent/work_state.json` (gitignored, refreshed each "lane": "import_cycles", "issue_number": 42, "issue_state": "open", - "delegation_state": "delegated", - "assignee": "copilot", + "delegation_state": "delegation-confirmed", + "assignee": "copilot+gpt-5.3-codex", "pr_number": 101, "pr_url": "https://github.com/org/repo/pull/101", "pr_state": "merged", @@ -332,6 +350,18 @@ The work state is stored in `.agent/work_state.json` (gitignored, refreshed each "closed_unmerged": false, "blocked": false, "superseded": false, + "delegation_mechanism": "assignment+comment", + "delegation_requested_at": "2026-01-01T00:01:00+00:00", + "delegation_confirmed_at": "2026-01-01T00:01:02+00:00", + "delegation_confirmation_evidence": {"assignment": {"confirmed": true}}, + "delegation_comment_url": "https://github.com/org/repo/issues/42#issuecomment-123", + "delegation_comment_id": 123, + "delegation_assignment_evidence": {"assignees": ["copilot+gpt-5.3-codex"], "confirmed": true}, + "pr_match_method": "fingerprint_marker", + "pr_match_confidence": "exact", + "pr_match_evidence": {"fingerprint": "a1b2c3d4e5f6"}, + "lifecycle_fact_state": "merged", + "lifecycle_inferred_state": "completed", "created_at": "2026-01-01T00:00:00+00:00", "updated_at": "2026-01-02T00:00:00+00:00", "run_id": "12345678-1", @@ -342,6 +372,8 @@ The work state is stored in `.agent/work_state.json` (gitignored, refreshed each } ``` +`delegation_events[]` is an append-only audit log of each delegation attempt with mechanism and outcome evidence. 
+ ### Field reference | Field | Type | Description | @@ -351,7 +383,7 @@ The work state is stored in `.agent/work_state.json` (gitignored, refreshed each | `lane` | string | Charter lane name | | `issue_number` | int\|null | GitHub Issue number | | `issue_state` | string | `open` or `closed` | -| `delegation_state` | string | `pending`, `delegated`, `done`, `blocked`, or `superseded` | +| `delegation_state` | string | `ready-for-delegation`, `delegation-requested`, `delegation-confirmed`, `delegation-unconfirmed`, `delegation-failed` | | `assignee` | string\|null | GitHub username delegated to | | `pr_number` | int\|null | Linked PR number | | `pr_url` | string\|null | Linked PR URL | @@ -360,6 +392,18 @@ The work state is stored in `.agent/work_state.json` (gitignored, refreshed each | `closed_unmerged` | bool | Whether the linked PR was closed without merging | | `blocked` | bool | Whether the item is manually blocked | | `superseded` | bool | Whether the item has been superseded | +| `delegation_mechanism` | string\|null | Delegation mechanism used (`assignment+comment`) | +| `delegation_requested_at` | ISO-8601\|null | Delegation request timestamp | +| `delegation_confirmed_at` | ISO-8601\|null | Delegation confirmation timestamp | +| `delegation_confirmation_evidence` | object\|null | Confirmed mechanism evidence map | +| `delegation_comment_url` | string\|null | URL of delegation comment (if created) | +| `delegation_comment_id` | int\|null | GitHub comment id for delegation comment | +| `delegation_assignment_evidence` | object\|null | Assignment API evidence payload | +| `pr_match_method` | string\|null | PR linkage method used | +| `pr_match_confidence` | string\|null | `exact`, `strong`, or `weak` | +| `pr_match_evidence` | object\|null | Evidence payload proving PR linkage | +| `lifecycle_fact_state` | string | Factual lifecycle state label | +| `lifecycle_inferred_state` | string\|null | Optional planning interpretation (separate from facts) | | 
`created_at` | ISO-8601 | Creation timestamp | | `updated_at` | ISO-8601 | Last update timestamp | | `run_id` | string | Workflow run provenance | @@ -370,27 +414,26 @@ The work state is stored in `.agent/work_state.json` (gitignored, refreshed each ## Label Lifecycle -Labels transition deterministically through the following states: +Labels represent factual observed states; planning interpretations are separate: ``` [created] - └─ arch-gap + copilot-task + needs-implementation - └─ [optional] ready-for-delegation (added by planner on first synthesis) + └─ arch-gap + copilot-task + needs-implementation + ready-for-delegation [selected for execution] - └─ in-progress (removes ready-for-delegation) + └─ delegation-requested + in-progress (removes ready-for-delegation) [PR opened] - └─ pr-open (removes in-progress) + └─ pr-open or pr-draft (removes execution labels) [PR merged] └─ merged (removes pr-open) [PR closed unmerged] - └─ superseded (removes pr-open) + └─ closed-unmerged (factual terminal state) [stale: delegated > STALE_TIMEOUT_DAYS with no PR] - └─ blocked (flags for operator review) + └─ stale (does not imply dependency block) ``` ### All labels used by repo-architect @@ -401,11 +444,17 @@ Labels transition deterministically through the following states: | `copilot-task` | Base | Ready for Copilot to implement | | `needs-implementation` | Base | Awaiting a code PR | | `ready-for-delegation` | Lifecycle | Ready for execution selection | +| `delegation-requested` | Lifecycle | Delegation has been requested | | `in-progress` | Lifecycle | Currently delegated to Copilot | | `pr-open` | Lifecycle | PR exists and is open | +| `pr-draft` | Lifecycle | PR exists and is draft | | `merged` | Lifecycle | PR has merged | -| `blocked` | Lifecycle | Stale or manually blocked | -| `superseded` | Lifecycle | PR closed unmerged or issue replaced | +| `closed-unmerged` | Lifecycle | PR closed without merge | +| `stale` | Lifecycle | No linked PR within stale timeout | +| 
`blocked-by-dependency` | Lifecycle | Explicit dependency block only | +| `superseded-by-issue` | Lifecycle | Explicit superseding issue only | +| `superseded-by-pr` | Lifecycle | Explicit superseding PR only | +| `failed-delegation` | Lifecycle | Delegation attempt failed with no confirmation | | `priority:critical` | Priority | Critical priority (applied automatically) | | `priority:high` | Priority | High priority (applied automatically) | | Subsystem labels | Subsystem | `runtime`, `core`, `agents`, etc. | @@ -417,7 +466,7 @@ Labels transition deterministically through the following states: In dry-run mode (`--dry-run` flag, or `ENABLE_LIVE_DELEGATION=false` for execution mode), the system operates without GitHub API side-effects: - **Issue mode dry-run**: writes issue bodies to `docs/repo_architect/issues/.md` instead of calling the Issues API. -- **Execution mode dry-run** (default): reports which issue would be delegated but does not assign labels, assignees, or post comments. Work state is still updated with `delegation_state: pending`. +- **Execution mode dry-run** (default): reports which issue would be delegated but does not assign labels, assignees, or post comments. Work state records `delegation_state: delegation-requested` with `dry_run: true` event evidence. - **Reconcile mode dry-run**: reads PR state but does not update lifecycle labels on issues. --- @@ -459,7 +508,7 @@ These groups ensure that: ### Automated (via execution lane) -1. Execution lane selects the issue and assigns it to `@copilot`. +1. Execution lane selects the issue and assigns it to `@copilot+gpt-5.3-codex`. 2. GitHub Copilot coding agent is triggered automatically on assignment. 3. Copilot reads the issue body and opens a PR. 4. Reconciliation lane detects the PR and updates work state + lifecycle labels. 
@@ -654,7 +703,7 @@ Each gap has a deterministic 12-hex-character fingerprint derived from `subsyste On each run: - If a matching **open** issue exists → add a re-scan comment (no new issue). - If no matching issue exists → create a new one. -- If the fingerprint is already tracked as `delegated` in work state → the planner skips it (no duplicate issue). +- If the fingerprint is already tracked as `delegation-requested`/`delegation-confirmed` in work state → the planner skips it (no duplicate issue). --- @@ -697,4 +746,3 @@ The implementation charter (§15) requires machine-readable policy files that en | [`dependency_contract.json`](dependency_contract.json) | §6 | Layer order, allowed dependency direction, hard prohibitions, circular import policy, ownership hints | Agents should consume these files before proposing code changes. The constants `CHARTER_COMPANION_FILES`, `CHARTER_PRIORITY_ORDER`, and `AGENT_INSTRUCTION_CONTRACT` in `repo_architect.py` encode the same data as Python tuples for runtime use. - diff --git a/repo_architect.py b/repo_architect.py index b67fc6b..99fffba 100644 --- a/repo_architect.py +++ b/repo_architect.py @@ -83,7 +83,11 @@ # Standard labels for the issue-first governance system ARCH_GAP_LABELS: Tuple[str, ...] = ( "arch-gap", "copilot-task", "needs-implementation", - "ready-for-validation", "blocked", "superseded", + "ready-for-validation", + "ready-for-delegation", "delegation-requested", "in-progress", + "pr-open", "pr-draft", "merged", "closed-unmerged", "stale", + "blocked-by-dependency", "superseded-by-issue", "superseded-by-pr", + "failed-delegation", ) SUBSYSTEM_LABELS: Tuple[str, ...] = ( "workflow", "runtime", "reporting", "docs", @@ -164,19 +168,37 @@ # New operating modes for the execution and reconciliation lanes EXECUTION_MODE = "execution" RECONCILE_MODE = "reconcile" -# Lifecycle labels — document and drive issue → PR state transitions +# Factual lifecycle labels — represent observed facts, not planning interpretations. 
LIFECYCLE_LABELS: Tuple[str, ...] = ( "ready-for-delegation", + "delegation-requested", "in-progress", "pr-open", + "pr-draft", "merged", - "blocked", - "superseded", + "closed-unmerged", + "stale", + "blocked-by-dependency", + "superseded-by-issue", + "superseded-by-pr", + "failed-delegation", +) +# Backward-compatible legacy lifecycle labels still recognised for filtering. +LEGACY_LIFECYCLE_LABELS: Tuple[str, ...] = ("blocked", "superseded") +# Ranking used when reconciling multiple candidate PR matches. +MATCH_CONFIDENCE_RANK: Dict[str, int] = {"exact": 3, "strong": 2, "weak": 1} +# Priority order for PR linkage evidence. +PR_MATCH_METHOD_PRIORITY: Tuple[str, ...] = ( + "fingerprint_marker", + "linkage_block", + "branch_convention", + "closing_reference", + "issue_reference", ) # Labels required for an issue to be eligible for execution selection EXECUTION_ELIGIBLE_LABELS: Tuple[str, ...] = ("arch-gap", "copilot-task", "needs-implementation") # GitHub Copilot coding agent assignee username -COPILOT_AGENT_ASSIGNEE = "copilot" +COPILOT_AGENT_ASSIGNEE = "copilot+gpt-5.3-codex" # Canonical architectural objectives aligned with charter §14 priority order OBJECTIVE_LABELS: Dict[str, str] = { "restore-parse-correctness": "Restore or preserve parse correctness (Lane 2)", @@ -307,7 +329,7 @@ class WorkItem: lane: str # charter lane name (e.g. "import_cycles") issue_number: Optional[int] issue_state: str # "open" | "closed" - delegation_state: str # "pending" | "delegated" | "done" | "blocked" | "superseded" + delegation_state: str # "ready-for-delegation" | "delegation-requested" | "delegation-confirmed" | "delegation-failed" | "delegation-unconfirmed" assignee: Optional[str] # GitHub username delegated to (e.g. 
"copilot") pr_number: Optional[int] pr_url: Optional[str] @@ -321,6 +343,18 @@ class WorkItem: run_id: str # workflow run provenance gap_title: str gap_subsystem: str + delegation_mechanism: Optional[str] = None + delegation_requested_at: Optional[str] = None + delegation_confirmed_at: Optional[str] = None + delegation_confirmation_evidence: Optional[Dict[str, Any]] = None + delegation_comment_url: Optional[str] = None + delegation_comment_id: Optional[int] = None + delegation_assignment_evidence: Optional[Dict[str, Any]] = None + pr_match_method: Optional[str] = None + pr_match_confidence: Optional[str] = None + pr_match_evidence: Optional[Dict[str, Any]] = None + lifecycle_fact_state: str = "ready-for-delegation" + lifecycle_inferred_state: Optional[str] = None def log(message: str, *, data: Optional[Dict[str, Any]] = None, json_mode: bool = False) -> None: @@ -909,7 +943,7 @@ def synthesize_issue( """ fp = issue_fingerprint(gap.subsystem, gap.issue_key) body = render_issue_body(gap, config, run_id) - labels: List[str] = ["arch-gap", "copilot-task", "needs-implementation"] + labels: List[str] = ["arch-gap", "copilot-task", "needs-implementation", "ready-for-delegation"] if gap.subsystem in SUBSYSTEM_LABELS: labels.append(gap.subsystem) if gap.priority in ("critical", "high"): @@ -2109,7 +2143,7 @@ def load_work_state(config: Config) -> Dict[str, Any]: """ return read_json( _work_state_path(config), - {"version": VERSION, "updated_at": None, "items": []}, + {"version": VERSION, "updated_at": None, "items": [], "delegation_events": []}, ) @@ -2130,6 +2164,19 @@ def upsert_work_item(work_state: Dict[str, Any], item: WorkItem) -> None: items.append(item_dict) +def _iso_now() -> str: + return dt.datetime.now(dt.timezone.utc).isoformat() + + +def append_delegation_event(work_state: Dict[str, Any], event: Dict[str, Any]) -> None: + """Append one delegation event to work state for auditability.""" + events: List[Dict[str, Any]] = 
work_state.setdefault("delegation_events", [])
+    events.append(event)
+    # keep bounded history
+    if len(events) > 500:
+        work_state["delegation_events"] = events[-500:]
+
+
 def persist_analysis(config: Config, analysis: Dict[str, Any]) -> None:
     atomic_write_json(config.analysis_path, analysis)
     atomic_write_json(config.graph_path, analysis.get("local_import_graph", {}))
@@ -2199,10 +2246,10 @@ def select_ready_issue(
 
     items: List[Dict[str, Any]] = work_state.get("items", [])
 
-    # Count currently in-flight items (delegated, not yet done)
+    # Count currently in-flight items (delegation requested/confirmed, not yet done)
     in_flight = [
         it for it in items
-        if it.get("delegation_state") == "delegated"
+        if it.get("delegation_state") in ("delegation-requested", "delegation-confirmed")
         and not it.get("merged")
         and not it.get("closed_unmerged")
     ]
@@ -2235,7 +2282,11 @@
 
     filtered: List[Tuple[Dict[str, Any], Optional[str], Optional[str]]] = []
-    blocking_lifecycle = {"blocked", "superseded", "in-progress", "pr-open", "merged"}
+    blocking_lifecycle = {
+        "blocked-by-dependency", "superseded-by-issue", "superseded-by-pr",
+        "delegation-requested", "in-progress", "pr-open", "pr-draft",
+        "merged", "closed-unmerged", "failed-delegation",
+    } | set(LEGACY_LIFECYCLE_LABELS)
 
     for issue in candidate_issues:
         issue_labels: Set[str] = {
@@ -2317,121 +2368,193 @@
         lbl["name"] for lbl in issue.get("labels", []) if isinstance(lbl, dict)
     }
     subsystem = next((s for s in SUBSYSTEM_LABELS if s in issue_labels), "runtime")
-    now = dt.datetime.now(dt.timezone.utc).isoformat()
+    now = _iso_now()
+    delegation_mechanism = "assignment+comment"
+
+    linkage_block = (
+        "<!-- repo-architect-linkage\n"
+        f"issue_number: {issue_number}\n"
+        f"fingerprint: {fp}\n"
+        f"lane: {lane}\n"
+        f"run_id: {run_id}\n"
+        "-->"
+    )
+
+    assignment_evidence: Optional[Dict[str, Any]] = None
+    comment_evidence: Optional[Dict[str, Any]] = None
+    errors: List[str] = []
+    delegation_state = "delegation-requested"
+    lifecycle_fact_state = "delegation-requested"
+    delegation_confirmed_at: Optional[str] = None
+    
delegation_confirmation_evidence: Dict[str, Any] = {} result: Dict[str, Any] = { - "action": "dry_run" if dry_run else "delegated", + "action": "dry_run" if dry_run else "delegation_requested", "issue_number": issue_number, "issue_url": issue_url, "issue_title": issue_title, "fingerprint": fp, - "assignee": COPILOT_AGENT_ASSIGNEE if not dry_run else None, - "labels_added": ["in-progress"], + "assignee": None, + "labels_added": ["delegation-requested", "in-progress"], "labels_removed": ["ready-for-delegation"], "dry_run": dry_run, + "delegation_mechanism": delegation_mechanism, + "delegation_assignment_evidence": None, + "delegation_comment_evidence": None, + "delegation_confirmation_evidence": None, } if dry_run: log( - f"[dry-run] Would delegate issue #{issue_number} " + f"[dry-run] Would request delegation for issue #{issue_number} " f"to @{COPILOT_AGENT_ASSIGNEE}: {issue_title}", json_mode=config.log_json, ) else: if not config.github_token or not config.github_repo: - result["action"] = "error" + result["action"] = "delegation_failed" result["error"] = "Missing GITHUB_TOKEN or GITHUB_REPO for live delegation." + errors.append(result["error"]) + delegation_state = "delegation-failed" + lifecycle_fact_state = "failed-delegation" else: - errors: List[str] = [] # 1. Update lifecycle labels - new_labels = (issue_labels - {"ready-for-delegation"}) | {"in-progress"} + new_labels = (issue_labels - {"ready-for-delegation"}) | {"delegation-requested", "in-progress"} try: ensure_github_labels(config, sorted(new_labels)) set_github_issue_labels(config, issue_number, sorted(new_labels)) except RepoArchitectError as exc: errors.append(f"label update: {exc}") - # 2. Assign to Copilot + + # 2. 
Assign to Copilot agent try: - github_request( + assign_resp = github_request( config.github_token, f"/repos/{config.github_repo}/issues/{issue_number}/assignees", method="POST", payload={"assignees": [COPILOT_AGENT_ASSIGNEE]}, ) + assignees = assign_resp.get("assignees", []) if isinstance(assign_resp, dict) else [] + assignment_confirmed = any( + isinstance(a, dict) and a.get("login") == COPILOT_AGENT_ASSIGNEE + for a in assignees + ) + assignment_evidence = { + "confirmed": assignment_confirmed, + "assignees": [a.get("login") for a in assignees if isinstance(a, dict)], + } + if assignment_confirmed: + delegation_confirmation_evidence["assignment"] = assignment_evidence except RepoArchitectError as exc: errors.append(f"assignment: {exc}") - # 3. Post delegation comment + + # 3. Post delegation comment with machine linkage material comment = ( - f"**repo-architect delegation** (run `{run_id}`): " - f"this issue has been selected for execution and assigned to " - f"`@{COPILOT_AGENT_ASSIGNEE}`.\n\n" - f"**Active objective**: `{config.active_objective or 'general'}`\n" - f"**Lane**: `{lane}`\n" - f"**Fingerprint**: `{fp}`\n\n" - f"When a PR is opened, repo-architect reconciliation will ingest its state " - f"and update this issue's lifecycle labels." + f"**repo-architect delegation request** (run `{run_id}`)\n\n" + f"- active objective: `{config.active_objective or 'general'}`\n" + f"- lane: `{lane}`\n" + f"- issue fingerprint: `{fp}`\n" + f"- target assignee: `@{COPILOT_AGENT_ASSIGNEE}`\n\n" + f"{linkage_block}\n\n" + "When opening a PR, include this exact linkage block (or the fingerprint marker) " + "in the PR body so reconciliation can match with exact confidence." 
) try: - update_github_issue_api(config, issue_number, comment) + comment_resp = update_github_issue_api(config, issue_number, comment) + comment_evidence = { + "id": comment_resp.get("id") if isinstance(comment_resp, dict) else None, + "url": comment_resp.get("html_url") if isinstance(comment_resp, dict) else None, + "confirmed": isinstance(comment_resp, dict) and bool(comment_resp.get("id")), + } + if comment_evidence.get("confirmed"): + delegation_confirmation_evidence["comment"] = comment_evidence except RepoArchitectError as exc: errors.append(f"comment: {exc}") + + confirmation_count = len(delegation_confirmation_evidence) + if errors and confirmation_count == 0: + delegation_state = "delegation-failed" + lifecycle_fact_state = "failed-delegation" + result["action"] = "delegation_failed" + elif confirmation_count > 0: + delegation_state = "delegation-confirmed" + lifecycle_fact_state = "in-progress" + delegation_confirmed_at = now + result["action"] = "delegation_confirmed" + result["assignee"] = COPILOT_AGENT_ASSIGNEE + else: + delegation_state = "delegation-unconfirmed" + lifecycle_fact_state = "delegation-requested" + result["action"] = "delegation_unconfirmed" if errors: - result["action"] = "partial" result["errors"] = errors - result["assignee"] = COPILOT_AGENT_ASSIGNEE + result["delegation_assignment_evidence"] = assignment_evidence + result["delegation_comment_evidence"] = comment_evidence + result["delegation_confirmation_evidence"] = delegation_confirmation_evidence or None + + if dry_run: + delegation_state = "delegation-requested" + lifecycle_fact_state = "delegation-requested" + + # Record a top-level delegation event for auditability. 
+ append_delegation_event( + work_state, + { + "ts": now, + "run_id": run_id, + "issue_number": issue_number, + "issue_url": issue_url, + "fingerprint": fp, + "mechanism": delegation_mechanism, + "outcome": delegation_state, + "dry_run": dry_run, + "assignment_evidence": assignment_evidence, + "comment_evidence": comment_evidence, + "errors": errors or None, + }, + ) - # Record in work state - delegation_state = "pending" if dry_run else "delegated" + # Record/update work item existing_item_dict: Optional[Dict[str, Any]] = None for it in work_state.get("items", []): if it.get("fingerprint") == fp: existing_item_dict = it break - if existing_item_dict: - work_item = WorkItem( - fingerprint=fp, - objective=existing_item_dict.get("objective") or config.active_objective or "", - lane=existing_item_dict.get("lane") or lane, - issue_number=issue_number, - issue_state=existing_item_dict.get("issue_state") or "open", - delegation_state=delegation_state, - assignee=COPILOT_AGENT_ASSIGNEE if not dry_run else existing_item_dict.get("assignee"), - pr_number=existing_item_dict.get("pr_number"), - pr_url=existing_item_dict.get("pr_url"), - pr_state=existing_item_dict.get("pr_state"), - merged=bool(existing_item_dict.get("merged")), - closed_unmerged=bool(existing_item_dict.get("closed_unmerged")), - blocked=bool(existing_item_dict.get("blocked")), - superseded=bool(existing_item_dict.get("superseded")), - created_at=existing_item_dict.get("created_at") or now, - updated_at=now, - run_id=run_id, - gap_title=issue_title, - gap_subsystem=subsystem, - ) - else: - work_item = WorkItem( - fingerprint=fp, - objective=config.active_objective or "", - lane=lane, - issue_number=issue_number, - issue_state="open", - delegation_state=delegation_state, - assignee=COPILOT_AGENT_ASSIGNEE if not dry_run else None, - pr_number=None, - pr_url=None, - pr_state=None, - merged=False, - closed_unmerged=False, - blocked=False, - superseded=False, - created_at=now, - updated_at=now, - run_id=run_id, - 
gap_title=issue_title, - gap_subsystem=subsystem, - ) + existing_requested_at = existing_item_dict.get("delegation_requested_at") if existing_item_dict else None + work_item = WorkItem( + fingerprint=fp, + objective=(existing_item_dict.get("objective") if existing_item_dict else None) or config.active_objective or "", + lane=(existing_item_dict.get("lane") if existing_item_dict else None) or lane, + issue_number=issue_number, + issue_state=(existing_item_dict.get("issue_state") if existing_item_dict else None) or "open", + delegation_state=delegation_state, + assignee=COPILOT_AGENT_ASSIGNEE if not dry_run else (existing_item_dict.get("assignee") if existing_item_dict else None), + pr_number=existing_item_dict.get("pr_number") if existing_item_dict else None, + pr_url=existing_item_dict.get("pr_url") if existing_item_dict else None, + pr_state=existing_item_dict.get("pr_state") if existing_item_dict else None, + merged=bool(existing_item_dict.get("merged")) if existing_item_dict else False, + closed_unmerged=bool(existing_item_dict.get("closed_unmerged")) if existing_item_dict else False, + blocked=bool(existing_item_dict.get("blocked")) if existing_item_dict else False, + superseded=bool(existing_item_dict.get("superseded")) if existing_item_dict else False, + created_at=(existing_item_dict.get("created_at") if existing_item_dict else None) or now, + updated_at=now, + run_id=run_id, + gap_title=issue_title, + gap_subsystem=subsystem, + delegation_mechanism=delegation_mechanism, + delegation_requested_at=existing_requested_at or now, + delegation_confirmed_at=delegation_confirmed_at, + delegation_confirmation_evidence=delegation_confirmation_evidence or None, + delegation_comment_url=(comment_evidence or {}).get("url"), + delegation_comment_id=(comment_evidence or {}).get("id"), + delegation_assignment_evidence=assignment_evidence, + pr_match_method=existing_item_dict.get("pr_match_method") if existing_item_dict else None, + 
pr_match_confidence=existing_item_dict.get("pr_match_confidence") if existing_item_dict else None, + pr_match_evidence=existing_item_dict.get("pr_match_evidence") if existing_item_dict else None, + lifecycle_fact_state=lifecycle_fact_state, + lifecycle_inferred_state=("execution-in-progress" if delegation_state == "delegation-confirmed" else None), + ) upsert_work_item(work_state, work_item) return result @@ -2465,8 +2588,30 @@ def _classify_pr(pr: Dict[str, Any]) -> str: return "open" +def _extract_fingerprint_from_pr(pr: Dict[str, Any]) -> Optional[str]: + """Extract arch-gap fingerprint marker from PR body.""" + body = pr.get("body") or "" + return _extract_fingerprint_from_body(body) + + +def _extract_linkage_block(pr: Dict[str, Any]) -> Dict[str, str]: + """Parse repo-architect linkage block from PR body, if present.""" + body = pr.get("body") or "" + m = re.search(r"", body, re.DOTALL | re.IGNORECASE) + if not m: + return {} + block = m.group(1) + pairs: Dict[str, str] = {} + for line in block.splitlines(): + if ":" not in line: + continue + k, v = line.split(":", 1) + pairs[k.strip().lower()] = v.strip() + return pairs + + def _pr_mentions_issue(pr: Dict[str, Any], issue_number: int) -> bool: - """Return True if the PR body or title references the given issue number.""" + """Return True if the PR body/title references the given issue number.""" body = (pr.get("body") or "").lower() title = (pr.get("title") or "").lower() needle = f"#{issue_number}" @@ -2479,6 +2624,126 @@ def _pr_mentions_issue(pr: Dict[str, Any], issue_number: int) -> bool: return False +def _pr_branch_matches_issue(pr: Dict[str, Any], issue_number: int, fingerprint: str) -> bool: + """Return True if PR head branch suggests linkage to issue/fingerprint.""" + head = pr.get("head", {}) if isinstance(pr.get("head"), dict) else {} + ref = (head.get("ref") or "").lower() + return ( + f"issue-{issue_number}" in ref + or f"/{issue_number}" in ref + or fingerprint in ref + ) + + +def 
_evaluate_pr_linkage(pr: Dict[str, Any], issue_number: int, fingerprint: str) -> Optional[Dict[str, Any]]: + """Evaluate PR linkage evidence in priority order. + + Returns an evidence dict with fields: + - method: one of PR_MATCH_METHOD_PRIORITY + - confidence: exact|strong|weak + - evidence: machine-readable details + """ + # 1) explicit fingerprint marker in PR body + pr_fp = _extract_fingerprint_from_pr(pr) + if pr_fp and pr_fp == fingerprint: + return { + "method": "fingerprint_marker", + "confidence": "exact", + "evidence": {"fingerprint": pr_fp}, + } + + # 2) explicit linkage block metadata in PR body + linkage = _extract_linkage_block(pr) + if linkage: + linkage_issue = linkage.get("issue_number") + linkage_fp = linkage.get("fingerprint") + if (linkage_issue and linkage_issue == str(issue_number)) or (linkage_fp and linkage_fp == fingerprint): + return { + "method": "linkage_block", + "confidence": "exact", + "evidence": linkage, + } + + # 3) branch naming convention linked to issue/fingerprint + if _pr_branch_matches_issue(pr, issue_number, fingerprint): + head = pr.get("head", {}) if isinstance(pr.get("head"), dict) else {} + return { + "method": "branch_convention", + "confidence": "strong", + "evidence": {"branch": head.get("ref")}, + } + + # 4) closing keywords / linked issue references + body = (pr.get("body") or "").lower() + closing_patterns = ( + f"closes #{issue_number}", f"fixes #{issue_number}", f"resolves #{issue_number}", + f"closes {issue_number}", f"fixes {issue_number}", f"resolves {issue_number}", + ) + for pat in closing_patterns: + if pat in body: + return { + "method": "closing_reference", + "confidence": "strong", + "evidence": {"pattern": pat}, + } + + # 5) fallback text mention of #issue_number in title/body + if _pr_mentions_issue(pr, issue_number): + return { + "method": "issue_reference", + "confidence": "weak", + "evidence": {"issue_number": issue_number}, + } + return None + + +def _best_pr_match(prs: List[Dict[str, Any]], 
issue_number: int, fingerprint: str) -> Optional[Dict[str, Any]]: + """Select strongest PR match and keep ambiguity evidence.""" + candidates: List[Dict[str, Any]] = [] + state_rank = {"merged": 4, "open": 3, "draft": 2, "closed_unmerged": 1} + method_rank = {m: i for i, m in enumerate(PR_MATCH_METHOD_PRIORITY[::-1], start=1)} + for pr in prs: + linkage = _evaluate_pr_linkage(pr, issue_number, fingerprint) + if not linkage: + continue + pr_state = _classify_pr(pr) + confidence = linkage["confidence"] + candidates.append({ + "pr": pr, + "method": linkage["method"], + "confidence": confidence, + "evidence": linkage["evidence"], + "confidence_rank": MATCH_CONFIDENCE_RANK.get(confidence, 0), + "method_rank": method_rank.get(linkage["method"], 0), + "state_rank": state_rank.get(pr_state, 0), + "pr_state": pr_state, + "updated_at": pr.get("updated_at") or "", + }) + if not candidates: + return None + + candidates.sort( + key=lambda c: ( + c["confidence_rank"], + c["method_rank"], + c["state_rank"], + c["updated_at"], + ), + reverse=True, + ) + best = candidates[0] + best["ambiguous_matches"] = [ + { + "pr_number": c["pr"].get("number"), + "method": c["method"], + "confidence": c["confidence"], + } + for c in candidates[1:] + if c["confidence_rank"] == best["confidence_rank"] and c["method_rank"] == best["method_rank"] + ] + return best + + def _update_issue_lifecycle_labels_for_pr( config: Config, issue_number: int, pr_class: str ) -> None: @@ -2493,15 +2758,17 @@ def _update_issue_lifecycle_labels_for_pr( except RepoArchitectError: return current = {lbl["name"] for lbl in issue_data.get("labels", []) if isinstance(lbl, dict)} - new_labels = current - set(LIFECYCLE_LABELS) + new_labels = current - set(LIFECYCLE_LABELS) - set(LEGACY_LIFECYCLE_LABELS) if pr_class == "merged": new_labels.add("merged") elif pr_class == "closed_unmerged": - new_labels.add("superseded") - elif pr_class in ("open", "draft"): + new_labels.add("closed-unmerged") + elif pr_class == "draft": + 
new_labels.add("pr-draft") + elif pr_class == "open": new_labels.add("pr-open") elif pr_class == "stale": - new_labels.add("blocked") + new_labels.add("stale") try: ensure_github_labels(config, sorted(new_labels)) set_github_issue_labels(config, issue_number, sorted(new_labels)) @@ -2552,20 +2819,29 @@ def reconcile_pr_state( if item.get("merged") or item.get("closed_unmerged"): continue - matching = [pr for pr in recent_prs if _pr_mentions_issue(pr, int(issue_number))] - if not matching: + fingerprint = item.get("fingerprint") or "" + best_match = _best_pr_match(recent_prs, int(issue_number), fingerprint) + if not best_match: # Check for stale (delegated with no PR for too long) updated_str = item.get("updated_at") - if item.get("delegation_state") == "delegated" and updated_str: + if item.get("delegation_state") in ("delegation-requested", "delegation-confirmed", "delegation-unconfirmed") and updated_str: try: updated_at = dt.datetime.fromisoformat(updated_str.replace("Z", "+00:00")) if updated_at < stale_cutoff and item.get("pr_state") != "stale": new_item = dict(item) new_item["pr_state"] = "stale" + new_item["lifecycle_fact_state"] = "stale" + new_item["lifecycle_inferred_state"] = "needs-attention" new_item["updated_at"] = now items[i] = new_item updated += 1 - details.append({"issue": issue_number, "pr_state": "stale"}) + details.append({ + "issue": issue_number, + "pr_state": "stale", + "pr_match_method": None, + "pr_match_confidence": None, + "pr_match_evidence": {"reason": "no_pr_match_within_window"}, + }) if not config.dry_run: _update_issue_lifecycle_labels_for_pr( config, int(issue_number), "stale" @@ -2574,32 +2850,46 @@ def reconcile_pr_state( pass continue - prs_found += len(matching) - merged_pr = next((pr for pr in matching if pr.get("merged_at")), None) - open_pr = next( - (pr for pr in matching if pr.get("state") == "open" and not pr.get("merged_at")), - None, - ) - closed_pr = next( - (pr for pr in matching if pr.get("state") == "closed" and 
not pr.get("merged_at")), - None, - ) - best_pr = merged_pr or open_pr or closed_pr or matching[0] - pr_class = _classify_pr(best_pr) + best_pr = best_match["pr"] + pr_class = best_match["pr_state"] + prs_found += 1 new_item = dict(item) + existing_conf = MATCH_CONFIDENCE_RANK.get(str(item.get("pr_match_confidence") or "").lower(), 0) + new_conf = MATCH_CONFIDENCE_RANK.get(best_match["confidence"], 0) + # Never overwrite a stronger existing match with weaker evidence. + if existing_conf > new_conf and item.get("pr_number"): + continue new_item["pr_number"] = best_pr.get("number") new_item["pr_url"] = best_pr.get("html_url") new_item["pr_state"] = pr_class + new_item["pr_match_method"] = best_match["method"] + new_item["pr_match_confidence"] = best_match["confidence"] + new_item["pr_match_evidence"] = { + "selected_pr_number": best_pr.get("number"), + "method": best_match["method"], + "confidence": best_match["confidence"], + "evidence": best_match["evidence"], + "ambiguous_matches": best_match.get("ambiguous_matches", []), + } new_item["updated_at"] = now if pr_class == "merged": new_item["merged"] = True - new_item["delegation_state"] = "done" + new_item["delegation_state"] = "delegation-confirmed" + new_item["lifecycle_fact_state"] = "merged" + new_item["lifecycle_inferred_state"] = "completed" elif pr_class == "closed_unmerged": new_item["closed_unmerged"] = True - new_item["delegation_state"] = "done" - elif pr_class in ("open", "draft"): - new_item["delegation_state"] = "delegated" + new_item["lifecycle_fact_state"] = "closed-unmerged" + new_item["lifecycle_inferred_state"] = "needs-replanning" + elif pr_class == "draft": + new_item["delegation_state"] = "delegation-confirmed" + new_item["lifecycle_fact_state"] = "pr-draft" + new_item["lifecycle_inferred_state"] = "in-progress" + elif pr_class == "open": + new_item["delegation_state"] = "delegation-confirmed" + new_item["lifecycle_fact_state"] = "pr-open" + new_item["lifecycle_inferred_state"] = "in-progress" 
if new_item != item: items[i] = new_item @@ -2610,6 +2900,10 @@ def reconcile_pr_state( "pr_state": pr_class, "old_delegation": item.get("delegation_state"), "new_delegation": new_item.get("delegation_state"), + "pr_match_method": best_match["method"], + "pr_match_confidence": best_match["confidence"], + "pr_match_evidence": best_match["evidence"], + "ambiguous_matches": best_match.get("ambiguous_matches", []), }) if not config.dry_run: _update_issue_lifecycle_labels_for_pr(config, int(issue_number), pr_class) @@ -2660,6 +2954,7 @@ def ingest_issue_actions_to_work_state( new_it["issue_state"] = "open" new_it["updated_at"] = now new_it["run_id"] = run_id + new_it.setdefault("lifecycle_fact_state", "ready-for-delegation") upsert_work_item(work_state, WorkItem(**new_it)) else: work_item = WorkItem( @@ -2668,7 +2963,7 @@ def ingest_issue_actions_to_work_state( lane="unknown", issue_number=issue_number, issue_state="open", - delegation_state="pending", + delegation_state="ready-for-delegation", assignee=None, pr_number=None, pr_url=None, @@ -2682,6 +2977,7 @@ def ingest_issue_actions_to_work_state( run_id=run_id, gap_title=gap_title, gap_subsystem=gap_subsystem, + lifecycle_fact_state="ready-for-delegation", ) upsert_work_item(work_state, work_item) @@ -2695,7 +2991,9 @@ def _active_fingerprints_in_work_state(work_state: Dict[str, Any]) -> Set[str]: it["fingerprint"] for it in work_state.get("items", []) if it.get("fingerprint") - and it.get("delegation_state") in ("pending", "delegated") + and it.get("delegation_state") in ( + "delegation-requested", "delegation-confirmed", "delegation-unconfirmed" + ) and not it.get("merged") and not it.get("closed_unmerged") } @@ -2740,12 +3038,16 @@ def run_execution_cycle(config: Config) -> Dict[str, Any]: delegation_result = delegate_to_copilot(config, selected, work_state, run_id) save_work_state(config, work_state) - summary_line = ( - f"[dry-run] " if not config.enable_live_delegation else "" - ) + ( - f"delegated issue 
#{selected.get('number')} " - f"— {selected.get('title', '')}" - ) + if not config.enable_live_delegation: + summary_line = ( + f"[dry-run] delegation requested for issue #{selected.get('number')} " + f"— {selected.get('title', '')}" + ) + else: + summary_line = ( + f"{delegation_result.get('action', 'delegation_requested')} issue " + f"#{selected.get('number')} — {selected.get('title', '')}" + ) log(summary_line, json_mode=config.log_json) result: Dict[str, Any] = { diff --git a/tests/test_repo_architect.py b/tests/test_repo_architect.py index ca5ac1a..9cba314 100644 --- a/tests/test_repo_architect.py +++ b/tests/test_repo_architect.py @@ -2486,11 +2486,11 @@ def _make_work_item(self, fp: str = "aabbccddeeff", **kwargs: Any) -> ra.WorkIte defaults: Dict[str, Any] = dict( fingerprint=fp, objective="eliminate-import-cycles", lane="import_cycles", issue_number=42, issue_state="open", - delegation_state="pending", assignee=None, + delegation_state="ready-for-delegation", assignee=None, pr_number=None, pr_url=None, pr_state=None, merged=False, closed_unmerged=False, blocked=False, superseded=False, created_at=now, updated_at=now, run_id="run123", - gap_title="Test gap", gap_subsystem="runtime", + gap_title="Test gap", gap_subsystem="runtime", lifecycle_fact_state="ready-for-delegation", ) defaults.update(kwargs) return ra.WorkItem(**defaults) @@ -2514,10 +2514,10 @@ def test_upsert_updates_existing_item(self) -> None: ws: Dict[str, Any] = {"version": ra.VERSION, "updated_at": None, "items": []} item = self._make_work_item() ra.upsert_work_item(ws, item) - updated = self._make_work_item(delegation_state="delegated") + updated = self._make_work_item(delegation_state="delegation-confirmed") ra.upsert_work_item(ws, updated) self.assertEqual(len(ws["items"]), 1) - self.assertEqual(ws["items"][0]["delegation_state"], "delegated") + self.assertEqual(ws["items"][0]["delegation_state"], "delegation-confirmed") def test_save_and_reload_work_state(self) -> None: with 
tempfile.TemporaryDirectory() as tmp: @@ -2603,7 +2603,7 @@ def test_max_concurrent_blocks_selection(self) -> None: ws: Dict[str, Any] = { "items": [{ "fingerprint": "aabb001122cc", - "delegation_state": "delegated", + "delegation_state": "delegation-confirmed", "merged": False, "closed_unmerged": False, "lane": "import_cycles", "issue_number": 5, }] @@ -2618,7 +2618,7 @@ def test_blocked_fingerprint_excluded(self) -> None: ws: Dict[str, Any] = { "items": [{ "fingerprint": fp, - "delegation_state": "delegated", + "delegation_state": "delegation-confirmed", "merged": False, "closed_unmerged": False, "lane": "import_cycles", "issue_number": 1, }] @@ -2628,7 +2628,7 @@ def test_blocked_fingerprint_excluded(self) -> None: github_token="tok", github_repo="x/y", max_concurrent_delegated=5 ) # The issue's fingerprint is in-flight, so it should be blocked - in_flight = [it for it in ws["items"] if it.get("delegation_state") == "delegated" + in_flight = [it for it in ws["items"] if it.get("delegation_state") == "delegation-confirmed" and not it.get("merged") and not it.get("closed_unmerged")] blocked_fps = {it["fingerprint"] for it in in_flight} body = issue.get("body") or "" @@ -2640,7 +2640,7 @@ def test_lifecycle_labels_exclude_issue(self) -> None: config = self._make_config_exec( github_token="tok", github_repo="x/y" ) - blocking = {"blocked", "superseded", "in-progress", "pr-open", "merged"} + blocking = {"blocked-by-dependency", "superseded-by-issue", "superseded-by-pr", "in-progress", "pr-open", "pr-draft", "merged"} issue_labels = {"arch-gap", "copilot-task", "needs-implementation", "in-progress"} self.assertTrue(issue_labels & blocking, "in-progress should block selection") @@ -2667,13 +2667,13 @@ def test_extract_lane_from_body(self) -> None: def test_active_fingerprints_in_work_state(self) -> None: ws: Dict[str, Any] = { "items": [ - {"fingerprint": "aaa", "delegation_state": "delegated", + {"fingerprint": "aaa", "delegation_state": "delegation-confirmed", 
"merged": False, "closed_unmerged": False}, - {"fingerprint": "bbb", "delegation_state": "pending", + {"fingerprint": "bbb", "delegation_state": "delegation-requested", "merged": False, "closed_unmerged": False}, {"fingerprint": "ccc", "delegation_state": "done", "merged": True, "closed_unmerged": False}, - {"fingerprint": "ddd", "delegation_state": "delegated", + {"fingerprint": "ddd", "delegation_state": "delegation-confirmed", "merged": True, "closed_unmerged": False}, ] } @@ -2705,7 +2705,7 @@ def _make_issue(self, number: int = 10) -> Dict[str, Any]: } def test_dry_run_does_not_mutate_state_action(self) -> None: - """Dry-run delegation records 'pending' delegation_state and 'dry_run' action.""" + """Dry-run delegation records requested state and dry-run action.""" config = _make_config(mode="execution") # enable_live_delegation defaults to False → dry_run=True self.assertFalse(config.enable_live_delegation) @@ -2715,10 +2715,11 @@ def test_dry_run_does_not_mutate_state_action(self) -> None: self.assertEqual(result["action"], "dry_run") self.assertTrue(result["dry_run"]) self.assertIsNone(result["assignee"]) - # Work state should be updated with pending state + # Work state should be updated with delegation-requested state self.assertEqual(len(ws["items"]), 1) - self.assertEqual(ws["items"][0]["delegation_state"], "pending") + self.assertEqual(ws["items"][0]["delegation_state"], "delegation-requested") self.assertEqual(ws["items"][0]["fingerprint"], "aabbccddeeff") + self.assertEqual(ws["items"][0]["lifecycle_fact_state"], "delegation-requested") def test_dry_run_no_credentials_still_records(self) -> None: """Dry-run works even without GitHub credentials.""" @@ -2731,14 +2732,59 @@ def test_dry_run_no_credentials_still_records(self) -> None: self.assertEqual(len(ws["items"]), 1) def test_live_mode_missing_credentials_returns_error(self) -> None: - """Live delegation with no GitHub credentials returns error action.""" + """Live delegation with no GitHub 
credentials returns failed delegation action.""" config = _make_config(mode="execution") config = dataclasses.replace(config, enable_live_delegation=True) ws: Dict[str, Any] = {"items": []} issue = self._make_issue() result = ra.delegate_to_copilot(config, issue, ws, run_id="run-live") - self.assertEqual(result["action"], "error") + self.assertEqual(result["action"], "delegation_failed") self.assertIn("GITHUB_TOKEN", result.get("error", "")) + self.assertEqual(ws["items"][0]["delegation_state"], "delegation-failed") + + def test_live_mode_assignment_and_comment_confirmed(self) -> None: + """Live delegation should be confirmed when assignment+comment evidence exists.""" + config = dataclasses.replace( + _make_config(mode="execution"), + enable_live_delegation=True, + github_token="tok", + github_repo="owner/repo", + ) + ws: Dict[str, Any] = {"items": []} + issue = self._make_issue() + + def _fake_github_request(token: str, path: str, *, method: str = "GET", payload: Any = None) -> Dict[str, Any]: + if path.endswith("/assignees"): + return {"assignees": [{"login": ra.COPILOT_AGENT_ASSIGNEE}]} + return {} + + with patch.object(ra, "github_request", side_effect=_fake_github_request), \ + patch.object(ra, "set_github_issue_labels", return_value={}), \ + patch.object(ra, "ensure_github_labels", return_value=None), \ + patch.object(ra, "update_github_issue_api", return_value={"id": 123, "html_url": "https://github.com/c"}): + result = ra.delegate_to_copilot(config, issue, ws, run_id="run-live-ok") + self.assertEqual(result["action"], "delegation_confirmed") + self.assertEqual(ws["items"][0]["delegation_state"], "delegation-confirmed") + self.assertIsNotNone(ws["items"][0]["delegation_confirmed_at"]) + self.assertTrue(ws["delegation_events"], "Delegation event should be recorded") + + def test_live_mode_unconfirmed_when_no_confirmation_evidence(self) -> None: + """If API calls return no proof, delegation should be unconfirmed not confirmed.""" + config = 
dataclasses.replace( + _make_config(mode="execution"), + enable_live_delegation=True, + github_token="tok", + github_repo="owner/repo", + ) + ws: Dict[str, Any] = {"items": []} + issue = self._make_issue() + with patch.object(ra, "github_request", return_value={"assignees": []}), \ + patch.object(ra, "set_github_issue_labels", return_value={}), \ + patch.object(ra, "ensure_github_labels", return_value=None), \ + patch.object(ra, "update_github_issue_api", return_value={}): + result = ra.delegate_to_copilot(config, issue, ws, run_id="run-live-unconfirmed") + self.assertEqual(result["action"], "delegation_unconfirmed") + self.assertEqual(ws["items"][0]["delegation_state"], "delegation-unconfirmed") def test_upsert_updates_existing_work_item(self) -> None: """A second delegation call updates the existing work item rather than inserting.""" @@ -2777,6 +2823,9 @@ def _make_pr(self, number: int, state: str = "open", "draft": draft, "body": body, "merged_at": merged_at, + "head": {"ref": f"feature/issue-{number}"}, + "created_at": dt.datetime.now(dt.timezone.utc).isoformat().replace("+00:00", "Z"), + "updated_at": dt.datetime.now(dt.timezone.utc).isoformat().replace("+00:00", "Z"), } def test_classify_pr_merged(self) -> None: @@ -2804,6 +2853,54 @@ def test_pr_mentions_issue_closes(self) -> None: pr = self._make_pr(11, body="closes 55") self.assertTrue(ra._pr_mentions_issue(pr, 55)) + def test_reconcile_linkage_by_fingerprint_marker(self) -> None: + pr = self._make_pr(12, body="") + match = ra._evaluate_pr_linkage(pr, issue_number=42, fingerprint="aabbccddeeff") + self.assertIsNotNone(match) + self.assertEqual(match["method"], "fingerprint_marker") + self.assertEqual(match["confidence"], "exact") + + def test_reconcile_linkage_by_explicit_linkage_block(self) -> None: + body = ( + "" + ) + pr = self._make_pr(13, body=body) + match = ra._evaluate_pr_linkage(pr, issue_number=42, fingerprint="aabbccddeeff") + self.assertIsNotNone(match) + self.assertEqual(match["method"], 
"linkage_block") + self.assertEqual(match["confidence"], "exact") + + def test_reconcile_linkage_by_branch_convention(self) -> None: + pr = self._make_pr(14, body="no refs") + pr["head"]["ref"] = "repo-architect/issue-42-aabbccddeeff" + match = ra._evaluate_pr_linkage(pr, issue_number=42, fingerprint="aabbccddeeff") + self.assertIsNotNone(match) + self.assertEqual(match["method"], "branch_convention") + self.assertEqual(match["confidence"], "strong") + + def test_reconcile_fallback_issue_reference(self) -> None: + pr = self._make_pr(15, body="related to #42") + match = ra._evaluate_pr_linkage(pr, issue_number=42, fingerprint="aabbccddeeff") + self.assertIsNotNone(match) + self.assertEqual(match["method"], "issue_reference") + self.assertEqual(match["confidence"], "weak") + + def test_reconcile_ambiguous_multiple_prs_prefers_exact(self) -> None: + exact = self._make_pr(16, body="") + weak = self._make_pr(17, body="mentions #42 only") + best = ra._best_pr_match([weak, exact], issue_number=42, fingerprint="aabbccddeeff") + self.assertIsNotNone(best) + self.assertEqual(best["pr"]["number"], 16) + self.assertEqual(best["confidence"], "exact") + + def test_reconcile_no_match(self) -> None: + pr = self._make_pr(18, body="unrelated body") + match = ra._evaluate_pr_linkage(pr, issue_number=99, fingerprint="ffffffffffff") + self.assertIsNone(match) + def test_reconcile_empty_work_state(self) -> None: config = _make_config(mode="reconcile") ws: Dict[str, Any] = {"items": []} @@ -2812,12 +2909,12 @@ def test_reconcile_empty_work_state(self) -> None: self.assertEqual(result["updated"], 0) def test_reconcile_marks_merged(self) -> None: - """An item linked to a merged PR should get merged=True and delegation_state=done.""" + """An item linked to a merged PR should get merged=True and factual merged lifecycle state.""" config = _make_config(mode="reconcile") ws: Dict[str, Any] = { "items": [{ "fingerprint": "aabb", "issue_number": 42, "lane": "import_cycles", - "delegation_state": 
"delegated", "merged": False, "closed_unmerged": False, + "delegation_state": "delegation-confirmed", "merged": False, "closed_unmerged": False, "blocked": False, "superseded": False, "pr_number": None, "pr_url": None, "pr_state": None, "updated_at": "2025-01-01T00:00:00+00:00", @@ -2833,7 +2930,8 @@ def test_reconcile_marks_merged(self) -> None: result = ra.reconcile_pr_state(config, ws) self.assertEqual(result["updated"], 1) self.assertTrue(ws["items"][0]["merged"]) - self.assertEqual(ws["items"][0]["delegation_state"], "done") + self.assertEqual(ws["items"][0]["lifecycle_fact_state"], "merged") + self.assertEqual(ws["items"][0]["pr_match_method"], "closing_reference") def test_reconcile_marks_stale(self) -> None: """A delegated item with no PR and old updated_at should be marked stale.""" @@ -2842,7 +2940,7 @@ def test_reconcile_marks_stale(self) -> None: ws: Dict[str, Any] = { "items": [{ "fingerprint": "ccdd", "issue_number": 77, "lane": "parse_errors", - "delegation_state": "delegated", "merged": False, "closed_unmerged": False, + "delegation_state": "delegation-confirmed", "merged": False, "closed_unmerged": False, "blocked": False, "superseded": False, "pr_number": None, "pr_url": None, "pr_state": None, "updated_at": old_date, @@ -2865,7 +2963,7 @@ def test_reconcile_stale_respects_stale_timeout_days(self) -> None: ws: Dict[str, Any] = { "items": [{ "fingerprint": "eeff00", "issue_number": 88, "lane": "hygiene", - "delegation_state": "delegated", "merged": False, "closed_unmerged": False, + "delegation_state": "delegation-confirmed", "merged": False, "closed_unmerged": False, "blocked": False, "superseded": False, "pr_number": None, "pr_url": None, "pr_state": None, "updated_at": recent_date, @@ -2884,7 +2982,7 @@ def test_reconcile_stale_respects_stale_timeout_days(self) -> None: ws2: Dict[str, Any] = { "items": [{ "fingerprint": "eeff00", "issue_number": 88, "lane": "hygiene", - "delegation_state": "delegated", "merged": False, "closed_unmerged": False, 
+ "delegation_state": "delegation-confirmed", "merged": False, "closed_unmerged": False, "blocked": False, "superseded": False, "pr_number": None, "pr_url": None, "pr_state": None, "updated_at": recent_date, @@ -2905,7 +3003,7 @@ def test_reconcile_skips_finished_items(self) -> None: ws: Dict[str, Any] = { "items": [{ "fingerprint": "eeff", "issue_number": 10, - "delegation_state": "done", "merged": True, "closed_unmerged": False, + "delegation_state": "delegation-confirmed", "merged": True, "closed_unmerged": False, "pr_state": "merged", "updated_at": "2025-01-01T00:00:00+00:00", "objective": "", "lane": "hygiene", "assignee": None, "pr_number": 5, "pr_url": None, "blocked": False, "superseded": False, @@ -2917,6 +3015,40 @@ def test_reconcile_skips_finished_items(self) -> None: result = ra.reconcile_pr_state(config, ws) self.assertEqual(result["updated"], 0, "Already-merged items should not be re-updated") + def test_closed_unmerged_not_auto_superseded(self) -> None: + """Closed-unmerged should stay factual, not be auto-labelled as superseded.""" + config = _make_config(mode="reconcile") + ws: Dict[str, Any] = {"items": [{ + "fingerprint": "aa11bb22cc33", "issue_number": 42, "lane": "runtime", + "delegation_state": "delegation-confirmed", "merged": False, "closed_unmerged": False, + "blocked": False, "superseded": False, "pr_number": None, "pr_url": None, "pr_state": None, + "updated_at": "2025-01-01T00:00:00+00:00", "objective": "", "assignee": None, + "created_at": "2025-01-01T00:00:00+00:00", "run_id": "r5", + "gap_title": "x", "gap_subsystem": "runtime", "issue_state": "open", + }]} + closed_pr = self._make_pr(201, state="closed", body="fixes #42", merged_at=None) + with patch.object(ra, "_list_prs_for_repo", return_value=[closed_pr]): + ra.reconcile_pr_state(config, ws) + self.assertEqual(ws["items"][0]["lifecycle_fact_state"], "closed-unmerged") + self.assertNotEqual(ws["items"][0]["lifecycle_fact_state"], "superseded-by-pr") + + def 
test_stale_not_auto_blocked_dependency(self) -> None: + """Stale state should not be encoded as blocked-by-dependency without explicit evidence.""" + config = _make_config(mode="reconcile") + old = (dt.datetime.now(dt.timezone.utc) - dt.timedelta(days=30)).isoformat() + ws: Dict[str, Any] = {"items": [{ + "fingerprint": "f0f0f0f0f0f0", "issue_number": 9, "lane": "runtime", + "delegation_state": "delegation-confirmed", "merged": False, "closed_unmerged": False, + "blocked": False, "superseded": False, "pr_number": None, "pr_url": None, "pr_state": None, + "updated_at": old, "objective": "", "assignee": None, + "created_at": old, "run_id": "r6", "gap_title": "x", "gap_subsystem": "runtime", + "issue_state": "open", + }]} + with patch.object(ra, "_list_prs_for_repo", return_value=[]): + ra.reconcile_pr_state(config, ws) + self.assertEqual(ws["items"][0]["lifecycle_fact_state"], "stale") + self.assertNotEqual(ws["items"][0]["lifecycle_fact_state"], "blocked-by-dependency") + def test_run_reconciliation_cycle_no_items(self) -> None: """run_reconciliation_cycle with empty work state returns complete status.""" with tempfile.TemporaryDirectory() as tmp: @@ -2932,7 +3064,11 @@ class TestLifecycleLabelTransitions(unittest.TestCase): """Verify lifecycle label sets are correct and transitions are deterministic.""" def test_lifecycle_labels_present(self) -> None: - expected = {"ready-for-delegation", "in-progress", "pr-open", "merged", "blocked", "superseded"} + expected = { + "ready-for-delegation", "delegation-requested", "in-progress", "pr-open", "pr-draft", + "merged", "closed-unmerged", "stale", "blocked-by-dependency", + "superseded-by-issue", "superseded-by-pr", "failed-delegation", + } self.assertEqual(set(ra.LIFECYCLE_LABELS), expected) def test_execution_eligible_labels_present(self) -> None: @@ -2957,7 +3093,7 @@ def test_merged_lifecycle_added_on_merged_pr(self) -> None: self.assertNotIn("pr-open", new_labels) def test_copilot_assignee_constant(self) -> None: - 
self.assertEqual(ra.COPILOT_AGENT_ASSIGNEE, "copilot") + self.assertEqual(ra.COPILOT_AGENT_ASSIGNEE, "copilot+gpt-5.3-codex") # --------------------------------------------------------------------------- @@ -2998,7 +3134,7 @@ def test_active_fingerprints_blocks_planning(self) -> None: ws: Dict[str, Any] = { "items": [{ "fingerprint": fp, - "delegation_state": "delegated", + "delegation_state": "delegation-confirmed", "merged": False, "closed_unmerged": False, }] } From 71329a519feee0b5b495c65e18accf6e162c677b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 16 Mar 2026 17:42:30 +0000 Subject: [PATCH 4/8] fix(repo-architect): make assignment the sole delegation trigger, post linkage comment before assignment, document workflow approval gate Co-authored-by: Steake <530040+Steake@users.noreply.github.com> --- docs/repo_architect/OPERATOR_GUIDE.md | 50 +++++++++------- repo_architect.py | 82 ++++++++++++++++----------- tests/test_repo_architect.py | 75 ++++++++++++++++++++++-- 3 files changed, 149 insertions(+), 58 deletions(-) diff --git a/docs/repo_architect/OPERATOR_GUIDE.md b/docs/repo_architect/OPERATOR_GUIDE.md index 40d7e1b..8ca9adb 100644 --- a/docs/repo_architect/OPERATOR_GUIDE.md +++ b/docs/repo_architect/OPERATOR_GUIDE.md @@ -248,16 +248,23 @@ python repo_architect.py --mode execution --allow-dirty --lane-filter import_cyc ### How Copilot execution is triggered -In live mode, the execution lane: +GitHub Copilot coding agent is triggered **by issue assignment**. At the moment of assignment, Copilot receives the issue body and all comments that already exist on the issue. Copilot does **not** react to issue comments posted after assignment. + +In live mode, the execution lane performs these steps **in order**: 1. Applies factual lifecycle labels: `delegation-requested` + `in-progress`. -2. Assigns the issue to `@copilot+gpt-5.3-codex`. -3. 
Posts a delegation comment containing a machine linkage block (`repo-architect-linkage`) and fingerprint marker. -4. Records per-mechanism evidence from GitHub API responses: - - assignment evidence (assignee list confirmation) - - comment evidence (comment id/url confirmation) +2. Posts a **pre-assignment audit comment** containing a machine linkage block (`repo-architect-linkage`) and fingerprint marker. Because this comment is posted before assignment, Copilot will receive it as part of the issue context. +3. Assigns the issue to `@copilot+gpt-5.3-codex` — **this is the sole execution trigger**. + +Delegation is considered **confirmed** when the assignment API response lists the target assignee. The audit comment is recorded for traceability but is **not** part of the confirmation contract. Label changes alone are never treated as proof of execution. + +### Workflow approval gate -Delegation is only considered **confirmed** when at least one reliable mechanism is confirmed by API response. Label changes alone are not treated as proof of execution. +When Copilot opens a PR, the associated GitHub Actions workflows are **not triggered by default** unless: +- A user with **write access** approves the workflow run, or +- The repository settings explicitly allow automatic workflow runs from Copilot / first-time contributors. + +Operators should ensure that the repository's **Actions → General → Fork pull request workflows** setting (or equivalent) is configured to match the desired level of automation. Until a workflow run is approved, CI checks will not appear on the Copilot PR. 
--- @@ -330,7 +337,7 @@ The work state is stored in `.agent/work_state.json` (gitignored, refreshed each "ts": "2026-01-01T00:01:02+00:00", "issue_number": 42, "fingerprint": "a1b2c3d4e5f6", - "mechanism": "assignment+comment", + "mechanism": "assignment", "outcome": "delegation-confirmed" } ], @@ -350,7 +357,7 @@ The work state is stored in `.agent/work_state.json` (gitignored, refreshed each "closed_unmerged": false, "blocked": false, "superseded": false, - "delegation_mechanism": "assignment+comment", + "delegation_mechanism": "assignment", "delegation_requested_at": "2026-01-01T00:01:00+00:00", "delegation_confirmed_at": "2026-01-01T00:01:02+00:00", "delegation_confirmation_evidence": {"assignment": {"confirmed": true}}, @@ -392,12 +399,12 @@ The work state is stored in `.agent/work_state.json` (gitignored, refreshed each | `closed_unmerged` | bool | Whether the linked PR was closed without merging | | `blocked` | bool | Whether the item is manually blocked | | `superseded` | bool | Whether the item has been superseded | -| `delegation_mechanism` | string\|null | Delegation mechanism used (`assignment+comment`) | +| `delegation_mechanism` | string\|null | Delegation mechanism used (`assignment` — the sole trigger) | | `delegation_requested_at` | ISO-8601\|null | Delegation request timestamp | -| `delegation_confirmed_at` | ISO-8601\|null | Delegation confirmation timestamp | -| `delegation_confirmation_evidence` | object\|null | Confirmed mechanism evidence map | -| `delegation_comment_url` | string\|null | URL of delegation comment (if created) | -| `delegation_comment_id` | int\|null | GitHub comment id for delegation comment | +| `delegation_confirmed_at` | ISO-8601\|null | Delegation confirmation timestamp (from assignment API) | +| `delegation_confirmation_evidence` | object\|null | Assignment confirmation evidence (sole basis for confirmation) | +| `delegation_comment_url` | string\|null | URL of pre-assignment audit comment (not a trigger) | +| 
`delegation_comment_id` | int\|null | GitHub comment id for pre-assignment audit comment |
 | `delegation_assignment_evidence` | object\|null | Assignment API evidence payload |
 | `pr_match_method` | string\|null | PR linkage method used |
 | `pr_match_confidence` | string\|null | `exact`, `strong`, or `weak` |
@@ -466,7 +473,7 @@ Labels represent factual observed states; planning interpretations are separate:
 
 In dry-run mode (`--dry-run` flag, or `ENABLE_LIVE_DELEGATION=false` for execution mode), the system operates without GitHub API side-effects:
 
 - **Issue mode dry-run**: writes issue bodies to `docs/repo_architect/issues/<fingerprint>.md` instead of calling the Issues API.
-- **Execution mode dry-run** (default): reports which issue would be delegated but does not assign labels, assignees, or post comments. Work state records `delegation_state: delegation-requested` with `dry_run: true` event evidence.
+- **Execution mode dry-run** (default): reports which issue would be delegated but does not assign labels, assignees, or post comments. Work state records `delegation_state: delegation-requested` with `dry_run: true` event evidence. No assignment is made, so no Copilot agent is triggered.
 - **Reconcile mode dry-run**: reads PR state but does not update lifecycle labels on issues.
 
 ---
 
@@ -508,11 +515,14 @@ These groups ensure that:
 
 ### Automated (via execution lane)
 
-1. Execution lane selects the issue and assigns it to `@copilot+gpt-5.3-codex`.
-2. GitHub Copilot coding agent is triggered automatically on assignment.
-3. Copilot reads the issue body and opens a PR.
-4. Reconciliation lane detects the PR and updates work state + lifecycle labels.
-5. Next planning run sees the in-progress state and skips re-raising the issue.
+1. Execution lane selects the issue and posts a pre-assignment audit comment with linkage material.
+2. Execution lane assigns the issue to `@copilot+gpt-5.3-codex` — **this is the sole execution trigger**.
+3. 
GitHub Copilot coding agent receives the issue body + all existing comments (including the linkage material) and opens a PR. +4. A user with write access may need to **approve the workflow run** on the Copilot PR before CI checks execute (see [Workflow approval gate](#workflow-approval-gate)). +5. Reconciliation lane detects the PR and updates work state + lifecycle labels. +6. Next planning run sees the in-progress state and skips re-raising the issue. + +> **Note:** `@copilot` comments on issues after assignment are for human-readable audit only. Copilot does not react to post-assignment issue comments. To iterate on an in-progress PR, use PR review comments instead. --- diff --git a/repo_architect.py b/repo_architect.py index 99fffba..812dd83 100644 --- a/repo_architect.py +++ b/repo_architect.py @@ -2351,8 +2351,16 @@ def delegate_to_copilot( Live mode (config.enable_live_delegation is True): - Adds 'in-progress' label, removes 'ready-for-delegation'. - - Assigns the issue to COPILOT_AGENT_ASSIGNEE ("copilot"). - - Posts a delegation comment. + - Posts a pre-assignment audit comment with machine linkage material + (Copilot receives the issue body + all existing comments at the + moment of assignment, so the linkage comment must precede it). + - Assigns the issue to COPILOT_AGENT_ASSIGNEE — this is the sole + execution trigger for the Copilot coding agent. + + Delegation confirmation is based exclusively on the assignment API + response. The audit comment is recorded for traceability but is NOT + part of the confirmation contract — Copilot does not react to + post-assignment issue comments. Always records the delegation event in work_state. """ @@ -2369,7 +2377,8 @@ def delegate_to_copilot( } subsystem = next((s for s in SUBSYSTEM_LABELS if s in issue_labels), "runtime") now = _iso_now() - delegation_mechanism = "assignment+comment" + # Assignment is the sole execution trigger; the comment is audit-only. + delegation_mechanism = "assignment" linkage_block = ( "