Merge pull request #82 from dmoliveira/my_opencode-e14-integrations-observability

dmoliveira · web-flow · commit ae31851f2579 · 2026-02-13T21:55:28.000+11:00
Integrate E14-T3 plan execution observability
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -46,6 +46,7 @@ All notable changes to this project are documented in this file.
 - Added `instructions/plan_artifact_contract.md` defining `/start-work` plan metadata/checklist format, validation rules, step transitions, and deviation capture requirements.
 - Added `scripts/start_work_command.py` with `/start-work <plan>` execution, persisted checkpoint status, and deviation reporting (`status`, `deviations`).
 - Added `/start-work`, `/start-work-status`, and `/start-work-deviations` aliases in `opencode.json`.
+- Added `/start-work-bg` and `/start-work-doctor-json` aliases for background-safe queueing and execution health diagnostics.
 
 ### Changes
 - Documented extension evaluation outcomes and when each tool is the better fit.
@@ -88,6 +89,7 @@ All notable changes to this project are documented in this file.
 - Expanded README wizard/browser guidance with provider trade-offs, stable-first defaults, and `/browser` usage examples.
 - Expanded browser verification coverage to assert provider reset readiness and added install smoke checks that run browser status/doctor after switching across providers.
 - Expanded install/selftest coverage for `/start-work` plan validation, execution state persistence, and deviation diagnostics.
+- Expanded `/start-work` integrations with background queue handoff, digest recap payloads, and unified `/doctor` visibility.
 
 ## v0.2.0 - 2026-02-12
 
diff --git a/IMPLEMENTATION_ROADMAP.md b/IMPLEMENTATION_ROADMAP.md
@@ -50,7 +50,7 @@ Use this map to avoid overlapping implementations.
 | E11 | Context-Window Resilience Toolkit | done | High | E4 | bd-2tj, bd-n9y, bd-2t0, bd-18e | Improve long-session stability and recovery |
 | E12 | Provider/Model Fallback Visibility | done | Medium | E5 | bd-1jq, bd-298, bd-194, bd-2gq | Explain why model routing decisions happen |
 | E13 | Browser Automation Profile Switching | done | Medium | E1 | bd-3rs, bd-2qy, bd-f6g, bd-393 | Toggle Playwright/agent-browser with checks |
-| E14 | Plan-to-Execution Bridge Command | in_progress | Medium | E2, E3 | bd-1z6, bd-2te | Execute validated plans with progress tracking |
+| E14 | Plan-to-Execution Bridge Command | in_progress | Medium | E2, E3 | bd-1z6, bd-2te, bd-3sg | Execute validated plans with progress tracking |
 | E15 | Todo Enforcer and Plan Compliance | planned | High | E14 | TBD | Keep execution aligned with approved checklists |
 | E16 | Comment and Output Quality Checker Loop | merged | Medium | E23 | TBD | Merged into E23 (PR Review Copilot) |
 | E17 | Auto-Resume and Recovery Loop | planned | High | E11, E14 | TBD | Resume interrupted work from checkpoints safely |
@@ -578,10 +578,11 @@ Every command-oriented epic must ship all of the following:
   - [x] Subtask 14.2.2: Execute steps sequentially with checkpoint updates
   - [x] Subtask 14.2.3: Capture and report deviations from original plan
   - [x] Notes: Added `scripts/start_work_command.py` with plan parsing + validation, sequential checkpoint transitions, persisted execution status, and deviation reporting; wired aliases and smoke/selftest coverage.
-- [ ] Task 14.3: Integrations and observability
-  - [ ] Subtask 14.3.1: Integrate with background subsystem where safe
-  - [ ] Subtask 14.3.2: Integrate with digest summaries for end-of-run recap
-  - [ ] Subtask 14.3.3: Expose execution status in doctor/debug outputs
+- [x] Task 14.3: Integrations and observability
+  - [x] Subtask 14.3.1: Integrate with background subsystem where safe
+  - [x] Subtask 14.3.2: Integrate with digest summaries for end-of-run recap
+  - [x] Subtask 14.3.3: Expose execution status in doctor/debug outputs
+  - [x] Notes: Added background-safe `/start-work` queueing (`--background` + `/start-work-bg`), digest `plan_execution` recap output, and `/doctor` integration via `/start-work doctor --json`.
 - [ ] Task 14.4: Validation and docs
   - [ ] Subtask 14.4.1: Add tests for plan parsing and execution flow
   - [ ] Subtask 14.4.2: Add recovery tests for interrupted plan runs
diff --git a/Makefile b/Makefile
@@ -48,7 +48,7 @@ install-test: ## Run installer smoke test in temp HOME
 	HOME="$$TMP_HOME" python3 "$$TMP_HOME/.config/opencode/my_opencode/scripts/stack_profile_command.py" apply focus; \
 	HOME="$$TMP_HOME" python3 "$$TMP_HOME/.config/opencode/my_opencode/scripts/stack_profile_command.py" status; \
 	if [ -f "$$TMP_HOME/.config/opencode/my_opencode/scripts/browser_command.py" ]; then HOME="$$TMP_HOME" python3 "$$TMP_HOME/.config/opencode/my_opencode/scripts/browser_command.py" status --json; HOME="$$TMP_HOME" python3 "$$TMP_HOME/.config/opencode/my_opencode/scripts/browser_command.py" profile agent-browser; HOME="$$TMP_HOME" python3 "$$TMP_HOME/.config/opencode/my_opencode/scripts/browser_command.py" doctor --json; HOME="$$TMP_HOME" python3 "$$TMP_HOME/.config/opencode/my_opencode/scripts/browser_command.py" profile playwright; HOME="$$TMP_HOME" python3 "$$TMP_HOME/.config/opencode/my_opencode/scripts/browser_command.py" status --json; HOME="$$TMP_HOME" python3 "$$TMP_HOME/.config/opencode/my_opencode/scripts/browser_command.py" doctor --json; fi; \
-	if [ -f "$$TMP_HOME/.config/opencode/my_opencode/scripts/start_work_command.py" ]; then PLAN_FILE="$$TMP_HOME/.config/opencode/my_opencode/.install-test-plan.md"; python3 -c "from pathlib import Path; Path('$$PLAN_FILE').write_text('---\nid: install-test-plan\ntitle: Install Test Plan\nowner: install-test\ncreated_at: 2026-02-13T00:00:00Z\nversion: 1\n---\n\n# Plan\n\n- [ ] 1. Validate command availability\n- [ ] 2. Validate status persistence\n', encoding='utf-8')"; HOME="$$TMP_HOME" python3 "$$TMP_HOME/.config/opencode/my_opencode/scripts/start_work_command.py" "$$PLAN_FILE" --deviation "install smoke" --json; HOME="$$TMP_HOME" python3 "$$TMP_HOME/.config/opencode/my_opencode/scripts/start_work_command.py" status --json; HOME="$$TMP_HOME" python3 "$$TMP_HOME/.config/opencode/my_opencode/scripts/start_work_command.py" deviations --json; fi; \
+	if [ -f "$$TMP_HOME/.config/opencode/my_opencode/scripts/start_work_command.py" ]; then PLAN_FILE="$$TMP_HOME/.config/opencode/my_opencode/.install-test-plan.md"; python3 -c "from pathlib import Path; Path('$$PLAN_FILE').write_text('---\nid: install-test-plan\ntitle: Install Test Plan\nowner: install-test\ncreated_at: 2026-02-13T00:00:00Z\nversion: 1\n---\n\n# Plan\n\n- [ ] 1. Validate command availability\n- [ ] 2. Validate status persistence\n', encoding='utf-8')"; HOME="$$TMP_HOME" python3 "$$TMP_HOME/.config/opencode/my_opencode/scripts/start_work_command.py" "$$PLAN_FILE" --deviation "install smoke" --json; HOME="$$TMP_HOME" python3 "$$TMP_HOME/.config/opencode/my_opencode/scripts/start_work_command.py" "$$PLAN_FILE" --background --json; HOME="$$TMP_HOME" python3 "$$TMP_HOME/.config/opencode/my_opencode/scripts/background_task_manager.py" run --max-jobs 1; HOME="$$TMP_HOME" python3 "$$TMP_HOME/.config/opencode/my_opencode/scripts/start_work_command.py" status --json; HOME="$$TMP_HOME" python3 "$$TMP_HOME/.config/opencode/my_opencode/scripts/start_work_command.py" deviations --json; HOME="$$TMP_HOME" python3 "$$TMP_HOME/.config/opencode/my_opencode/scripts/start_work_command.py" doctor --json; fi; \
 	HOME="$$TMP_HOME" python3 "$$TMP_HOME/.config/opencode/my_opencode/scripts/nvim_integration_command.py" install minimal --link-init; \
 	HOME="$$TMP_HOME" python3 "$$TMP_HOME/.config/opencode/my_opencode/scripts/nvim_integration_command.py" status; \
 	HOME="$$TMP_HOME" python3 "$$TMP_HOME/.config/opencode/my_opencode/scripts/devtools_command.py" status; \
diff --git a/README.md b/README.md
@@ -592,10 +592,18 @@ Epic 14 Task 14.1 defines the baseline plan format and execution-state rules for
 Use:
 ```text
 /start-work path/to/plan.md --json
+/start-work-bg path/to/plan.md
+/bg run --id <job-id>
 /start-work status --json
 /start-work deviations --json
+/start-work-doctor-json
 ```
 
+Integration notes:
+- use `/start-work-bg` to queue plan execution in the background subsystem so it can be reviewed first, then run the queued job with `/bg run --id <job-id>`
+- `/digest run` now includes a `plan_execution` recap block (status, plan id, step counts, deviation count)
+- `/doctor run` includes `start-work` health diagnostics for execution-state visibility
+
 ## Context resilience policy
 
 Epic 11 Task 11.1 defines the baseline policy schema for context-window resilience:
diff --git a/install.sh b/install.sh
@@ -114,8 +114,11 @@ if [ "$SKIP_SELF_CHECK" = false ]; then
     SELF_CHECK_PLAN="$HOME/.config/opencode/my_opencode/.install-selfcheck-plan.md"
     python3 -c "from pathlib import Path; Path('$SELF_CHECK_PLAN').write_text('---\nid: install-selfcheck-plan\ntitle: Install Selfcheck Plan\nowner: installer\ncreated_at: 2026-02-13T00:00:00Z\nversion: 1\n---\n\n# Plan\n\n- [ ] 1. Confirm command wiring\n- [ ] 2. Confirm checkpoint persistence\n', encoding='utf-8')"
     python3 "$INSTALL_DIR/scripts/start_work_command.py" "$SELF_CHECK_PLAN" --deviation "install self-check" --json
+    python3 "$INSTALL_DIR/scripts/start_work_command.py" "$SELF_CHECK_PLAN" --background --json
+    python3 "$INSTALL_DIR/scripts/background_task_manager.py" run --max-jobs 1
     python3 "$INSTALL_DIR/scripts/start_work_command.py" status --json
     python3 "$INSTALL_DIR/scripts/start_work_command.py" deviations --json
+    python3 "$INSTALL_DIR/scripts/start_work_command.py" doctor --json
   fi
   python3 "$INSTALL_DIR/scripts/nvim_integration_command.py" status
   python3 "$INSTALL_DIR/scripts/devtools_command.py" status
@@ -176,8 +179,10 @@ printf "  /browser status\n"
 printf "  /browser profile agent-browser\n"
 printf "  /browser doctor --json\n"
 printf "  /start-work ~/.config/opencode/my_opencode/plan.md --json\n"
+printf "  /start-work-bg ~/.config/opencode/my_opencode/plan.md\n"
 printf "  /start-work status --json\n"
 printf "  /start-work deviations --json\n"
+printf "  /start-work-doctor-json\n"
 printf "  /nvim status\n"
 printf "  /devtools status\n"
 printf "  /devtools install all\n"
diff --git a/opencode.json b/opencode.json
@@ -282,6 +282,14 @@
       "description": "Show captured plan execution deviations",
       "template": "!`python3 \"$HOME/.config/opencode/my_opencode/scripts/start_work_command.py\" deviations --json`\nShow only the command output."
     },
+    "start-work-bg": {
+      "description": "Queue plan execution through background task manager",
+      "template": "!`python3 \"$HOME/.config/opencode/my_opencode/scripts/start_work_command.py\" $ARGUMENTS --background --json`\nShow only the command output."
+    },
+    "start-work-doctor-json": {
+      "description": "Run plan execution health diagnostics in JSON",
+      "template": "!`python3 \"$HOME/.config/opencode/my_opencode/scripts/start_work_command.py\" doctor --json`\nShow only the command output."
+    },
     "nvim": {
       "description": "Manage Neovim OpenCode integration (status|doctor|snippet|install|uninstall)",
       "template": "!`python3 \"$HOME/.config/opencode/my_opencode/scripts/nvim_integration_command.py\" $ARGUMENTS`\nShow only the command output."
diff --git a/scripts/doctor_command.py b/scripts/doctor_command.py
@@ -180,6 +180,18 @@ def script_path(name: str) -> Path:
             "--json",
         ],
     },
+    {
+        "name": "start-work",
+        "kind": "doctor-json",
+        "optional": True,
+        "required_path": str(script_path("start_work_command.py")),
+        "command": [
+            sys.executable,
+            str(script_path("start_work_command.py")),
+            "doctor",
+            "--json",
+        ],
+    },
 ]
 
 
diff --git a/scripts/selftest.py b/scripts/selftest.py
@@ -1732,6 +1732,80 @@ def run_bg(*args: str) -> subprocess.CompletedProcess[str]:
             "start-work deviations should return captured deviation entries",
         )
 
+        start_work_background = subprocess.run(
+            [
+                sys.executable,
+                str(START_WORK_SCRIPT),
+                str(plan_path),
+                "--background",
+                "--json",
+            ],
+            capture_output=True,
+            text=True,
+            env=refactor_env,
+            check=False,
+            cwd=REPO_ROOT,
+        )
+        expect(
+            start_work_background.returncode == 0,
+            "start-work should enqueue background-safe execution",
+        )
+        start_work_background_report = parse_json_output(start_work_background.stdout)
+        expect(
+            start_work_background_report.get("status") == "queued"
+            and bool(start_work_background_report.get("job_id")),
+            "start-work background mode should return queued job id",
+        )
+        queued_job_id = str(start_work_background_report.get("job_id"))
+
+        bg_run_plan = subprocess.run(
+            [
+                sys.executable,
+                str(BG_MANAGER_SCRIPT),
+                "run",
+                "--id",
+                queued_job_id,
+                "--max-jobs",
+                "1",
+            ],
+            capture_output=True,
+            text=True,
+            env=refactor_env,
+            check=False,
+            cwd=REPO_ROOT,
+        )
+        expect(
+            bg_run_plan.returncode == 0,
+            "bg run should execute queued start-work job successfully",
+        )
+
+        digest_plan_path = home / ".config" / "opencode" / "digests" / "plan-run.json"
+        digest_plan_env = refactor_env.copy()
+        digest_plan_env["MY_OPENCODE_DIGEST_PATH"] = str(digest_plan_path)
+        digest_after_plan = subprocess.run(
+            [sys.executable, str(DIGEST_SCRIPT), "run", "--reason", "manual"],
+            capture_output=True,
+            text=True,
+            env=digest_plan_env,
+            check=False,
+            cwd=REPO_ROOT,
+        )
+        expect(
+            digest_after_plan.returncode == 0,
+            "digest run should succeed after start-work execution",
+        )
+        digest_after_plan_payload = load_json_file(digest_plan_path)
+        plan_digest_block = digest_after_plan_payload.get("plan_execution", {})
+        expect(
+            isinstance(plan_digest_block, dict)
+            and plan_digest_block.get("status") == "completed",
+            "digest should include completed plan execution recap",
+        )
+        expect(
+            plan_digest_block.get("plan_id") == "selftest-plan-001",
+            "digest plan execution recap should include plan id",
+        )
+
         invalid_plan_path = tmp / "invalid_plan_execution_selftest.md"
         invalid_plan_path.write_text(
             """---
@@ -2690,6 +2764,20 @@ def run_bg(*args: str) -> subprocess.CompletedProcess[str]:
             "doctor browser check should pass",
         )
 
+        start_work_checks = [
+            check
+            for check in report.get("checks", [])
+            if check.get("name") == "start-work"
+        ]
+        expect(
+            bool(start_work_checks),
+            "doctor summary should include start-work check",
+        )
+        expect(
+            start_work_checks[0].get("ok") is True,
+            "doctor start-work check should pass",
+        )
+
     print("selftest: PASS")
     return 0
 
diff --git a/scripts/session_digest.py b/scripts/session_digest.py
@@ -67,6 +67,55 @@ def build_digest(reason: str, cwd: Path) -> dict:
         "reason": reason,
         "cwd": str(cwd),
         "git": collect_git_snapshot(cwd),
+        "plan_execution": collect_plan_execution_snapshot(),
+    }
+
+
+def collect_plan_execution_snapshot() -> dict:
+    try:
+        layered, _ = load_layered_config()
+    except Exception:
+        return {"status": "unknown", "available": False}
+
+    section = layered.get("plan_execution")
+    if not isinstance(section, dict) or not section:
+        return {"status": "idle", "available": False}
+
+    raw_steps = section.get("steps")
+    steps = raw_steps if isinstance(raw_steps, list) else []
+    counts = {
+        "total": len(steps),
+        "completed": sum(
+            1
+            for step in steps
+            if isinstance(step, dict) and step.get("state") == "completed"
+        ),
+        "failed": sum(
+            1
+            for step in steps
+            if isinstance(step, dict) and step.get("state") == "failed"
+        ),
+        "in_progress": sum(
+            1
+            for step in steps
+            if isinstance(step, dict) and step.get("state") == "in_progress"
+        ),
+    }
+    raw_plan = section.get("plan")
+    plan: dict = raw_plan if isinstance(raw_plan, dict) else {}
+    raw_metadata = plan.get("metadata")
+    metadata: dict = raw_metadata if isinstance(raw_metadata, dict) else {}
+    raw_deviations = section.get("deviations")
+    deviations: list = raw_deviations if isinstance(raw_deviations, list) else []
+
+    return {
+        "status": str(section.get("status") or "idle"),
+        "available": True,
+        "plan_id": metadata.get("id"),
+        "plan_path": plan.get("path"),
+        "finished_at": section.get("finished_at"),
+        "step_counts": counts,
+        "deviation_count": len(deviations),
     }
 
 
@@ -179,6 +228,15 @@ def print_summary(path: Path, digest: dict) -> None:
     if isinstance(post, dict) and post.get("attempted"):
         status = "timeout" if post.get("timed_out") else f"exit {post.get('exit_code')}"
         print(f"post_session: {status}")
+    plan_exec = (
+        digest.get("plan_execution")
+        if isinstance(digest.get("plan_execution"), dict)
+        else {}
+    )
+    if plan_exec:
+        print(f"plan_execution: {plan_exec.get('status', 'idle')}")
+        if plan_exec.get("plan_id"):
+            print(f"plan_id: {plan_exec.get('plan_id')}")
 
 
 def usage() -> int:
@@ -282,6 +340,10 @@ def collect_doctor(path: Path) -> dict:
         if field not in digest:
             warnings.append(f"missing digest field: {field}")
 
+    plan_exec = digest.get("plan_execution")
+    if plan_exec is not None and not isinstance(plan_exec, dict):
+        warnings.append("plan_execution block is invalid")
+
     git_block = digest.get("git")
     if not isinstance(git_block, dict):
         warnings.append("git block is missing or invalid")
diff --git a/scripts/start_work_command.py b/scripts/start_work_command.py