add risk-based-testing

kanfil · kanfil · commit dad002fcc593 · 2025-10-07T15:47:33.000+03:00
diff --git a/.specify/templates/spec-template.md b/.specify/templates/spec-template.md
@@ -82,6 +82,18 @@ When creating this spec from a user prompt:
 - **[Entity 1]**: [What it represents, key attributes without implementation]
 - **[Entity 2]**: [What it represents, relationships to other entities]
 
+## Risk Register *(mandatory)*
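+Document the critical risks the team must mitigate. Capture each risk on its own line in the canonical format shown below, written as a plain list item that starts with `- RISK:` (omit the example's outer bullet and backticks), so downstream automation can parse it: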
+
+- `- RISK: R1 | Statement: Unauthorized access to admin endpoints | Impact: High | Likelihood: Medium | Test: Assert RBAC denies non-admin roles`
+
+Guidance:
+- **RISK**: Stable identifier (`R1`, `SEC-02`, etc.)
+- **Statement**: Concise summary of the exposure in business terms
+- **Impact/Likelihood**: Qualitative level (`Low`, `Medium`, `High`)
+- **Test**: The validation focus needed to prove the mitigation works (e.g., "Fuzz payload to detect overflow")
+- Add additional pipe-delimited attributes when needed (`Owner`, `Notes`, etc.); keep the key names capitalized for readability.
+
 ---
 
 ## Review & Acceptance Checklist
diff --git a/scripts/bash/check-prerequisites.sh b/scripts/bash/check-prerequisites.sh
@@ -78,6 +78,57 @@ done
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 source "$SCRIPT_DIR/common.sh"
 
+# Extract "- RISK: ..." entries from a markdown file as a JSON array on stdout.
+# $1: path to the markdown file (spec.md, plan.md, ...).
+# Prints "[]" when the file is missing or the parser fails (python3 reports the
+# reason on stderr), so callers can always embed the result in JSON output.
+extract_risks() {
+    local file="$1"
+    if [[ ! -f "$file" ]]; then
+        echo "[]"
+        return
+    fi
+
+    python3 - "$file" <<'PY' || echo "[]"
+import json, re, sys
+from pathlib import Path
+
+pattern = re.compile(r"^-\s*RISK:\s*(.+)$", re.IGNORECASE)
+risks = []
+
+# Decode as UTF-8 (tolerating a BOM) instead of the locale's default encoding.
+for line in Path(sys.argv[1]).read_text(encoding="utf-8-sig").splitlines():
+    match = pattern.match(line.strip())
+    if not match:
+        continue
+
+    parts = [p.strip() for p in match.group(1).split("|") if p.strip()]
+    data = {}
+
+    # A leading segment without ":" is the bare risk id ("- RISK: R1 | ...").
+    if parts and ":" not in parts[0]:
+        data["id"] = parts[0]
+        parts = parts[1:]
+
+    for part in parts:
+        if ":" not in part:
+            continue
+        key, value = (s.strip() for s in part.split(":", 1))
+        # Keys are lower-cased with "_" for spaces; "Risk: X" also sets the id.
+        normalized = key.lower().replace(" ", "_")
+        if normalized == "risk":
+            data["id"] = value
+        else:
+            data[normalized] = value
+
+    if data:
+        data.setdefault("id", f"missing-id-{len(risks)+1}")
+        risks.append(data)
+
+print(json.dumps(risks, ensure_ascii=False))
+PY
+}
+
 # Get feature paths and validate branch
 eval $(get_feature_paths)
 check_feature_branch "$CURRENT_BRANCH" "$HAS_GIT" || exit 1
@@ -160,7 +211,9 @@ if $JSON_MODE; then
         json_docs="[${json_docs%,}]"
     fi
     
-    printf '{"FEATURE_DIR":"%s","AVAILABLE_DOCS":%s}\n' "$FEATURE_DIR" "$json_docs"
+    SPEC_RISKS=$(extract_risks "$FEATURE_SPEC")
+    PLAN_RISKS=$(extract_risks "$IMPL_PLAN")
+    printf '{"FEATURE_DIR":"%s","AVAILABLE_DOCS":%s,"SPEC_RISKS":%s,"PLAN_RISKS":%s}\n' "$FEATURE_DIR" "$json_docs" "$SPEC_RISKS" "$PLAN_RISKS"
 else
     # Text output
     echo "FEATURE_DIR:$FEATURE_DIR"
@@ -175,4 +228,26 @@ else
     if $INCLUDE_TASKS; then
         check_file "$TASKS" "tasks.md"
     fi
+
+    # Print the number of items in the JSON array read from stdin (0 when the
+    # input is not valid JSON). The program is passed with -c on purpose: with
+    # `python3 - <<'PY'` the here-document becomes python's stdin and replaces
+    # the pipe, so the piped JSON is never read and the count is always 0.
+    # Relies on extract_risks writing its JSON array to stdout.
+    count_json_items() {
+        python3 -c '
+import json, sys
+try:
+    data = json.load(sys.stdin)
+except json.JSONDecodeError:
+    data = []
+print(len(data))
+'
+    }
+
+    spec_risks_count=$(extract_risks "$FEATURE_SPEC" | count_json_items)
+    plan_risks_count=$(extract_risks "$IMPL_PLAN" | count_json_items)
+
+    echo "SPEC_RISKS: $spec_risks_count"
+    echo "PLAN_RISKS: $plan_risks_count"
 fi
diff --git a/scripts/powershell/check-prerequisites.ps1 b/scripts/powershell/check-prerequisites.ps1
@@ -56,6 +56,64 @@ EXAMPLES:
 # Source common functions
 . "$PSScriptRoot/common.ps1"
 
+function Get-RisksFromFile {
+    # Parse "- RISK: <id> | Key: value | ..." lines of a markdown file into objects.
+    # Always returns an array (possibly empty) so callers can rely on .Count and
+    # ConvertTo-Json emits a JSON array, as the bash script does for its output.
+    param([string]$Path)
+
+    if ([string]::IsNullOrWhiteSpace($Path) -or -not (Test-Path -LiteralPath $Path -PathType Leaf)) {
+        return ,@()
+    }
+
+    $risks = @()
+    $pattern = '^-\s*RISK:\s*(.+)$'
+
+    foreach ($line in Get-Content -LiteralPath $Path -Encoding UTF8) {
+        if ($line.Trim() -notmatch $pattern) { continue }
+
+        # Split on '|' and drop empty segments.
+        $parts = @($Matches[1] -split '\|' | ForEach-Object { $_.Trim() } | Where-Object { $_ })
+
+        # Ordered so the JSON keys keep the order in which they were written.
+        $data = [ordered]@{}
+
+        # A leading segment without ':' is the bare risk id ("- RISK: R1 | ...").
+        if ($parts.Count -gt 0 -and $parts[0] -notmatch ':') {
+            $data['id'] = $parts[0]
+            $parts = @($parts | Select-Object -Skip 1)
+        }
+
+        foreach ($part in $parts) {
+            # Segments without ':' are ignored; the value may be empty ("Owner:").
+            if ($part -match '^\s*([^:]+):\s*(.*)$') {
+                $key = $Matches[1].Trim()
+                $value = $Matches[2].Trim()
+                $normalized = $key.ToLower().Replace(' ', '_')
+
+                if ($normalized -eq 'risk') {
+                    $data['id'] = $value
+                } else {
+                    $data[$normalized] = $value
+                }
+            }
+        }
+
+        # Skip lines that yielded no fields (the bash/python parser does the same).
+        if ($data.Count -eq 0) { continue }
+
+        if (-not $data.Contains('id')) {
+            $data['id'] = "missing-id-$($risks.Count + 1)"
+        }
+
+        $risks += [PSCustomObject]$data
+    }
+
+    # The unary comma stops PowerShell from unrolling the array on return, which
+    # would hand callers $null (no risks) or a bare object (exactly one risk).
+    return ,$risks
+}
+
 # Get feature paths and validate branch
 $paths = Get-FeaturePathsEnv
 
@@ -125,11 +183,16 @@ if ($IncludeTasks -and (Test-Path $paths.TASKS)) {
 }
 
 # Output results
+    $specRisks = Get-RisksFromFile -Path $paths.FEATURE_SPEC
+    $planRisks = Get-RisksFromFile -Path $paths.IMPL_PLAN
+
 if ($Json) {
     # JSON output
     [PSCustomObject]@{ 
         FEATURE_DIR = $paths.FEATURE_DIR
         AVAILABLE_DOCS = $docs 
+        SPEC_RISKS = $specRisks
+        PLAN_RISKS = $planRisks
     } | ConvertTo-Json -Compress
 } else {
     # Text output
@@ -145,4 +208,7 @@ if ($Json) {
     if ($IncludeTasks) {
         Test-FileExists -Path $paths.TASKS -Description 'tasks.md' | Out-Null
     }
+
+    Write-Output "SPEC_RISKS: $($specRisks.Count)"
+    Write-Output "PLAN_RISKS: $($planRisks.Count)"
 }
diff --git a/templates/commands/implement.md b/templates/commands/implement.md
@@ -11,7 +11,7 @@ User input:
 
 $ARGUMENTS
 
-1. Run `{SCRIPT}` from repo root and parse FEATURE_DIR and AVAILABLE_DOCS list. All paths must be absolute.
+1. Run `{SCRIPT}` from repo root and parse FEATURE_DIR, AVAILABLE_DOCS, SPEC_RISKS, and PLAN_RISKS. All paths must be absolute.
 
 2. Verify feature context:
    - Load `context.md` from FEATURE_DIR.
@@ -25,6 +25,7 @@ $ARGUMENTS
    - **IF EXISTS**: Read research.md for technical decisions and constraints
    - **IF EXISTS**: Read quickstart.md for integration scenarios
    - Extract the Execution Mode for each task (`[SYNC]` or `[ASYNC]`) and ensure every task is explicitly tagged. STOP and raise an error if any tasks lack a mode.
+   - Consolidate the Risk Register by merging SPEC_RISKS and PLAN_RISKS. Ensure every Risk ID listed in tasks.md has a corresponding definition (statement, mitigation, test strategy, evidence artefact). STOP if any gaps remain.
 
 4. Parse tasks.md structure and extract:
    - **Task phases**: Setup, Tests, Core, Integration, Polish
@@ -48,6 +49,7 @@ $ARGUMENTS
    - **Core development**: Implement models, services, CLI commands, endpoints
    - **Integration work**: Database connections, middleware, logging, external services
    - **Polish and validation**: Unit tests, performance optimization, documentation
+   - **Risk validation**: For each `[SYNC]` risk test task (`TRxx`), execute the specified tests, capture pass/fail output, and record the evidence in the artefact defined by plan.md (e.g., `risk-tests/R1.log` or the Risk Evidence Log table).
 
 7. Progress tracking and error handling:
    - Report progress after each completed task
@@ -56,6 +58,7 @@ $ARGUMENTS
    - Provide clear error messages with context for debugging
    - Suggest next steps if implementation cannot proceed
     - **IMPORTANT** For completed tasks, mark the task off as `[X]` in tasks.md while preserving `[SYNC]/[ASYNC]` tags and [P] markers.
+    - Update the **Risk Evidence Log** in tasks.md with the test task ID, evidence artefact, and a concise summary (include command executed and assertion outcome). Missing evidence is a blocker for completion.
 
 8. Completion validation:
    - Verify all required tasks are completed
diff --git a/templates/commands/tasks.md b/templates/commands/tasks.md
@@ -11,7 +11,7 @@ User input:
 
 $ARGUMENTS
 
-1. Run `{SCRIPT}` from repo root and parse FEATURE_DIR and AVAILABLE_DOCS list. All paths must be absolute.
+1. Run `{SCRIPT}` from repo root and parse FEATURE_DIR, AVAILABLE_DOCS, SPEC_RISKS, and PLAN_RISKS. All paths must be absolute.
 2. Load and validate `context.md` from the feature directory:
    - STOP if the file is missing or contains `[NEEDS INPUT]` markers.
    - Capture mission highlights, relevant code paths, directives, and gateway status for downstream reasoning.
@@ -28,7 +28,12 @@ $ARGUMENTS
    - Simple libraries might not need data-model.md
    - Generate tasks based on what's available
 
-4. Generate tasks following the template:
+4. Build a consolidated Risk Register:
+   - Merge SPEC_RISKS and PLAN_RISKS by `id` (Risk ID). If the same risk appears in both, prefer the richer description from plan.md for mitigation details.
+   - STOP and prompt the developer if the spec declares risks but the plan lacks matching mitigation/test strategy rows.
+   - For each consolidated risk, capture: `id`, `statement`, `impact`, `likelihood`, `test` (focus), and `evidence` (artefact path). Missing fields must be escalated to the developer.
+
+5. Generate tasks following the template:
    - Use `/templates/tasks-template.md` as the base
    - Replace example tasks with actual tasks based on:
      * **Setup tasks**: Project init, dependencies, linting
@@ -37,34 +42,37 @@ $ARGUMENTS
      * **Integration tasks**: DB connections, middleware, logging
      * **Polish tasks [P]**: Unit tests, performance, docs
     - For every task, append the Execution Mode tag `[SYNC]` or `[ASYNC]` as dictated by the plan. Never invent a mode—ask the developer when absent.
+     - Introduce dedicated `[SYNC]` risk test tasks (prefix `TR`) for each Risk ID, referencing the exact file path to implement and the evidence artefact where `/implement` must store test output.
 
-5. Task generation rules:
+6. Task generation rules:
    - Each contract file → contract test task marked [P]
    - Each entity in data-model → model creation task marked [P]
    - Each endpoint → implementation task (not parallel if shared files)
    - Each user story → integration test marked [P]
    - Different files = can be parallel [P]
    - Same file = sequential (no [P])
     - Preserve the Execution Mode from the plan so downstream tooling can enforce SYNC vs ASYNC workflows.
+    - Every Risk ID MUST have at least one `[SYNC]` test task with a clearly defined evidence artefact. If multiple mitigations exist, generate a task per mitigation/test.
 
-6. Order tasks by dependencies:
+7. Order tasks by dependencies:
    - Setup before everything
    - Tests before implementation (TDD)
    - Models before services
    - Services before endpoints
    - Core before integration
    - Everything before polish
 
-7. Include parallel execution examples:
+8. Include parallel execution examples:
    - Group [P] tasks that can run together
    - Show actual Task agent commands
 
-8. Create FEATURE_DIR/tasks.md with:
+9. Create FEATURE_DIR/tasks.md with:
    - Correct feature name from implementation plan
    - Numbered tasks (T001, T002, etc.)
    - Clear file paths for each task
    - Dependency notes
    - Parallel execution guidance
+   - Updated **Risk Evidence Log** table populated with Risk IDs and placeholder evidence entries (`TBD`) for `/implement` to update.
 
 Context for task generation: {ARGS}
 
diff --git a/templates/plan-template.md b/templates/plan-template.md
@@ -75,6 +75,17 @@ specs/[###-feature]/
 └── tasks.md             # Phase 2 output (/tasks command - NOT created by /plan)
 ```
 
+## Risk Mitigation & Test Strategy
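+*Map every risk from the specification's Risk Register to a concrete mitigation and validation artefact. The prerequisites script only detects `- RISK: ...` list lines, so repeat each risk in that format below the table if it must be reported as `PLAN_RISKS`.*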
+
+| Risk ID | Statement | Mitigation Owner | Test Strategy | Evidence Artefact |
+|---------|-----------|------------------|---------------|--------------------|
+| R1 | Unauthorized access to admin endpoints | Security | RBAC denies non-admin roles via integration test | risk-tests/R1.log |
+
+- Keep `Risk ID` identical to the spec entry (e.g., `R1`).
+- `Test Strategy` describes the exact validation (unit, integration, fuzz, chaos, etc.)
+- `Evidence Artefact` states where `/implement` must record the passing test output (e.g., `specs/001-feature/risk-test-evidence.md`).
+
 ### Source Code (repository root)
 <!--
   ACTION REQUIRED: Replace the placeholder tree below with the concrete layout
@@ -186,6 +197,7 @@ directories captured above]
 - Each user story → integration test task
 - Implementation tasks to make tests pass
 - Carry forward the Execution Mode from the Triage Overview: annotate each generated task with `[SYNC]` or `[ASYNC]` based on the finalized plan decision.
+- For every risk entry, generate at least one dedicated `[SYNC]` test task referencing the Risk ID and the evidence artefact.
 
 **Ordering Strategy**:
 - TDD order: Tests before implementation 
diff --git a/templates/tasks-template.md b/templates/tasks-template.md
@@ -55,6 +55,10 @@
 - [ ] T006 [SYNC] [P] Integration test user registration in tests/integration/test_registration.py
 - [ ] T007 [SYNC] [P] Integration test auth flow in tests/integration/test_auth.py
 
+## Phase 3.2b: Risk-Based Tests (Spec Risk Register)
+- [ ] TR01 [SYNC] Risk R1 — RBAC denies non-admin roles (tests/integration/test_admin_access.py) → Capture evidence in risk-tests/R1.log
+- [ ] TR02 [SYNC] Risk R2 — Prevent data loss during retries (tests/integration/test_retry_durability.py) → Capture evidence in risk-tests/R2.log
+
 ## Phase 3.3: Core Implementation (ONLY after tests are failing)
 - [ ] T008 [ASYNC] [P] User model in src/models/user.py
 - [ ] T009 [ASYNC] [P] UserService CRUD in src/services/user_service.py
@@ -98,6 +102,7 @@ Task: "Integration test auth in tests/integration/test_auth.py"
 - Verify tests fail before implementing
 - Commit after each task
 - Avoid: vague tasks, same file conflicts
+- Risk tasks (`TRxx`) must reference the exact Risk ID and evidence location defined in plan.md.
 
 ## Task Generation Rules
 *Applied during main() execution*
@@ -118,6 +123,11 @@ Task: "Integration test auth in tests/integration/test_auth.py"
    - Setup → Tests → Models → Services → Endpoints → Polish
    - Dependencies block parallel execution
 
+5. **Risk Coverage**:
+   - For each Risk ID in spec/plan, create at least one `[SYNC]` test task prefixed `TR` that references the risk, test path, and evidence artefact.
+   - If multiple mitigations exist, create a task per mitigation/test.
+   - Document required evidence capture in the task description (e.g., `→ Capture evidence in risk-tests/R1.log`).
+
 ## Validation Checklist
 *GATE: Checked by main() before returning*
 
@@ -126,4 +136,10 @@ Task: "Integration test auth in tests/integration/test_auth.py"
 - [ ] All tests come before implementation
 - [ ] Parallel tasks truly independent
 - [ ] Each task specifies exact file path
-- [ ] No task modifies same file as another [P] task
+- [ ] No task modifies same file as another [P] task
+- [ ] Every Risk ID has at least one `[SYNC]` risk test task with evidence path
+
+## Risk Evidence Log (maintained during /implement)
+| Risk ID | Test Task ID | Evidence Artefact | Evidence Summary |
+|---------|--------------|-------------------|------------------|
+| R1 | TR01 | risk-tests/R1.log | TBD – populate with `/implement` test output |
diff --git a/tests/test_check_prerequisites_risks.py b/tests/test_check_prerequisites_risks.py