52 changes: 52 additions & 0 deletions CLAUDE.md
@@ -341,6 +341,58 @@ new WebSocket(`wss://api.example.com/ws?token=${authToken}`);
new WebSocket('wss://api.example.com/ws');
```

### E2E Test Architecture Policy

**CRITICAL: Never use `test.skip()` inside test logic.**

Using `test.skip()` within test execution masks bugs: if the UI fails to appear because of a bug, the test silently skips instead of failing.

**Correct patterns:**
```typescript
// ✅ Skip at describe level for environmental conditions
test.describe('Feature requiring API key', () => {
test.skip(!process.env.ANTHROPIC_API_KEY, 'Requires ANTHROPIC_API_KEY');

test('does something with API', async () => {
// Test runs OR entire suite is skipped - never silently passes
});
});

// ✅ Assert UI elements exist - FAIL if missing
test('should show approve button', async ({ page }) => {
const button = page.getByRole('button', { name: /approve/i });
await expect(button).toBeVisible(); // FAILS if not visible
await button.click();
});

// ✅ Use separate test projects for different states
const PROJECT_ID = TEST_PROJECT_IDS.PLANNING; // Seeded in planning phase
```

**Anti-patterns to avoid:**
```typescript
// ❌ NEVER: Skip inside test logic
test('approves tasks', async ({ page }) => {
const button = page.getByRole('button', { name: /approve/i });
if (!(await button.isVisible())) {
test.skip(true, 'Button not visible'); // MASKS BUGS
return;
}
// ...
});

// ❌ NEVER: Silent pass on missing elements
if (await element.isVisible()) {
// do assertions
}
// Test passes even if element never shows up!
```

**Test data guarantees:**
- `TEST_PROJECT_IDS.PLANNING` is seeded in planning phase with tasks
- `TEST_PROJECT_IDS.ACTIVE` is seeded in active phase with agents
- If test data doesn't match expectations, that's a bug in `seed-test-data.py`

Comment on lines +344 to +395
⚠️ Potential issue | 🟡 Minor

Good policy; tighten scope wording + avoid duplicated policy text in-file.

  • Consider explicitly stating the rule also applies to beforeEach/afterEach hooks (same “inside test execution” pitfall); see the sketch after this comment.
  • AI summary mentions the section is inserted twice; if so, prefer one authoritative section + cross-links to prevent drift.
🤖 Prompt for AI Agents
In @CLAUDE.md around lines 344-395, the "E2E Test Architecture Policy" section
needs two edits: add an explicit sentence that the "Never use test.skip()
inside test logic" rule also applies to beforeEach/afterEach hooks (i.e., avoid
calling test.skip or conditional skips inside those hooks), and remove the
duplicated policy block by keeping this single authoritative "E2E Test
Architecture Policy" section (or merging differences) and replacing other
copies with a cross-reference note pointing to this section; reference the
policy heading "E2E Test Architecture Policy" and the examples/anti-patterns in
the same section when making the edits.
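
To illustrate the reviewer's first point, a minimal sketch of the same pitfall inside a hook, assuming Playwright's standard `test` API (the route and selectors here are hypothetical):

```typescript
import { test, expect } from '@playwright/test';

// ❌ Hypothetical anti-pattern: a conditional skip inside beforeEach
// silently skips every test in the suite when the setup UI is broken.
test.describe('Approval flow (anti-pattern)', () => {
  test.beforeEach(async ({ page }) => {
    await page.goto('/tasks');
    if (!(await page.getByRole('list').isVisible())) {
      test.skip(true, 'Task list not visible'); // MASKS BUGS, same as in tests
    }
  });

  test('approves tasks', async ({ page }) => { /* ... */ });
});

// ✅ Hooks should assert their preconditions so broken setup fails loudly.
test.describe('Approval flow', () => {
  test.beforeEach(async ({ page }) => {
    await page.goto('/tasks');
    await expect(page.getByRole('list')).toBeVisible(); // FAILS if missing
  });

  test('approves tasks', async ({ page }) => { /* ... */ });
});
```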

### E2E Test Limitations (Issue #172)
Current E2E tests have coverage gaps:
- Tests verify DOM exists, not API success
134 changes: 131 additions & 3 deletions codeframe/ui/routers/tasks.py
@@ -5,12 +5,16 @@
- Task updates
- Task status management
- Task approval (for planning phase automation)
- Multi-agent execution trigger (P0 fix)
"""

import asyncio
import logging
from typing import List, Optional
import os
from datetime import datetime, UTC
from typing import Any, List, Optional

from fastapi import APIRouter, Depends, HTTPException
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException
from pydantic import BaseModel, Field

from codeframe.core.models import Task, TaskStatus
@@ -21,6 +21,25 @@
from codeframe.ui.websocket_broadcasts import broadcast_development_started
from codeframe.auth.dependencies import get_current_user
from codeframe.auth.models import User
from codeframe.agents.lead_agent import LeadAgent

logger = logging.getLogger(__name__)

@@ -30,6 +35,107 @@
project_router = APIRouter(prefix="/api/projects", tags=["tasks"])


# ============================================================================
# Background Task for Multi-Agent Execution (P0 Fix)
# ============================================================================


async def start_development_execution(
project_id: int,
db: Database,
ws_manager: Any,
api_key: str
) -> None:
"""
Background task to start multi-agent execution after task approval.

This function:
1. Creates a LeadAgent instance for the project
2. Calls start_multi_agent_execution() to create agents and assign tasks
3. Handles errors gracefully with logging and WebSocket notifications

Workflow:
- LeadAgent loads all approved tasks from database
- Builds dependency graph for task ordering
- Creates agents on-demand via AgentPoolManager
- Assigns tasks to agents and executes in parallel
- Broadcasts agent_created and task_assigned events via WebSocket
- Continues until all tasks complete or fail

Args:
project_id: Project ID to start execution for
db: Database instance
ws_manager: WebSocket manager for broadcasts
api_key: Anthropic API key for agent creation
"""
try:
logger.info(f"🚀 Starting multi-agent execution for project {project_id}")

# Create LeadAgent instance with WebSocket manager for event broadcasts
lead_agent = LeadAgent(
project_id=project_id,
db=db,
api_key=api_key,
ws_manager=ws_manager
)

# Start multi-agent execution (creates agents and assigns tasks)
# This is the main coordination loop that:
# 1. Loads all tasks and builds dependency graph
# 2. Creates agents on-demand via agent_pool_manager.get_or_create_agent()
# 3. Assigns tasks to agents and executes in parallel
# 4. Broadcasts agent_created events when agents are created
# 5. Broadcasts task_assigned events when tasks are assigned
# 6. Continues until all tasks complete or fail
summary = await lead_agent.start_multi_agent_execution(
max_retries=3,
max_concurrent=5,
timeout=300
)

logger.info(
lead_agent.start_multi_agent_execution() may return None; if it does, summary.get(...) raises AttributeError, which the broad except handler then reports as a failed run. Consider guarding with summary = summary or {} before logging.

Suggested change
-        logger.info(
+        summary = summary or {}
+        logger.info(

f"✅ Multi-agent execution completed for project {project_id}: "
f"{summary.get('completed', 0)}/{summary.get('total_tasks', 0)} tasks completed, "
f"{summary.get('failed', 0)} failed, {summary.get('execution_time', 0):.2f}s"
)

except asyncio.TimeoutError:
logger.error(
f"❌ Multi-agent execution timed out for project {project_id} after 300s"
)
# Broadcast timeout error to UI (guarded to prevent masking original error)
try:
await ws_manager.broadcast(
{
"type": "development_failed",
"project_id": project_id,
"error": "Multi-agent execution timed out after 300 seconds",
"timestamp": datetime.now(UTC).isoformat().replace("+00:00", "Z")
},
project_id=project_id
)
except Exception:
logger.exception("Failed to broadcast development_failed (timeout)")
except Exception as e:
logger.error(
f"❌ Failed to start multi-agent execution for project {project_id}: {e}",
exc_info=True
)
# Broadcast error to UI (guarded to prevent masking original error)
try:
await ws_manager.broadcast(
{
"type": "development_failed",
"project_id": project_id,
"error": str(e),
"timestamp": datetime.now(UTC).isoformat().replace("+00:00", "Z")
},
project_id=project_id
)
except Exception:
logger.exception("Failed to broadcast development_failed (error)")


class TaskCreateRequest(BaseModel):
"""Request model for creating a task."""
project_id: int
Expand Down Expand Up @@ -136,18 +242,21 @@ class TaskApprovalResponse(BaseModel):
async def approve_tasks(
project_id: int,
request: TaskApprovalRequest,
background_tasks: BackgroundTasks,
db: Database = Depends(get_db),
current_user: User = Depends(get_current_user),
) -> TaskApprovalResponse:
"""Approve tasks and transition project to development phase.

This endpoint allows users to approve generated tasks after reviewing them.
Approved tasks are updated to 'pending' status and the project phase
transitions to 'active' (development).
transitions to 'active' (development). After approval, multi-agent execution
is triggered in the background.

Args:
project_id: Project ID
request: Approval request with approved flag and optional exclusions
background_tasks: FastAPI background tasks for async execution
db: Database connection
current_user: Authenticated user

Expand Down Expand Up @@ -230,6 +339,25 @@ async def approve_tasks(
excluded_count=len(excluded_tasks),
)

# START MULTI-AGENT EXECUTION IN BACKGROUND
# Schedule background task to create agents and start task execution
# This follows the same pattern as start_project_agent in agents.py
api_key = os.environ.get("ANTHROPIC_API_KEY")
if api_key:
# Schedule background task to start multi-agent execution
background_tasks.add_task(
start_development_execution,
project_id,
db,
manager,
api_key
)
logger.info(f"✅ Scheduled multi-agent execution for project {project_id}")
else:
logger.warning(
f"⚠️ ANTHROPIC_API_KEY not configured - cannot start agents for project {project_id}"
)

logger.info(
f"Tasks approved for project {project_id}: "
f"{len(approved_tasks)} approved, {len(excluded_tasks)} excluded"
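
For context, the scheduling call above follows FastAPI's standard `BackgroundTasks` pattern. A minimal, self-contained sketch — only `BackgroundTasks.add_task` is FastAPI API; the endpoint and helper names are hypothetical:

```python
from fastapi import BackgroundTasks, FastAPI

app = FastAPI()


async def run_execution(project_id: int) -> None:
    """Hypothetical long-running work, e.g. multi-agent execution."""
    ...


@app.post("/api/projects/{project_id}/tasks/approve")
async def approve(project_id: int, background_tasks: BackgroundTasks) -> dict:
    # add_task() only queues the callable; it runs after the HTTP
    # response is sent, so approval returns immediately.
    background_tasks.add_task(run_execution, project_id)
    return {"scheduled": True}
```

Exceptions raised in a background task never reach the client, which is why `start_development_execution` above catches its own errors and broadcasts `development_failed` instead of relying on the HTTP response.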
29 changes: 27 additions & 2 deletions tests/config/test_config.py
@@ -9,9 +9,18 @@
class TestGlobalConfig:
"""Test GlobalConfig class."""

def test_default_values(self):
def test_default_values(self, monkeypatch):
"""Test that default values are set correctly."""
config = GlobalConfig()
# Clear environment variables that would override defaults
monkeypatch.delenv("LOG_LEVEL", raising=False)
monkeypatch.delenv("DEBUG", raising=False)
monkeypatch.delenv("HOT_RELOAD", raising=False)
monkeypatch.delenv("DATABASE_PATH", raising=False)
monkeypatch.delenv("API_HOST", raising=False)
monkeypatch.delenv("API_PORT", raising=False)
monkeypatch.delenv("DEFAULT_PROVIDER", raising=False)

config = GlobalConfig(_env_file=None)
assert config.database_path == ".codeframe/state.db"
assert config.api_host == "0.0.0.0"
assert config.api_port == 8080
Expand Down Expand Up @@ -138,7 +147,23 @@ def test_validate_for_sprint_missing_key(self, tmp_path, monkeypatch):
# Ensure no API key in environment
monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)

# Prevent load_environment from reading .env files
monkeypatch.setattr("codeframe.core.config.load_environment", lambda *args, **kwargs: None)

config = Config(tmp_path)
# Override _global_config to ensure fresh GlobalConfig without env file
config._global_config = None

# Patch get_global to not read from .env
from codeframe.core.config import GlobalConfig

def patched_get_global():
if not config._global_config:
config._global_config = GlobalConfig(_env_file=None)
return config._global_config

monkeypatch.setattr(config, "get_global", patched_get_global)

with pytest.raises(ValueError, match="ANTHROPIC_API_KEY is required"):
config.validate_for_sprint(sprint=1)

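
A note on the `_env_file=None` pattern used in both tests: in pydantic-settings, passing `_env_file=None` at construction disables `.env` loading for that instance. A hedged reconstruction, assuming `GlobalConfig` is a `BaseSettings` subclass (field names are taken from the assertions above; the rest is hypothetical):

```python
from pydantic_settings import BaseSettings, SettingsConfigDict


class GlobalConfig(BaseSettings):
    # Hypothetical: by default, values may be loaded from a local .env file.
    model_config = SettingsConfigDict(env_file=".env")

    database_path: str = ".codeframe/state.db"
    api_host: str = "0.0.0.0"
    api_port: int = 8080


# _env_file=None overrides env_file for this instance only, so the test
# sees class defaults; monkeypatch.delenv() covers the separate
# environment-variable source.
config = GlobalConfig(_env_file=None)
```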