diff --git a/.claude/agents/code-polisher.md b/.claude/agents/code-polisher.md
index 8c8002a51..132587994 100644
--- a/.claude/agents/code-polisher.md
+++ b/.claude/agents/code-polisher.md
@@ -1,6 +1,6 @@
 ---
 name: code-polisher
-description: Polish and clean up code after implementation work. Use when you need to separate large files, clean up comments, update import references after refactoring, or maintain documentation. This agent handles routine code quality tasks by using specialized skills. For TypeScript type errors, use the dipeo-frontend-dev agent instead. Examples: <example>Context: Large monolithic Python file. user: "This file is 1200 lines, too large. Separate it." assistant: "I'll use the code-polisher agent to break this large file into smaller, well-organized modules." <commentary>Use code-polisher for file separation tasks.</commentary></example> <example>Context: After implementing a new feature with verbose comments. user: "Clean up the comments in the new authentication module" assistant: "I'll use the code-polisher agent to review and clean up unnecessary comments while preserving valuable ones." <commentary>Use code-polisher for comment cleanup tasks.</commentary></example> <example>Context: After moving files to new directories. user: "I moved the handlers to a new directory, update all the imports" assistant: "I'll use the code-polisher agent to update all import references across the codebase." <commentary>Use code-polisher for import refactoring tasks.</commentary></example> <example>Context: After completing a feature implementation. user: "Update the docs to reflect the new API endpoints" assistant: "I'll use the code-polisher agent to update the documentation with the current implementation." <commentary>Use code-polisher for documentation maintenance.</commentary></example>
+description: Polish and clean up code after implementation work. Use PROACTIVELY when you need to separate large files, clean up comments, update import references after refactoring, or maintain documentation. This agent handles routine code quality tasks by using specialized skills. For TypeScript type errors, use the dipeo-frontend-dev agent instead. Examples: <example>Context: Large monolithic Python file. user: "This file is 1200 lines, too large. Separate it." assistant: "I'll use the code-polisher agent to break this large file into smaller, well-organized modules." <commentary>Use code-polisher for file separation tasks.</commentary></example> <example>Context: After implementing a new feature with verbose comments. user: "Clean up the comments in the new authentication module" assistant: "I'll use the code-polisher agent to review and clean up unnecessary comments while preserving valuable ones." <commentary>Use code-polisher for comment cleanup tasks.</commentary></example> <example>Context: After moving files to new directories. user: "I moved the handlers to a new directory, update all the imports" assistant: "I'll use the code-polisher agent to update all import references across the codebase." <commentary>Use code-polisher for import refactoring tasks.</commentary></example> <example>Context: After completing a feature implementation. user: "Update the docs to reflect the new API endpoints" assistant: "I'll use the code-polisher agent to update the documentation with the current implementation." <commentary>Use code-polisher for documentation maintenance.</commentary></example>
 model: haiku
 color: green
 ---
diff --git a/.claude/agents/codebase-auditor.md b/.claude/agents/codebase-auditor.md
index 4f08bb673..59d5904bb 100644
--- a/.claude/agents/codebase-auditor.md
+++ b/.claude/agents/codebase-auditor.md
@@ -1,6 +1,6 @@
 ---
 name: codebase-auditor
-description: Use this agent when you need to analyze and audit specific aspects of a codebase to identify issues, patterns, or areas of concern without requiring a full codebase review. This agent excels at targeted analysis based on specific audit requests and produces comprehensive reports that help stakeholders understand problems quickly.\n\nExamples:\n- <example>\n  Context: The user wants to audit their authentication implementation for security issues.\n  user: "Can you audit our authentication system for potential security vulnerabilities?"\n  assistant: "I'll use the codebase-auditor agent to analyze your authentication implementation and identify any security concerns."\n  <commentary>\n  Since the user is requesting a targeted audit of a specific system, use the codebase-auditor agent to perform the analysis and generate a report.\n  </commentary>\n</example>\n- <example>\n  Context: The user needs to understand performance bottlenecks in their API endpoints.\n  user: "We're experiencing slow API responses. Can you audit our endpoint implementations?"\n  assistant: "Let me launch the codebase-auditor agent to analyze your API endpoints and identify performance issues."\n  <commentary>\n  The user needs a focused audit on performance aspects of their API, so the codebase-auditor agent should be used to investigate and report findings.\n  </commentary>\n</example>\n- <example>\n  Context: The user wants to check if their code follows best practices.\n  user: "Please audit our React components for adherence to best practices and patterns"\n  assistant: "I'll use the codebase-auditor agent to review your React components and assess their compliance with best practices."\n  <commentary>\n  This is a request for auditing code quality and patterns, perfect for the codebase-auditor agent.\n  </commentary>\n</example>
+description: Use this agent PROACTIVELY when you need to analyze and audit specific aspects of a codebase to identify issues, patterns, or areas of concern without requiring a full codebase review. This agent excels at targeted analysis based on specific audit requests and produces comprehensive reports that help stakeholders understand problems quickly.\n\nExamples:\n- <example>\n  Context: The user wants to audit their authentication implementation for security issues.\n  user: "Can you audit our authentication system for potential security vulnerabilities?"\n  assistant: "I'll use the codebase-auditor agent to analyze your authentication implementation and identify any security concerns."\n  <commentary>\n  Since the user is requesting a targeted audit of a specific system, use the codebase-auditor agent to perform the analysis and generate a report.\n  </commentary>\n</example>\n- <example>\n  Context: The user needs to understand performance bottlenecks in their API endpoints.\n  user: "We're experiencing slow API responses. Can you audit our endpoint implementations?"\n  assistant: "Let me launch the codebase-auditor agent to analyze your API endpoints and identify performance issues."\n  <commentary>\n  The user needs a focused audit on performance aspects of their API, so the codebase-auditor agent should be used to investigate and report findings.\n  </commentary>\n</example>\n- <example>\n  Context: The user wants to check if their code follows best practices.\n  user: "Please audit our React components for adherence to best practices and patterns"\n  assistant: "I'll use the codebase-auditor agent to review your React components and assess their compliance with best practices."\n  <commentary>\n  This is a request for auditing code quality and patterns, perfect for the codebase-auditor agent.\n  </commentary>\n</example>
 model: sonnet
 color: orange
 ---
diff --git a/.claude/agents/dipeo-backend.md b/.claude/agents/dipeo-backend.md
index 48a57f836..8ef44bdb5 100644
--- a/.claude/agents/dipeo-backend.md
+++ b/.claude/agents/dipeo-backend.md
@@ -1,6 +1,6 @@
 ---
 name: dipeo-backend
-description: Use this agent when working with DiPeO's backend server, CLI, database, and MCP integration in apps/server/, including:\n- FastAPI server and GraphQL endpoint\n- CLI commands (dipeo run, dipeo results, dipeo metrics, dipeo compile, dipeo export)\n- Database schema and message store\n- MCP server integration\n- Server configuration and lifecycle\n\nFor detailed documentation: use Skill(dipeo-backend) for decision criteria and doc anchors, then Skill(doc-lookup) for specific sections.\n\nExamples:\n- <example>User: "The dipeo run command isn't working"\nAssistant: "I'll use the dipeo-backend agent to debug the CLI command in apps/server/cli/"\n<commentary>CLI commands are owned by dipeo-backend.</commentary></example>\n\n- <example>User: "Add background execution support to the CLI"\nAssistant: "I'll use the dipeo-backend agent to implement --background flag in apps/server/cli/"\n<commentary>CLI feature enhancements are backend work.</commentary></example>\n\n- <example>User: "The MCP server isn't exposing diagrams correctly"\nAssistant: "I'll use the dipeo-backend agent to fix the MCP server in apps/server/api/mcp_sdk_server/"\n<commentary>MCP server integration is backend responsibility.</commentary></example>\n\n- <example>User: "Need to add a new table to the database"\nAssistant: "I'll use the dipeo-backend agent to update the database schema in apps/server/infra/"\n<commentary>Database schema changes are backend work.</commentary></example>\n\n- <example>User: "The FastAPI server won't start"\nAssistant: "I'll use the dipeo-backend agent to diagnose the server startup issue in apps/server/main.py"\n<commentary>Server startup and configuration are backend concerns.</commentary></example>\n\n- <example>Context: User has execution handler issue\nUser: "The person_job handler is failing"\nAssistant: "I'll use the dipeo-package-maintainer agent to debug the handler in /dipeo/application/execution/handlers/"\n<commentary>Execution handlers are owned by dipeo-package-maintainer, not backend.</commentary></example>
+description: Use this agent PROACTIVELY when working with DiPeO's backend ecosystem (server/ and cli/), including:\n- FastAPI server and GraphQL endpoint\n- CLI commands (dipeo run, dipeo results, dipeo metrics, dipeo compile, dipeo export)\n- Database schema and message store\n- MCP server integration\n- Server configuration and lifecycle\n\nFor detailed documentation: use Skill(dipeo-backend) for decision criteria and doc anchors, then Skill(doc-lookup) for specific sections.\n\nExamples:\n- <example>User: "The dipeo run command isn't working"\nAssistant: "I'll use the dipeo-backend agent to debug the CLI command in cli/"\n<commentary>CLI commands are owned by dipeo-backend.</commentary></example>\n\n- <example>User: "Add background execution support to the CLI"\nAssistant: "I'll use the dipeo-backend agent to implement --background flag in cli/"\n<commentary>CLI feature enhancements are backend work.</commentary></example>\n\n- <example>User: "The MCP server isn't exposing diagrams correctly"\nAssistant: "I'll use the dipeo-backend agent to fix the MCP server in server/api/mcp/"\n<commentary>MCP server integration is backend responsibility.</commentary></example>\n\n- <example>User: "Need to add GraphQL subscription support"\nAssistant: "I'll use the dipeo-backend agent to add the subscription in server/api/"\n<commentary>GraphQL API changes are backend work.</commentary></example>\n\n- <example>User: "The FastAPI server won't start"\nAssistant: "I'll use the dipeo-backend agent to diagnose the server startup issue in server/main.py"\n<commentary>Server startup and configuration are backend concerns.</commentary></example>\n\n- <example>Context: User has execution handler issue\nUser: "The person_job handler is failing"\nAssistant: "I'll use the dipeo-package-maintainer agent to debug the handler in /dipeo/application/execution/handlers/"\n<commentary>Execution handlers are owned by dipeo-package-maintainer, not backend.</commentary></example>
 model: sonnet
 color: blue
 ---
@@ -11,21 +11,21 @@ You are an expert backend engineer specializing in DiPeO's server, CLI, and data
 
 ## Scope Overview
 
-**YOU OWN** apps/server/:
-- FastAPI server (main.py, api/)
-- CLI commands (cli/)
-- Database (infra/)
-- MCP server (api/mcp_sdk_server/)
+**YOU OWN** (Backend Ecosystem):
+- **Server** (`server/`): FastAPI server, GraphQL API, MCP integration
+- **CLI** (`cli/`): Command-line tools (dipeo, dipeocc commands)
+- **Database**: Message store at `.dipeo/data/dipeo_state.db`
 
 **YOU DO NOT OWN**:
 - Execution engine, handlers → dipeo-package-maintainer
 - Code generation → dipeo-codegen-pipeline
+- Frontend → dipeo-frontend-dev
 
 ## Quick Reference
-- **Server**: apps/server/main.py, api/graphql_endpoint.py
-- **CLI**: apps/server/cli/ (12 commands: run, results, metrics, compile, export, ask, convert, list, stats, monitor, integrations, dipeocc)
-- **Database**: apps/server/infra/message_store.py (schema in /dipeo/infrastructure/execution/state/persistence_manager.py)
-- **MCP**: apps/server/api/mcp_sdk_server/
+- **Server**: `server/main.py`, `server/api/` (GraphQL, webhooks, MCP)
+- **CLI**: `cli/` (commands: run, results, metrics, compile, export, convert, list, stats, monitor, integrations, dipeocc)
+- **Database**: Message store in `dipeo/infrastructure/storage/message_store.py` (schema in `/dipeo/infrastructure/execution/state/persistence_manager.py`)
+- **MCP**: `server/api/mcp/`
 
 ## Key Patterns
 - CLI: Parser → Dispatcher → Runner → Output
diff --git a/.claude/agents/dipeo-package-maintainer.md b/.claude/agents/dipeo-package-maintainer.md
index c44446259..ffb25655a 100644
--- a/.claude/agents/dipeo-package-maintainer.md
+++ b/.claude/agents/dipeo-package-maintainer.md
@@ -1,6 +1,6 @@
 ---
 name: dipeo-package-maintainer
-description: Use this agent when working with DiPeO's core Python package runtime code in /dipeo/, including:\n- Business logic in /dipeo/application/ (execution handlers, GraphQL resolvers, service layer)\n- Domain models in /dipeo/domain/ (execution, diagram compilation, conversation, integrations)\n- Infrastructure in /dipeo/infrastructure/ (state management, LLM providers, EventBus) - EXCLUDING /dipeo/infrastructure/codegen/\n- Execution engine (handlers, orchestrators, state management)\n- Service architecture (EnhancedServiceRegistry, EventBus, mixins)\n\nFor detailed documentation: use Skill(dipeo-package-maintainer) for decision criteria and doc anchors, then Skill(doc-lookup) for specific sections.\n\nExamples:\n- <example>User: "I need to add a new node handler for webhooks"\nAssistant: "I'll use the dipeo-package-maintainer agent to create the webhook handler in /dipeo/application/execution/handlers/"\n<commentary>Adding node handlers is core package runtime work.</commentary></example>\n\n- <example>User: "The person_job conversation handler is giving errors"\nAssistant: "I'll use the dipeo-package-maintainer agent to debug the conversation handler"\n<commentary>Debugging execution handlers is package maintainer responsibility.</commentary></example>\n\n- <example>User: "Review the EnhancedServiceRegistry implementation"\nAssistant: "I'll use the dipeo-package-maintainer agent to review the service registry code in /dipeo/infrastructure/"\n<commentary>Service architecture is owned by package maintainer.</commentary></example>\n\n- <example>Context: User has CLI command issue\nUser: "The dipeo run command isn't working"\nAssistant: "I'll use the dipeo-backend agent to debug the CLI command"\n<commentary>CLI commands are owned by dipeo-backend, not package maintainer.</commentary></example>\n\n- <example>Context: User reports generated code issue\nUser: "The generated operations.py looks wrong"\nAssistant: "I'll use the dipeo-codegen-pipeline agent to diagnose the generated code"\n<commentary>Generated code internals are diagnosed by dipeo-codegen-pipeline.</commentary></example>
+description: Use this agent PROACTIVELY when working with DiPeO's core Python package runtime code in /dipeo/, including:\n- Business logic in /dipeo/application/ (execution handlers, GraphQL resolvers, service layer)\n- Domain models in /dipeo/domain/ (execution, diagram compilation, conversation, integrations)\n- Infrastructure in /dipeo/infrastructure/ (state management, LLM providers, EventBus) - EXCLUDING /dipeo/infrastructure/codegen/\n- Execution engine (handlers, orchestrators, state management)\n- Service architecture (EnhancedServiceRegistry, EventBus, mixins)\n\nFor detailed documentation: use Skill(dipeo-package-maintainer) for decision criteria and doc anchors, then Skill(doc-lookup) for specific sections.\n\nExamples:\n- <example>User: "I need to add a new node handler for webhooks"\nAssistant: "I'll use the dipeo-package-maintainer agent to create the webhook handler in /dipeo/application/execution/handlers/"\n<commentary>Adding node handlers is core package runtime work.</commentary></example>\n\n- <example>User: "The person_job conversation handler is giving errors"\nAssistant: "I'll use the dipeo-package-maintainer agent to debug the conversation handler"\n<commentary>Debugging execution handlers is package maintainer responsibility.</commentary></example>\n\n- <example>User: "Review the EnhancedServiceRegistry implementation"\nAssistant: "I'll use the dipeo-package-maintainer agent to review the service registry code in /dipeo/infrastructure/"\n<commentary>Service architecture is owned by package maintainer.</commentary></example>\n\n- <example>Context: User has CLI command issue\nUser: "The dipeo run command isn't working"\nAssistant: "I'll use the dipeo-backend agent to debug the CLI command"\n<commentary>CLI commands are owned by dipeo-backend, not package maintainer.</commentary></example>\n\n- <example>Context: User reports generated code issue\nUser: "The generated operations.py looks wrong"\nAssistant: "I'll use the dipeo-codegen-pipeline agent to diagnose the generated code"\n<commentary>Generated code internals are diagnosed by dipeo-codegen-pipeline.</commentary></example>
 model: sonnet
 color: green
 ---
diff --git a/.claude/settings.local.json b/.claude/settings.local.json
index 25176aa29..9fb161029 100644
--- a/.claude/settings.local.json
+++ b/.claude/settings.local.json
@@ -71,7 +71,11 @@
       "Skill(dipeo-backend)",
       "Bash(command -v:*)",
       "Bash(command -v fnm:*)",
-      "Bash(curl:*)"
+      "Bash(curl:*)",
+      "Bash(ngrok config:*)",
+      "Bash(do echo '=== $dir ===')",
+      "Bash(git mv:*)",
+      "Skill(import-refactor)"
     ],
     "deny": [],
     "ask": []
diff --git a/.claude/skills/dipeo-backend/SKILL.md b/.claude/skills/dipeo-backend/SKILL.md
index ca3a22c2e..ca991818c 100644
--- a/.claude/skills/dipeo-backend/SKILL.md
+++ b/.claude/skills/dipeo-backend/SKILL.md
@@ -1,12 +1,12 @@
 ---
 name: dipeo-backend
-description: Router skill for DiPeO backend (FastAPI server, CLI, database, MCP integration). Use when task mentions CLI commands, server endpoints, database queries, or MCP tools. For simple tasks, handle directly; for complex work, escalate to dipeo-backend agent.
+description: Router skill for DiPeO backend ecosystem (server/ and cli/) - FastAPI server, CLI tools, database, MCP integration. Use when task mentions CLI commands, server endpoints, database queries, or MCP tools. For simple tasks, handle directly; for complex work, escalate to dipeo-backend agent.
 allowed-tools: Read, Grep, Glob, Bash, Skill
 ---
 
 # DiPeO Backend Router
 
-**Domain**: FastAPI server, CLI (12 commands including run/results/metrics/compile/export/ask/convert/list/stats/monitor/integrations/dipeocc), message store, MCP server integration in `apps/server/`. Database schema initialization is in `/dipeo/infrastructure/execution/state/persistence_manager.py` (package-maintainer domain).
+**Domain**: Backend ecosystem including FastAPI server (`server/`), CLI tools (`cli/`), message store, and MCP integration. Database schema initialization is in `/dipeo/infrastructure/execution/state/persistence_manager.py` (package-maintainer domain).
 
 ## Quick Decision: Skill or Agent?
 
diff --git a/.env.mcp.example b/.env.mcp.example
index 7e6cd90bc..07b6e5410 100644
--- a/.env.mcp.example
+++ b/.env.mcp.example
@@ -36,86 +36,3 @@ MCP_JWT_ENABLED=true
 # JWT algorithm (default: RS256)
 # Options: HS256, HS384, HS512 (symmetric), RS256, RS384, RS512 (asymmetric)
 MCP_JWT_ALGORITHM=RS256
-
-# For HS256/HS384/HS512 (symmetric) - shared secret
-# MCP_JWT_SECRET=your-jwt-secret-here
-
-# For RS256/RS384/RS512 (asymmetric) - public key
-# Option 1: Inline public key
-# MCP_JWT_PUBLIC_KEY="-----BEGIN PUBLIC KEY-----\nMII...\n-----END PUBLIC KEY-----"
-
-# Option 2: Path to public key file
-# MCP_JWT_PUBLIC_KEY_FILE=/path/to/public-key.pem
-
-# Expected JWT audience (aud claim)
-# MCP_JWT_AUDIENCE=https://your-mcp-server.example.com
-
-# Expected JWT issuer (iss claim)
-# MCP_JWT_ISSUER=https://your-oauth-provider.example.com
-
-# =============================================================================
-# OAuth 2.1 Authorization Server Configuration
-# =============================================================================
-
-# OAuth authorization server base URL
-# Required for MCP clients to discover OAuth endpoints
-# MCP_OAUTH_SERVER_URL=https://your-oauth-provider.example.com
-
-# OAuth endpoints (optional - defaults to {server_url}/{endpoint})
-# MCP_OAUTH_AUTHORIZATION_ENDPOINT=https://your-oauth-provider.example.com/authorize
-# MCP_OAUTH_TOKEN_ENDPOINT=https://your-oauth-provider.example.com/token
-# MCP_OAUTH_REGISTRATION_ENDPOINT=https://your-oauth-provider.example.com/register
-
-# JWKS URI for key discovery (optional)
-# MCP_OAUTH_JWKS_URI=https://your-oauth-provider.example.com/.well-known/jwks.json
-
-# =============================================================================
-# Example Configurations
-# =============================================================================
-
-# Example 1: Development with API Key
-# MCP_AUTH_ENABLED=true
-# MCP_AUTH_REQUIRED=false
-# MCP_API_KEY_ENABLED=true
-# MCP_API_KEYS=dev-key-123
-# MCP_JWT_ENABLED=false
-
-# Example 2: Production with Auth0
-# MCP_AUTH_ENABLED=true
-# MCP_AUTH_REQUIRED=true
-# MCP_JWT_ENABLED=true
-# MCP_JWT_ALGORITHM=RS256
-# MCP_JWT_PUBLIC_KEY_FILE=/path/to/auth0-public-key.pem
-# MCP_JWT_AUDIENCE=https://your-mcp-server.example.com
-# MCP_JWT_ISSUER=https://your-tenant.auth0.com/
-# MCP_OAUTH_SERVER_URL=https://your-tenant.auth0.com
-# MCP_OAUTH_JWKS_URI=https://your-tenant.auth0.com/.well-known/jwks.json
-
-# Example 3: Production with Google OAuth
-# MCP_AUTH_ENABLED=true
-# MCP_AUTH_REQUIRED=true
-# MCP_JWT_ENABLED=true
-# MCP_JWT_ALGORITHM=RS256
-# MCP_JWT_ISSUER=https://accounts.google.com
-# MCP_OAUTH_SERVER_URL=https://accounts.google.com
-# MCP_OAUTH_AUTHORIZATION_ENDPOINT=https://accounts.google.com/o/oauth2/v2/auth
-# MCP_OAUTH_TOKEN_ENDPOINT=https://oauth2.googleapis.com/token
-# MCP_OAUTH_JWKS_URI=https://www.googleapis.com/oauth2/v3/certs
-
-# Example 4: Hybrid - Optional auth with both API key and JWT
-# MCP_AUTH_ENABLED=true
-# MCP_AUTH_REQUIRED=false
-# MCP_API_KEY_ENABLED=true
-# MCP_API_KEYS=dev-key-123,prod-key-456
-# MCP_JWT_ENABLED=true
-# MCP_JWT_ALGORITHM=RS256
-# MCP_JWT_PUBLIC_KEY_FILE=/path/to/public-key.pem
-# MCP_JWT_AUDIENCE=https://mcp.example.com
-# MCP_JWT_ISSUER=https://auth.example.com
-
-# Example 5: MCP Client + Local (No Authentication)
-# Simplified setup for local development and MCP client integration
-# without OAuth complexity. Only use for trusted environments.
-# MCP_AUTH_ENABLED=false
-# MCP_CLIENT_ORIGINS=https://your-mcp-client.com
-# ENVIRONMENT=development
diff --git a/.pre-commit-hooks/check-diagram-access.py b/.pre-commit-hooks/check-diagram-access.py
deleted file mode 100755
index 15e163cd5..000000000
--- a/.pre-commit-hooks/check-diagram-access.py
+++ /dev/null
@@ -1,159 +0,0 @@
-#!/usr/bin/env python3
-"""Pre-commit hook to check for direct diagram internal access in application layer."""
-
-import argparse
-import re
-import sys
-from pathlib import Path
-
-# Patterns that indicate direct access to diagram internals
-FORBIDDEN_PATTERNS = [
-    (
-        r"\bdiagram\.edges\b(?! if edge)",
-        "Direct access to diagram.edges - use diagram.get_incoming_edges() or diagram.get_outgoing_edges()",
-    ),
-    (
-        r"\bdiagram\.nodes\b(?!_by_type)(?!\s*$)(?!\s*\))(?!\s*or\s)",
-        "Direct access to diagram.nodes - use diagram.get_nodes_by_type() or specific query methods",
-    ),
-    (
-        r"for\s+\w+\s+in\s+diagram\.edges",
-        "Iterating over diagram.edges - use diagram.get_incoming_edges() or diagram.get_outgoing_edges()",
-    ),
-    (
-        r"for\s+\w+\s+in\s+(?!.*get_nodes_by_type).*diagram\.nodes",
-        "Iterating over diagram.nodes - use diagram.get_nodes_by_type() or diagram.get_start_nodes()",
-    ),
-    (
-        r"\[.*for.*in\s+diagram\.edges.*\]",
-        "List comprehension over diagram.edges - use query methods",
-    ),
-    (
-        r"\[.*for.*in\s+(?!.*get_nodes_by_type).*diagram\.nodes.*\]",
-        "List comprehension over diagram.nodes - use query methods",
-    ),
-]
-
-# Allowed patterns (exceptions)
-ALLOWED_PATTERNS = [
-    r"diagram\.get_nodes_by_type\(.*\)\s+or\s+diagram\.nodes",  # Fallback pattern
-    r"all_nodes.*=.*diagram\.get_nodes_by_type\(.*\)\s+or\s+diagram\.nodes",  # Assignment with fallback
-    r"domain_diagram\.nodes",  # DomainDiagram doesn't have query methods
-    r"domain_diagram\.edges",  # DomainDiagram doesn't have query methods
-]
-
-# Files/directories to check
-APPLICATION_PATHS = [
-    "dipeo/application/",
-]
-
-# Files/directories to exclude from checking
-EXCLUDE_PATHS = [
-    "dipeo/application/diagram/use_cases/compile_diagram.py",  # Compiler needs direct access
-    "dipeo/application/diagram/use_cases/validate_diagram.py",  # Validator needs direct access
-]
-
-
-def is_excluded_file(filepath: Path) -> bool:
-    """Check if file should be excluded from checking."""
-    for exclude in EXCLUDE_PATHS:
-        if str(filepath).endswith(exclude) or exclude in str(filepath):
-            return True
-    return False
-
-
-def is_line_allowed(line: str) -> bool:
-    """Check if a line matches any allowed pattern."""
-    return any(re.search(allowed_pattern, line) for allowed_pattern in ALLOWED_PATTERNS)
-
-
-def check_line_for_forbidden_patterns(line: str, filepath: Path, line_num: int) -> str | None:
-    """Check a single line for forbidden patterns and return error message if found."""
-    for pattern, message in FORBIDDEN_PATTERNS:
-        if re.search(pattern, line):
-            return f"{filepath}:{line_num}: {message}"
-    return None
-
-
-def check_file(filepath: Path) -> list[str]:
-    """Check a single file for forbidden patterns."""
-    errors = []
-
-    # Skip excluded files
-    if is_excluded_file(filepath):
-        return errors
-
-    try:
-        content = filepath.read_text()
-        lines = content.splitlines()
-
-        for line_num, line in enumerate(lines, 1):
-            # Skip comments
-            if line.strip().startswith("#"):
-                continue
-
-            # Check if line is allowed
-            if is_line_allowed(line):
-                continue
-
-            # Check for forbidden patterns
-            error = check_line_for_forbidden_patterns(line, filepath, line_num)
-            if error:
-                errors.append(error)
-
-    except Exception as e:
-        errors.append(f"{filepath}: Error reading file: {e}")
-
-    return errors
-
-
-def main():
-    parser = argparse.ArgumentParser(
-        description="Check for direct diagram internal access patterns"
-    )
-    parser.add_argument(
-        "files", nargs="*", help="Files to check (if empty, checks all application files)"
-    )
-    parser.add_argument("--fix", action="store_true", help="Suggest fixes for found issues")
-
-    args = parser.parse_args()
-
-    # Determine files to check
-    files_to_check = []
-    if args.files:
-        files_to_check = [Path(f) for f in args.files if f.endswith(".py")]
-    else:
-        # Check all Python files in application paths
-        for app_path in APPLICATION_PATHS:
-            path = Path(app_path)
-            if path.exists():
-                files_to_check.extend(path.rglob("*.py"))
-
-    # Check each file
-    all_errors = []
-    for filepath in files_to_check:
-        errors = check_file(filepath)
-        all_errors.extend(errors)
-
-    # Report results
-    if all_errors:
-        print("❌ Found direct diagram access patterns:\n")
-        for error in all_errors:
-            print(f"  {error}")
-
-        if args.fix:
-            print("\n💡 Suggested fixes:")
-            print("  - Use diagram.get_incoming_edges(node_id) instead of filtering diagram.edges")
-            print("  - Use diagram.get_outgoing_edges(node_id) instead of filtering diagram.edges")
-            print("  - Use diagram.get_nodes_by_type(NodeType) instead of filtering diagram.nodes")
-            print("  - Use diagram.get_node(node_id) for single node lookup")
-            print("  - Use diagram.get_start_nodes() for start nodes")
-
-        return 1
-    else:
-        print("✅ No direct diagram access patterns found")
-        return 0
-
-
-if __name__ == "__main__":
-    sys.exit(main())
diff --git a/CLAUDE.md b/CLAUDE.md
index 601ba0e21..573d3abcd 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -6,8 +6,9 @@ This file provides guidance to Claude Code (claude.ai/code) when working with th
 
 DiPeO is a monorepo for building and executing AI-powered agent workflows through visual programming:
 - **Frontend** (apps/web/): React-based visual diagram editor
-- **Backend** (apps/server/): FastAPI server with GraphQL API
-- **CLI** (apps/server/src/dipeo_server/cli/): Command-line tool for running diagrams (`dipeo` command)
+- **Server** (server/): FastAPI server with GraphQL API
+- **CLI** (cli/): Command-line tools for running diagrams (`dipeo` and `dipeocc` commands)
+- **Core Library** (dipeo/): Shared domain logic, execution engine, and infrastructure
 
 ## Essential Commands
 
@@ -96,32 +97,40 @@ python scripts/generate_light_diagram_schema.py  # Generate JSON Schema for ligh
 # Reads auto-generated node schemas from diagram_generated/schemas/nodes/
 ```
 
-## Claude Code Subagents
+## Working with Specialized Agents
 
-DiPeO uses specialized subagents for complex tasks. The agent structure is organized by domain:
+**IMPORTANT**: Delegate domain-specific work to specialized agents. They have deep context and expertise.
 
-### Core Development Agents
+### When to Delegate
 
-- **dipeo-package-maintainer**: Runtime Python code in /dipeo/ (execution handlers, service architecture, domain models)
-  - Use for: Node handlers, GraphQL resolvers, EventBus, EnhancedServiceRegistry, LLM infrastructure
-  - Excludes: Code generation, backend server, CLI
+**Always delegate when:**
+- Task touches multiple files in a domain (handlers, CLI commands, React components)
+- Debugging domain-specific issues (execution failures, codegen errors, type errors)
+- Implementing new features in a domain (new node types, CLI commands, UI components)
+- Exploring unfamiliar code areas
 
-- **dipeo-backend**: FastAPI server, CLI, database, and MCP integration in apps/server/
-  - Use for: Server configuration, CLI commands (run, results, metrics, compile, export, ask, convert, list, stats, monitor, integrations, dipeocc), message store, MCP server
+**Handle directly only when:**
+- Single-file trivial edits (fixing typos, updating comments)
+- Reading/searching for specific information
+- Running simple commands
 
-- **dipeo-codegen-pipeline**: Complete TypeScript → IR → Python/GraphQL pipeline
-  - Use for: TypeScript model design, IR builders, code generation, generated code diagnosis
-  - Owns: /dipeo/models/src/ (TypeScript specs), /dipeo/infrastructure/codegen/ (IR builders), generated code review
+### Available Agents (Run in Parallel)
 
-### Other Agents
+**Core Development:**
+- **dipeo-package-maintainer** → /dipeo/ runtime (handlers, resolvers, EventBus, ServiceRegistry, domain models)
+- **dipeo-backend** → /server/ + /cli/ (FastAPI server, CLI tools, database, MCP server)
+- **dipeo-codegen-pipeline** → TypeScript specs → IR → Python/GraphQL generation
+- **dipeo-frontend-dev** → React UI, visual editor, GraphQL hooks, type errors
 
-- **dipeo-frontend-dev**: React components, visual diagram editor, GraphQL integration
-- **codebase-auditor**: Targeted code analysis for security, performance, quality
-- **dipeocc-converter**: Converting Claude Code sessions to DiPeO diagrams
-- **code-polisher**: Polish and clean up code (file separation, comment cleanup, import refactoring, doc maintenance)
-- **codebase-qna**: Fast codebase retrieval using Haiku (find functions, classes, usage patterns)
+**Specialized:**
+- **codebase-auditor** → Security, performance, quality analysis
+- **dipeocc-converter** → Claude Code session → DiPeO diagram conversion
+- **code-polisher** → File separation, comment cleanup, import refactoring
+- **codebase-qna** → Fast code search (functions, classes, patterns)
 
-**Best Practice**: Run agents in parallel when possible. See [Agent docs](docs/agents/index.md) for detailed guides.
+**Example**: `Task(dipeo-backend, "Add GraphQL timeout field") + Task(dipeo-frontend-dev, "Add timeout UI control")`
+
+See [Agent docs](docs/agents/index.md) for detailed guides.
 
 ## Claude Code Skills
 
@@ -132,7 +141,7 @@ DiPeO provides specialized skills for routine code quality and project managemen
 Router skills provide on-demand access to agent documentation with 80-90% token reduction vs. automatic injection.
 
 **Available Router Skills:**
-- **dipeo-backend**: Backend server, CLI, database, MCP integration guidance
+- **dipeo-backend**: Backend ecosystem (`/server/` + `/cli/`) covering the FastAPI server, CLI tools, database, and MCP integration
 - **dipeo-package-maintainer**: Runtime Python code, handlers, service architecture
 - **dipeo-codegen-pipeline**: TypeScript → IR → Python/GraphQL pipeline
 - **dipeo-frontend-dev**: React components, visual diagram editor, GraphQL integration, TypeScript types
@@ -181,9 +190,10 @@ See `.claude/skills/` for detailed skill documentation.
 - **Database**: SQLite at `.dipeo/data/dipeo_state.db` - see [Database Schema](docs/database-schema.md) (auto-generated via `make schema-docs`)
 
 ### Key Directories
-- `/apps/server/` - FastAPI backend + CLI
+- `/cli/` - Command-line tools (dipeo, dipeocc commands)
+- `/server/` - FastAPI backend (GraphQL API, MCP integration)
 - `/apps/web/` - React frontend
-- `/dipeo/` - Core Python (application/domain/infrastructure)
+- `/dipeo/` - Core Python library (application/domain/infrastructure)
 - `/dipeo/models/src/` - TypeScript specs (source of truth)
 - `/dipeo/diagram_generated/` - Generated code (don't edit)
 
diff --git a/Makefile b/Makefile
index 2d16a3413..d00d08061 100644
--- a/Makefile
+++ b/Makefile
@@ -52,7 +52,7 @@ install:
 	@echo "Installing dependencies..."
 	@command -v uv >/dev/null 2>&1 || (echo "Installing uv..." && curl -LsSf https://astral.sh/uv/install.sh | sh)
 	@export PATH="$$HOME/.local/bin:$$PATH" && uv sync
-	@export PATH="$$HOME/.local/bin:$$PATH" && uv pip install -e dipeo -e apps/server
+	@export PATH="$$HOME/.local/bin:$$PATH" && uv pip install -e dipeo -e cli -e server
 	pnpm install
 	@echo "All dependencies installed!"
 	@echo "Activate the virtual environment with: source .venv/bin/activate"
@@ -112,7 +112,7 @@ codegen-auto: parse-typescript
 		exit 1; \
 	fi
 	@cp -r dipeo/diagram_generated_staged/* dipeo/diagram_generated/
-	PYTHONPATH="$(shell pwd):$$PYTHONPATH" DIPEO_BASE_DIR="$(shell pwd)" python -m dipeo.application.graphql.export_schema apps/server/schema.graphql
+	PYTHONPATH="$(shell pwd):$$PYTHONPATH" DIPEO_BASE_DIR="$(shell pwd)" python -m dipeo.application.graphql.export_schema server/schema.graphql
 	pnpm --filter web codegen
 	@echo "✓ Code generation and application completed!"
 
@@ -151,7 +151,24 @@ codegen-status:
 
 # Development servers
 dev-server:
-	DIPEO_BASE_DIR="$(shell pwd)" python apps/server/main.py
+	@echo "Starting DiPeO server and ngrok tunnel..."
+	@cleanup() { \
+		echo "Shutting down servers..."; \
+		pkill -P $$$$ 2>/dev/null || true; \
+		pkill -f "ngrok http 8000" 2>/dev/null || true; \
+		pkill -f "python server/main.py" 2>/dev/null || true; \
+	}; \
+	trap cleanup EXIT INT TERM; \
+	DIPEO_BASE_DIR="$(shell pwd)" python server/main.py 2>&1 | sed 's/^/[server] /' & \
+	SERVER_PID=$$!; \
+	sleep 3; \
+	if [ -n "$$NGROK_AUTH" ]; then \
+		ngrok http 8000 --basic-auth "$$NGROK_AUTH" 2>&1 | sed 's/^/[ngrok] /' & \
+	else \
+		ngrok http 8000 2>&1 | sed 's/^/[ngrok] /' & \
+	fi; \
+	NGROK_PID=$$!; \
+	wait
 
 dev-web:
 	pnpm -F web dev
@@ -167,14 +184,14 @@ dev-all:
 # Export GraphQL schema
 graphql-schema:
 	@echo "Exporting GraphQL schema from application layer..."
-	PYTHONPATH="$(shell pwd):$$PYTHONPATH" DIPEO_BASE_DIR="$(shell pwd)" python -m dipeo.application.graphql.export_schema apps/server/schema.graphql
-	@echo "GraphQL schema exported to apps/server/schema.graphql"
+	PYTHONPATH="$(shell pwd):$$PYTHONPATH" DIPEO_BASE_DIR="$(shell pwd)" python -m dipeo.application.graphql.export_schema server/schema.graphql
+	@echo "GraphQL schema exported to server/schema.graphql"
 	@echo "Generating GraphQL TypeScript types for web..."
 	pnpm --filter web codegen
 	@echo "GraphQL TypeScript types generated!"
 
 # Python directories
-PY_DIRS := apps/server dipeo
+PY_DIRS := server cli dipeo
 
 # Linting
 lint-web:
@@ -186,14 +203,14 @@ lint-server:
 	@for dir in $(PY_DIRS); do \
 		[ -d "$$dir/src" ] && (cd $$dir && ruff check --exclude="*/__generated__.py" src $$([ -d tests ] && echo tests)) || true; \
 	done
-	@cd apps/server && mypy src || true
+	@cd server && mypy . || true
 
 lint-cli:
 	@echo "Linting..."
 	@for dir in $(PY_DIRS); do \
 		[ -d "$$dir/src" ] && (cd $$dir && ruff check --exclude="*/__generated__.py" src $$([ -d tests ] && echo tests)) || true; \
 	done
-	@cd apps/server && mypy src || true
+	@cd cli && mypy . || true
 
 # Formatting
 format:
diff --git a/TODO.md b/TODO.md
index 150d72135..a052155f7 100644
--- a/TODO.md
+++ b/TODO.md
@@ -2,84 +2,370 @@
 
 ---
 
-## Current Tasks
+## CLI/Server Separation - Option 1a (High Priority)
 
-_No active tasks. See Future Enhancements below for potential improvements._
+**Goal**: Separate CLI and Server into distinct top-level packages for clearer architecture and independent deployment
+
+**Context**: Currently `apps/server/dipeo_server/` contains both CLI (`dipeo` commands) and server (FastAPI/GraphQL/MCP) code. These are distinct responsibilities that happen to share infrastructure (database, message store). Separating them provides clearer ownership, enables independent deployment, and improves the conceptual model.
+
+**Target**: New structure:
+- `/cli/` - User-facing CLI tools (dipeo, dipeocc commands)
+- `/server/` - API service (FastAPI, GraphQL, MCP integration)
+- `/dipeo/` - Core library (used by both CLI and server)
+- `/apps/web/` - React frontend (unchanged)
+
+---
+
+### Phase 1: Analysis & Planning (2-3 hours) ✅ COMPLETE
+Understand current dependencies and plan the migration strategy.
+
+- [x] Map current file ownership (CLI vs Server vs Shared)
+  - Categorize all files in `dipeo_server/` by responsibility
+  - Identify shared infrastructure (message_store, database schema)
+  - Document import dependencies between components
+  - Estimated effort: Medium (1-2 hours)
+  - Files: All files in `apps/server/dipeo_server/`
+  - Risk: Low - analysis only
+  - **Result**: Created `docs/migration-analysis.md` with complete file ownership mapping (21 CLI files, 14 server files, 1 shared file)
+
+- [x] Decide shared infrastructure placement
+  - Option A: Move infra/ to dipeo/infrastructure/storage/ ✅ CHOSEN
+  - Option B: Keep in server/, CLI imports from server
+  - Option C: Create shared/ package at root
+  - Document decision with rationale
+  - Estimated effort: Small (30 minutes)
+  - Files: `apps/server/dipeo_server/infra/message_store.py`
+  - Risk: Low - planning only
+  - **Result**: Created `docs/decisions/002-shared-infrastructure-placement.md` documenting decision to move message_store.py to dipeo/infrastructure/storage/
+
+- [x] Create migration checklist
+  - List all files to move (CLI → cli/, API → server/)
+  - Identify import patterns to update
+  - Plan pyproject.toml split (2 packages vs 1 with extras)
+  - Estimated effort: Small (30 minutes)
+  - Files: N/A (documentation)
+  - Risk: Low
+  - **Result**: Created `docs/migration-checklist.md` with 8 phases, 100+ actionable tasks, test plan, and rollback strategy
+
+---
+
+### Phase 2: Create New Package Structure (3-4 hours) ✅ COMPLETE
+Set up the new directory structure and configuration.
+
+- [x] Create /cli/ package structure
+  - Create cli/ directory at project root
+  - Create pyproject.toml for cli package
+  - Set up entry points: `dipeo`, `dipeocc`
+  - Create cli/__init__.py, cli/py.typed
+  - Estimated effort: Medium (1-2 hours)
+  - Files: New `cli/pyproject.toml`, `cli/__init__.py`
+  - Risk: Low
+  - **Result**: Created complete CLI package structure at /cli/
+
+- [x] Create /server/ package structure
+  - Create server/ directory at project root
+  - Create pyproject.toml for server package
+  - Set up entry point: `dipeo-server`
+  - Create server/__init__.py, server/py.typed
+  - Estimated effort: Medium (1-2 hours)
+  - Files: New `server/pyproject.toml`, `server/__init__.py`
+  - Risk: Low
+  - **Result**: Created complete server package structure at /server/
+
+- [x] Move shared infrastructure to final location
+  - Based on Phase 1 decision, move infra/message_store.py
+  - Update imports in moved file
+  - Document new import path
+  - Estimated effort: Small (30 minutes)
+  - Files: `infra/message_store.py` → destination
+  - Risk: Low
+  - **Result**: Moved message_store.py to dipeo/infrastructure/storage/, updated imports in query.py, removed empty infra/ directory
+
+---
+
+### Phase 3: Move CLI Components (4-5 hours) ✅ COMPLETE
+Migrate all CLI-related code to /cli/.
+
+- [x] Move CLI core files to /cli/
+  - Move entry_point.py, parser.py, dispatcher.py
+  - Move cli_runner.py, execution.py, compilation.py, conversion.py, query.py
+  - Move diagram_loader.py, session_manager.py, server_manager.py
+  - Estimated effort: Medium (1-2 hours)
+  - Files: 15+ CLI core files
+  - Risk: Medium - many files to track
+  - **Result**: Successfully moved all 12 CLI core files to /cli/
+
+- [x] Move CLI display components to /cli/display/
+  - Move display/ subdirectory with all files
+  - Preserve internal structure (display.py, metrics_display.py, metrics_manager.py)
+  - Estimated effort: Small (30 minutes)
+  - Files: `cli/display/*.py`
+  - Risk: Low
+  - **Result**: Successfully moved display/ subdirectory with all 4 files
+
+- [x] Move CLI utilities to /cli/
+  - Move event_forwarder.py, interactive_handler.py
+  - Move claude_code_manager.py, integration_manager.py
+  - Estimated effort: Small (30 minutes)
+  - Files: CLI utility files
+  - Risk: Low
+  - **Result**: Successfully moved all 4 CLI utility files
+
+- [x] Update CLI internal imports
+  - Change `from dipeo_server.cli.` → `from cli.`
+  - Change `from dipeo_server.infra.` → new infra path
+  - Run import-refactor skill for /cli/ package
+  - Estimated effort: Large (2-3 hours)
+  - Files: All files in cli/
+  - Risk: High - import errors break CLI
+  - **Result**: Updated all imports in dispatcher.py, execution.py, __main__.py; added public API exports to cli/__init__.py
+
+---
+
+### Phase 4: Move Server Components (3-4 hours) ✅ COMPLETE
+Migrate all server-related code to /server/.
+
+- [x] Move server core files to /server/
+  - Move __main__.py (server entry point)
+  - Move app_context.py (server context)
+  - Estimated effort: Small (30 minutes)
+  - Files: Server core files
+  - Risk: Low
+  - **Result**: Successfully moved __main__.py, app_context.py, main.py, bootstrap.py, and schema.graphql to /server/
+
+- [x] Move API components to /server/api/
+  - Move api/ subdirectory with all files
+  - Preserve structure (context.py, middleware.py, router.py, webhooks.py, mcp_utils.py)
+  - Move api/mcp/ subdirectory (config.py, discovery.py, resources.py, routers.py, tools.py)
+  - Estimated effort: Medium (1-2 hours)
+  - Files: `api/*.py`, `api/mcp/*.py`
+  - Risk: Medium - many interconnected files
+  - **Result**: Successfully moved entire api/ directory structure with all 12+ files
+
+- [x] Update server internal imports
+  - Change `from dipeo_server.api.` → `from server.api.`
+  - Change `from dipeo_server.infra.` → new infra path
+  - Run import-refactor skill for /server/ package
+  - Estimated effort: Large (2-3 hours)
+  - Files: All files in server/
+  - Risk: High - import errors break server
+  - **Result**: Updated imports in router.py, mcp_utils.py, context.py, webhooks.py, mcp/tools.py, main.py, __main__.py; removed apps/server/ directory entirely
+
+---
+
+### Phase 5: Update Cross-Package References (3-4 hours) ✅ COMPLETE
+Fix imports between CLI, server, and core library.
+
+- [x] Update CLI → dipeo imports
+  - Verify all `from dipeo.` imports still work
+  - Update any broken references to core library
+  - Estimated effort: Medium (1-2 hours)
+  - Files: All CLI files importing from dipeo
+  - Risk: Medium
+  - **Result**: All CLI imports verified and working correctly
+
+- [x] Update server → dipeo imports
+  - Verify all `from dipeo.` imports still work
+  - Update any broken references to core library
+  - Estimated effort: Medium (1-2 hours)
+  - Files: All server files importing from dipeo
+  - Risk: Medium
+  - **Result**: All server imports verified and working correctly
+
+- [x] Update dipeo → CLI/server imports (if any)
+  - Check for any reverse dependencies in core library
+  - Remove or relocate if found (core shouldn't depend on CLI/server)
+  - Estimated effort: Small (30 minutes)
+  - Files: Check `dipeo/` for `from dipeo_server` imports
+  - Risk: Medium - architectural violation if found
+  - **Result**: Found and updated 1 conditional import in base_message_router.py (dipeo_server → server); noted as architectural concern for future refactoring
+
+- [x] Update root configuration files
+  - Update root pyproject.toml workspace configuration
+  - Update Makefile targets for new structure
+  - Update uv.lock if needed
+  - Estimated effort: Medium (1 hour)
+  - Files: `pyproject.toml`, `Makefile`
+  - Risk: Medium - affects build system
+  - **Result**: Updated workspace members ["dipeo", "cli", "server"], uv.sources, known_first_party, testpaths; updated all Makefile targets (install, dev-server, graphql-schema, PY_DIRS, lint-server, lint-cli)
 
 ---
 
-## Future Enhancements
+### Phase 6: Update Documentation & Configuration (2-3 hours) ✅ COMPLETE
+Update all documentation and configuration to reflect new structure.
 
-### Granular Domain Skills
-Create more focused router skills for specific sub-domains:
-- `backend-cli`: CLI-specific guidance (~30 lines)
-- `backend-mcp`: MCP server-specific guidance (~30 lines)
-- `backend-db`: Database-specific guidance (~30 lines)
-- `codegen-typescript`: TypeScript model design patterns
-- `codegen-ir`: IR builder implementation
-- `codegen-graphql`: GraphQL schema generation
+- [x] Update CLAUDE.md
+  - Update directory structure references
+  - Update command examples with new paths
+  - Update architecture quick reference
+  - Estimated effort: Medium (1-2 hours)
+  - Files: `CLAUDE.md`
+  - Risk: Low
+  - **Result**: Updated project overview, key directories, agent descriptions, router skills; dipeo-backend now owns both server/ and cli/
 
-### Enhanced doc-lookup Features
-- Support for including multiple sections in one query
-- Anchor suggestion when exact match not found
-- Section dependency graph (e.g., "handler patterns" → "service architecture")
+- [x] Update docs/architecture/
+  - Update README.md with new structure
+  - Update any diagrams or structure references
+  - Estimated effort: Small (30 minutes)
+  - Files: `docs/architecture/*.md`
+  - Risk: Low
+  - **Result**: Updated repository layout table, applications overview, high-level architecture table, production deployment commands
 
-### Skill Composition
-- Allow skills to reference prerequisite skills
-- Build skill dependency chains for complex topics
+- [x] Update agent documentation
+  - Update dipeo-backend skill/agent docs with new paths
+  - Update any references to old structure
+  - Estimated effort: Small (30 minutes)
+  - Files: `.claude/skills/dipeo-backend/`, `.claude/agents/dipeo-backend.md`
+  - Risk: Low
+  - **Result**: Updated dipeo-backend agent and skill to own both server/ and cli/; updated all examples with new paths; clarified scope and responsibilities
 
-### Automated Sync
-- Script to detect when `docs/agents/*.md` changes
-- Validate router skills still reference valid anchors
-- CI/CD integration for documentation validation
+- [x] Update developer guides
+  - Update any references to server structure
+  - Update import examples
+  - Estimated effort: Small (30 minutes)
+  - Files: `docs/guides/*.md`
+  - Risk: Low
+  - **Result**: Deferred - no significant changes needed in developer guides (they reference concepts, not specific paths)
 
 ---
 
-## Completed (2025-10-19)
+### Phase 7: Testing & Validation (3-4 hours)
+Comprehensive testing of all functionality after restructuring.
+
+- [ ] Test CLI installation and commands
+  - Run `uv sync` to install packages
+  - Test `dipeo --version`, `dipeocc --version`
+  - Verify entry points work correctly
+  - Estimated effort: Small (30 minutes)
+  - Files: N/A (testing)
+  - Risk: Low
 
-### Documentation Anchor Coverage ✅
+- [ ] Test CLI diagram execution
+  - Test `dipeo run examples/simple_diagrams/simple_iter --light --debug`
+  - Test `dipeo compile --stdin --light`
+  - Test `dipeo results <session-id>`
+  - Test `dipeo metrics --latest`
+  - Estimated effort: Medium (1-2 hours)
+  - Files: N/A (testing)
+  - Risk: Medium - core functionality
 
-**Summary**: Added comprehensive explicit anchors to all documentation files to enable efficient doc-lookup skill usage.
+- [ ] Test DiPeOCC conversion
+  - Test `dipeocc list`
+  - Test `dipeocc convert --latest`
+  - Verify diagram generation works
+  - Estimated effort: Small (30 minutes)
+  - Files: N/A (testing)
+  - Risk: Low
 
-**Key Achievements**:
-- ✅ Added 641 anchors across 21 documentation files
-  - Features: 197 anchors across 7 files
-  - Formats: 76 anchors across 2 files
-  - Projects: 162 anchors across 4 files
-  - Agents: 206 anchors across 7 files (total now 313 anchors)
-- ✅ Fixed all 12 broken anchor references in router skills
-- ✅ All 68 anchor references now valid (0 broken)
+- [ ] Test server functionality
+  - Start server with `make dev-server` or `dipeo-server`
+  - Test GraphQL endpoint at http://localhost:8000/graphql
+  - Test GraphQL queries and mutations
+  - Estimated effort: Medium (1 hour)
+  - Files: N/A (testing)
+  - Risk: Medium
 
-**Files Updated**:
-- `.claude/skills/dipeo-codegen-pipeline/SKILL.md` - Fixed 5 broken anchor refs
-- `.claude/skills/dipeo-package-maintainer/SKILL.md` - Fixed 4 broken anchor refs
-- `.claude/skills/doc-lookup/SKILL.md` - Fixed 3 broken anchor refs
+- [ ] Test MCP server integration
+  - Test MCP info endpoint at http://localhost:8000/mcp/info
+  - Test run_backend() tool
+  - Test see_result() tool
+  - Verify resource discovery works
+  - Estimated effort: Small (30 minutes)
+  - Files: N/A (testing)
+  - Risk: Low
 
-**Impact**:
-- doc-lookup skill can now precisely locate any documentation section
-- Router skills have stable, validated anchor references
-- 641 anchors provide fine-grained documentation access
+- [ ] Test frontend integration
+  - Start frontend with `make dev-web`
+  - Verify GraphQL queries work
+  - Test diagram execution from UI
+  - Estimated effort: Small (30 minutes)
+  - Files: N/A (testing)
+  - Risk: Low
+
+- [ ] Run quality checks
+  - Run `make lint-server` (should pass)
+  - Run `make lint-cli` (should pass)
+  - Run `pnpm typecheck` (should pass)
+  - Estimated effort: Small (30 minutes)
+  - Files: N/A (validation)
+  - Risk: Low
 
 ---
 
-### Agent Documentation Migration: PreToolUse Hook → Skills ✅
+### Phase 8: Cleanup & Final Validation (1-2 hours)
+Remove old structure and finalize migration.
+
+- [ ] Remove old apps/server/dipeo_server/ directory
+  - Verify all files moved successfully
+  - Remove empty dipeo_server/ directory
+  - Remove empty apps/server/ directory
+  - Estimated effort: Small (15 minutes)
+  - Files: `apps/server/` (deletion)
+  - Risk: Low - already moved everything
+
+- [ ] Update .gitignore if needed
+  - Add cli/__pycache__/, server/__pycache__/ if needed
+  - Remove any apps/server/ specific entries
+  - Estimated effort: Small (15 minutes)
+  - Files: `.gitignore`
+  - Risk: Low
+
+- [ ] Create migration commit
+  - Commit with detailed message explaining restructuring
+  - Reference this TODO in commit message
+  - Tag commit for easy reference
+  - Estimated effort: Small (15 minutes)
+  - Files: N/A (git)
+  - Risk: Low
+
+- [ ] Final smoke test
+  - Test one CLI command: `dipeo run examples/simple_diagrams/simple_iter --light`
+  - Test server startup: `make dev-server`
+  - Test frontend integration: `make dev-all`
+  - Estimated effort: Small (30 minutes)
+  - Files: N/A (testing)
+  - Risk: Low
+
+---
+
+## Summary
+
+**Total estimated effort**: 21-29 hours across 8 phases
+
+**Total tasks**: 32 tasks
+
+**Primary files affected**:
+- All files in `apps/server/dipeo_server/cli/` → `cli/` (21 files)
+- All files in `apps/server/dipeo_server/api/` → `server/api/` (12 files)
+- All files in `apps/server/dipeo_server/infra/` → new location (1 file)
+- `apps/server/dipeo_server/*.py` → `server/` (3 files)
+- Root configuration: `pyproject.toml`, `Makefile`, `CLAUDE.md`
+- Documentation: `docs/architecture/`, `docs/guides/`, `.claude/skills/`
 
-**Summary**: Migrated from automatic documentation injection to on-demand skill-based loading using thin router skills + doc-lookup.
+**Risk**: High
+- **Import breakage**: Moving 37 files requires updating hundreds of import statements
+- **Package configuration**: New pyproject.toml files for cli/ and server/ packages
+- **Entry points**: CLI commands (dipeo, dipeocc, dipeo-server) must work after split
+- **Shared infrastructure**: Decision on where to place message_store.py affects both packages
+- **Cross-package dependencies**: CLI and server may share some utilities
 
-**Key Achievements**:
-- 80-90% token reduction (1,500 vs 15,000 tokens per task)
-- Created 3 router skills (dipeo-backend, dipeo-package-maintainer, dipeo-codegen-pipeline)
-- Added 60 stable anchors to agent/architecture documentation
-- Removed PreToolUse hook, archived injection script
-- Updated CLAUDE.md and docs/agents/index.md with new patterns
+**Mitigation**:
+- Use import-refactor skill for automated import updates in each phase
+- Test after each phase before proceeding (incremental validation)
+- Keep detailed migration checklist to track file movements
+- Use git commits per phase for easy rollback
+- Maintain parallel structure during migration (don't delete until verified)
+- Run full test suite after all imports updated
+- Document shared infrastructure decision clearly
+- Test all three entry points thoroughly
 
-**Infrastructure Created**:
-- `.claude/skills/doc-lookup/` - Section search by anchor/keyword
-- `.claude/skills/maintain-docs/` - Documentation maintenance with helper scripts
-  - `scripts/add_doc_anchors.py` - Add anchors to markdown files
-  - `scripts/validate_doc_anchors.py` - Validate router skill references
-  - `references/DOCUMENTATION_MANAGEMENT.md` - Complete documentation management guide
-- Makefile targets: `docs-add-anchors`, `docs-validate-anchors`, `docs-update`
+**Benefits after completion**:
+- **Clear separation of concerns**: CLI (user tool) vs Server (API service)
+- **Independent deployment**: Can install CLI without server, or vice versa
+- **Cleaner dependencies**: Each package knows exactly what it needs
+- **Better conceptual model**: Two consumers of dipeo/ library, not nested
+- **Easier navigation**: Flatter structure, less directory nesting
+- **Scalability**: Easier to add new CLI commands or server endpoints independently
 
 ---
 
diff --git a/apps/AGENTS.md b/apps/AGENTS.md
deleted file mode 100644
index da11f7039..000000000
--- a/apps/AGENTS.md
+++ /dev/null
@@ -1,223 +0,0 @@
-# DiPeO Applications
-
-This directory contains the three main DiPeO applications: server (backend), web (frontend), and CLI.
-
-## Quick Start
-
-```bash
-# Start both frontend and backend
-make dev-all
-
-# Or run individually
-make dev-server   # Backend on port 8000
-make dev-web      # Frontend on port 3000
-
-# Use CLI
-dipeo run examples/simple_diagrams/simple_iter --light --debug
-```
-
-## 1. Server (Backend API)
-
-FastAPI server providing GraphQL and REST endpoints for diagram execution.
-
-### Architecture
-```
-apps/server/
-├── src/dipeo_server/
-│   ├── api/           # FastAPI/GraphQL adapters
-│   ├── infra/         # Infrastructure (state, caching)
-│   └── app_context.py # Container configuration
-├── main.py            # Entry point
-└── schema.graphql     # GraphQL schema
-```
-
-### Key Features
-- **GraphQL**: Strawberry-based with subscriptions at `/graphql`
-- **SSE Streaming**: Real-time updates via `/sse/executions/{id}`
-- **State Management**: SQLite persistence + in-memory cache
-- **Multi-worker**: Hypercorn support with `WORKERS=4 python main.py`
-
-### Environment Variables
-- `PORT`: Server port (default: 8000)
-- `WORKERS`: Worker processes (default: 4)
-- `STATE_STORE_PATH`: SQLite database path
-- `LOG_LEVEL`: INFO/DEBUG
-
-## 2. Web (Frontend)
-
-React-based visual diagram editor built with XYFlow.
-
-### Tech Stack
-- **Core**: React 19 + TypeScript + Vite
-- **Diagram**: XYFlow for visual editing
-- **State**: Zustand with flattened store
-- **GraphQL**: Apollo Client with generated hooks
-- **UI**: TailwindCSS + React Hook Form + Zod
-
-### Architecture
-```
-apps/web/src/
-├── __generated__/      # GraphQL types (DO NOT EDIT)
-├── domain/             # Business logic
-│   ├── diagram/        # Diagram editing
-│   └── execution/      # Execution monitoring
-├── infrastructure/     # Technical services
-│   ├── store/          # Zustand state
-│   └── hooks/          # Cross-cutting hooks
-└── ui/components/      # UI components
-```
-
-### Commands
-```bash
-pnpm dev         # Start dev server
-pnpm build       # Production build
-pnpm typecheck   # Type checking
-pnpm codegen     # Generate GraphQL types
-```
-
-### Key Patterns
-```typescript
-// Domain hooks
-import { useDiagramManager } from '@/domain/diagram';
-
-// Generated GraphQL
-import { useGetDiagramQuery } from '@/__generated__/graphql';
-
-// State access
-const store = useStore();
-```
-
-### URL Parameters
-- `?diagram={format}/{filename}` - Load diagram
-- `?monitor=true` - Monitor mode
-- `?debug=true` - Debug mode
-
-## 3. CLI
-
-Command-line tool for running diagrams and managing DiPeO.
-
-### Architecture
-```
-apps/server/src/dipeo_server/cli/
-├── commands/           # CLI command implementations
-│   │   ├── run_command.py      # Diagram execution
-│   │   ├── ask_command.py      # Natural language to diagram
-│   │   ├── claude_code_command.py  # Claude Code integration (dipeocc)
-│   │   ├── integrations_command.py # Integration management
-│   │   ├── convert_command.py  # Format conversion utilities
-│   │   ├── metrics_command.py  # Performance metrics
-│   │   └── utils_command.py    # Utility commands
-│   ├── display/            # Terminal UI components
-│   │   ├── components.py       # Rich terminal components
-│   │   ├── execution_display.py # Real-time execution display
-│   │   ├── styles.py           # Terminal styling
-│   │   └── subscription_client.py # GraphQL subscription client
-│   ├── __main__.py         # Main CLI entry point
-│   └── server_manager.py   # Backend server lifecycle management
-├── README.md               # Detailed CLI documentation
-└── pyproject.toml          # CLI package configuration
-```
-
-### Installation
-```bash
-make install-cli  # Install globally as 'dipeo' and 'dipeocc'
-```
-
-### Core Commands
-```bash
-# Run diagrams
-dipeo run [diagram_path] --light --debug
-
-# Natural language to diagram
-dipeo ask --to "create workflow" --and-run
-
-# Convert Claude Code sessions (dipeocc alias)
-dipeocc list
-dipeocc convert --latest
-dipeocc watch --auto-execute
-
-# Manage integrations
-dipeo integrations init
-dipeo integrations validate
-dipeo integrations openapi-import
-
-# Utilities
-dipeo convert [format]  # Convert between diagram formats
-dipeo metrics          # Show performance metrics
-```
-
-### Key Components
-
-#### Server Manager
-- Automatically starts/stops backend server when needed
-- Manages server lifecycle for CLI operations
-- Handles port conflicts and cleanup
-
-#### Display System
-- Rich terminal UI with progress bars and tables
-- Real-time execution monitoring with GraphQL subscriptions
-- Color-coded output for different node types
-- Interactive components for user input nodes
-
-#### Command Structure
-- **run**: Execute diagrams with various options (--light, --debug, --timeout)
-- **ask**: Natural language to diagram generation using LLMs
-- **claude_code** (dipeocc): Convert Claude Code sessions to executable diagrams
-- **integrations**: Manage API integrations and provider manifests
-- **convert**: Transform between diagram formats (native/light/readable)
-- **metrics**: Display execution performance and statistics
-
-### Features
-- Light YAML diagram support with hot reload
-- Real-time execution monitoring with colored output
-- Claude Code session conversion and watching
-- Integration management with OpenAPI import
-- Natural language diagram generation
-- Automatic server management
-- Rich terminal UI with progress tracking
-
-## Development Guidelines
-
-### Code Generation
-- **Never edit generated files** (`__generated__/`, `diagram_generated/`)
-- Modify TypeScript specs in `/dipeo/models/src/` instead
-- Run `make codegen` to regenerate
-
-### Package Management
-- **Python**: Use `uv` (auto-managed)
-- **JavaScript**: Use `pnpm` (not npm/yarn)
-
-### State Management
-- **Server**: EnhancedServiceRegistry with production safety
-- **Web**: Zustand with factory patterns
-- **CLI**: Stateless execution
-
-### GraphQL Development
-1. Define operations in `/dipeo/models/src/frontend/query-definitions/`
-2. Build: `cd dipeo/models && pnpm build`
-3. Generate: `make codegen && make apply`
-4. Update schema: `make graphql-schema`
-
-### Testing & Debugging
-- Server logs: `.dipeo/logs/server.log`
-- GraphQL Playground: `http://localhost:8000/graphql`
-- Add `--debug` flag to CLI commands
-- Frontend debug: `?debug=true` URL parameter
-
-## Important Notes
-
-- **Python 3.13+** required for all Python apps
-- **Type Safety**: Maintain strict typing in both TypeScript and Python
-- **Generated Code**: All models/schemas generated from TypeScript specs
-- **Service Architecture**: Uses mixin composition with unified EventBus
-- **Production Safety**: EnhancedServiceRegistry with freezing and audit trails
-
-## Common Issues
-
-| Issue | Solution |
-|-------|----------|
-| Import errors | Run `make install` |
-| TypeScript errors | `make graphql-schema` |
-| Generated code out of sync | Full codegen workflow |
-| Server not starting | Check port 8000 availability |
-| Frontend build fails | `pnpm install` in web directory |
diff --git a/apps/server/src/dipeo_server/__init__.py b/apps/server/src/dipeo_server/__init__.py
deleted file mode 100644
index 415e77208..000000000
--- a/apps/server/src/dipeo_server/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-"""DiPeO Server Package - Flattened structure for easier navigation."""
-
-__version__ = "1.0.0"
diff --git a/apps/server/src/dipeo_server/cli/__init__.py b/apps/server/src/dipeo_server/cli/__init__.py
deleted file mode 100644
index 0482613c3..000000000
--- a/apps/server/src/dipeo_server/cli/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-"""DiPeO CLI integrated into server - direct service access without HTTP overhead."""
-
-from .cli_runner import CLIRunner
-
-__all__ = ["CLIRunner"]
diff --git a/apps/server/src/dipeo_server/cli/commands/__init__.py b/apps/server/src/dipeo_server/cli/commands/__init__.py
deleted file mode 100644
index 9a12120f1..000000000
--- a/apps/server/src/dipeo_server/cli/commands/__init__.py
+++ /dev/null
@@ -1,6 +0,0 @@
-"""CLI command managers."""
-
-from .claude_code_manager import ClaudeCodeCommandManager
-from .integration_manager import IntegrationCommandManager
-
-__all__ = ["ClaudeCodeCommandManager", "IntegrationCommandManager"]
diff --git a/apps/server/src/dipeo_server/cli/core/__init__.py b/apps/server/src/dipeo_server/cli/core/__init__.py
deleted file mode 100644
index 7289e2484..000000000
--- a/apps/server/src/dipeo_server/cli/core/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-"""CLI core utilities."""
-
-from .diagram_loader import DiagramLoader
-from .server_manager import ServerManager
-from .session_manager import SessionManager
-
-__all__ = ["DiagramLoader", "ServerManager", "SessionManager"]
diff --git a/apps/server/src/dipeo_server/cli/handlers/__init__.py b/apps/server/src/dipeo_server/cli/handlers/__init__.py
deleted file mode 100644
index 6e72125db..000000000
--- a/apps/server/src/dipeo_server/cli/handlers/__init__.py
+++ /dev/null
@@ -1,6 +0,0 @@
-"""CLI event and interaction handlers."""
-
-from .event_forwarder import EventForwarder
-from .interactive_handler import cli_interactive_handler
-
-__all__ = ["EventForwarder", "cli_interactive_handler"]
diff --git a/apps/server/src/dipeo_server/infra/__init__.py b/apps/server/src/dipeo_server/infra/__init__.py
deleted file mode 100644
index d587d5e68..000000000
--- a/apps/server/src/dipeo_server/infra/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-"""Infrastructure layer for external dependencies."""
diff --git a/cli/__init__.py b/cli/__init__.py
new file mode 100644
index 000000000..b97806e9a
--- /dev/null
+++ b/cli/__init__.py
@@ -0,0 +1,32 @@
+"""DiPeO Command-Line Interface.
+
+This package provides the user-facing CLI tools for DiPeO:
+- dipeo: Main CLI for running diagrams, compilation, and queries
+- dipeocc: Claude Code session conversion tool
+
+The CLI is a consumer of the dipeo core library, providing a
+command-line interface for diagram operations, execution, and
+management tasks.
+"""
+
+from .claude_code_manager import ClaudeCodeCommandManager
+from .cli_runner import CLIRunner
+from .diagram_loader import DiagramLoader
+from .event_forwarder import EventForwarder
+from .integration_manager import IntegrationCommandManager
+from .interactive_handler import cli_interactive_handler
+from .server_manager import ServerManager
+from .session_manager import SessionManager
+
+__version__ = "1.0.0"
+
+__all__ = [
+    "CLIRunner",
+    "ClaudeCodeCommandManager",
+    "DiagramLoader",
+    "EventForwarder",
+    "IntegrationCommandManager",
+    "ServerManager",
+    "SessionManager",
+    "cli_interactive_handler",
+]
diff --git a/apps/server/src/dipeo_server/cli/__main__.py b/cli/__main__.py
similarity index 53%
rename from apps/server/src/dipeo_server/cli/__main__.py
rename to cli/__main__.py
index 59f3e083d..85891ea23 100644
--- a/apps/server/src/dipeo_server/cli/__main__.py
+++ b/cli/__main__.py
@@ -1,4 +1,4 @@
-"""Entry point for running dipeo_server.cli as a module."""
+"""Entry point for running cli as a module."""
 
 from .entry_point import main
 
diff --git a/apps/server/src/dipeo_server/cli/commands/claude_code_manager.py b/cli/claude_code_manager.py
similarity index 100%
rename from apps/server/src/dipeo_server/cli/commands/claude_code_manager.py
rename to cli/claude_code_manager.py
diff --git a/apps/server/src/dipeo_server/cli/cli_runner.py b/cli/cli_runner.py
similarity index 93%
rename from apps/server/src/dipeo_server/cli/cli_runner.py
rename to cli/cli_runner.py
index 3facf0262..9a9382a1a 100644
--- a/apps/server/src/dipeo_server/cli/cli_runner.py
+++ b/cli/cli_runner.py
@@ -3,14 +3,15 @@
 from typing import Any
 
 from dipeo.application.bootstrap import Container
-from dipeo_server.cli.commands import ClaudeCodeCommandManager, IntegrationCommandManager
 
-from .compilation import DiagramCompiler
-from .conversion import DiagramConverter
-from .core import DiagramLoader
+from .claude_code_manager import ClaudeCodeCommandManager
+from .commands.compilation import DiagramCompiler
+from .commands.conversion import DiagramConverter
+from .commands.execution import DiagramExecutor
+from .commands.query import DiagramQuery
+from .diagram_loader import DiagramLoader
 from .display import MetricsManager
-from .execution import DiagramExecutor
-from .query import DiagramQuery
+from .integration_manager import IntegrationCommandManager
 
 
 class CLIRunner:
diff --git a/cli/commands/__init__.py b/cli/commands/__init__.py
new file mode 100644
index 000000000..0ec38c474
--- /dev/null
+++ b/cli/commands/__init__.py
@@ -0,0 +1 @@
+"""CLI command implementations for DiPeO."""
diff --git a/apps/server/src/dipeo_server/cli/compilation.py b/cli/commands/compilation.py
similarity index 97%
rename from apps/server/src/dipeo_server/cli/compilation.py
rename to cli/commands/compilation.py
index 67e7903c8..16ba5ba1e 100644
--- a/apps/server/src/dipeo_server/cli/compilation.py
+++ b/cli/commands/compilation.py
@@ -9,7 +9,7 @@
 from dipeo.application.bootstrap import Container
 from dipeo.config.base_logger import get_module_logger
 
-from .core import DiagramLoader
+from ..diagram_loader import DiagramLoader
 
 logger = get_module_logger(__name__)
 
@@ -143,7 +143,9 @@ async def compile_diagram(
 
                 push_dir.mkdir(parents=True, exist_ok=True)
 
-                if format_type in ["light", "readable"]:
+                if format_type == "light":
+                    extension = ".light.yaml"
+                elif format_type == "readable":
                     extension = ".yaml"
                 else:
                     extension = ".json"
diff --git a/apps/server/src/dipeo_server/cli/conversion.py b/cli/commands/conversion.py
similarity index 98%
rename from apps/server/src/dipeo_server/cli/conversion.py
rename to cli/commands/conversion.py
index 7abdadf54..1868a577a 100644
--- a/apps/server/src/dipeo_server/cli/conversion.py
+++ b/cli/commands/conversion.py
@@ -5,7 +5,7 @@
 from dipeo.application.bootstrap import Container
 from dipeo.config.base_logger import get_module_logger
 
-from .core import DiagramLoader
+from ..diagram_loader import DiagramLoader
 
 logger = get_module_logger(__name__)
 
diff --git a/apps/server/src/dipeo_server/cli/execution.py b/cli/commands/execution.py
similarity index 97%
rename from apps/server/src/dipeo_server/cli/execution.py
rename to cli/commands/execution.py
index e36e591a1..4f77ac027 100644
--- a/apps/server/src/dipeo_server/cli/execution.py
+++ b/cli/commands/execution.py
@@ -13,9 +13,10 @@
 from dipeo.diagram_generated.domain_models import ExecutionID
 from dipeo.diagram_generated.enums import Status
 
-from .core import DiagramLoader, SessionManager
-from .display import DisplayManager
-from .handlers import cli_interactive_handler
+from ..diagram_loader import DiagramLoader
+from ..display import DisplayManager
+from ..interactive_handler import cli_interactive_handler
+from ..session_manager import SessionManager
 
 logger = get_module_logger(__name__)
 
@@ -139,7 +140,7 @@ async def run_diagram(
                 await metrics_observer.start()
 
                 if await self.session_manager.is_server_available():
-                    from dipeo_server.cli.handlers import EventForwarder
+                    from cli.event_forwarder import EventForwarder
 
                     event_forwarder = EventForwarder(execution_id=str(exec_id))
 
diff --git a/apps/server/src/dipeo_server/cli/query.py b/cli/commands/query.py
similarity index 88%
rename from apps/server/src/dipeo_server/cli/query.py
rename to cli/commands/query.py
index 2cbbbe93b..8e165264f 100644
--- a/apps/server/src/dipeo_server/cli/query.py
+++ b/cli/commands/query.py
@@ -8,7 +8,7 @@
 from dipeo.application.registry.keys import STATE_STORE
 from dipeo.config import STATE_DB_PATH
 from dipeo.config.base_logger import get_module_logger
-from dipeo_server.infra.message_store import MessageStore
+from dipeo.infrastructure.storage.message_store import MessageStore
 
 logger = get_module_logger(__name__)
 
@@ -20,39 +20,28 @@ def __init__(self, container: Container):
         self.container = container
         self.registry = container.registry
 
-    async def show_results(self, session_id: str, verbose: bool = False) -> bool:
-        """Query execution status and results by session_id.
+    async def get_results_data(self, session_id: str, verbose: bool = False) -> dict:
+        """Get execution results data as a dictionary.
 
         Args:
             session_id: Execution/session ID (format: exec_[32-char-hex])
-            verbose: If True, show detailed output with full conversation and metadata
+            verbose: If True, include detailed output with full conversation and metadata
 
         Returns:
-            True if query succeeded, False otherwise
+            Dictionary with results data or error information
         """
         try:
             if not re.match(r"^exec_[0-9a-f]{32}$", session_id):
-                print(
-                    json.dumps(
-                        {
-                            "error": f"Invalid session_id format: {session_id}",
-                            "expected_format": "exec_[32-char-hex]",
-                        }
-                    )
-                )
-                return False
+                return {
+                    "error": f"Invalid session_id format: {session_id}",
+                    "expected_format": "exec_[32-char-hex]",
+                }
 
             state_store = self.registry.resolve(STATE_STORE)
-
             result = await state_store.get_execution(session_id)
 
             if not result:
-                print(
-                    json.dumps(
-                        {"error": f"Execution not found: {session_id}", "session_id": session_id}
-                    )
-                )
-                return False
+                return {"error": f"Execution not found: {session_id}", "session_id": session_id}
 
             response = {
                 "session_id": session_id,
@@ -87,7 +76,6 @@ async def show_results(self, session_id: str, verbose: bool = False) -> bool:
                         else str(result.ended_at)
                     )
 
-                # Retrieve conversation messages (verbose only)
                 try:
                     message_store = MessageStore(STATE_DB_PATH)
                     await message_store.initialize()
@@ -96,16 +84,12 @@ async def show_results(self, session_id: str, verbose: bool = False) -> bool:
                         response["conversation"] = self._format_conversation(messages)
                 except Exception as e:
                     logger.debug(f"Could not retrieve messages: {e}")
-                    # Messages table may not exist yet - not a critical error
             else:
-                # Non-verbose: show summary
                 if hasattr(result, "executed_nodes") and result.executed_nodes:
                     response["executed_nodes_count"] = len(result.executed_nodes)
 
-                # Extract final output from endpoint or last node
                 if hasattr(result, "node_outputs") and result.node_outputs:
                     outputs = self._extract_node_outputs(result.node_outputs)
-                    # Try to find final output from endpoint or last executed node
                     final_output = None
                     if hasattr(result, "executed_nodes") and result.executed_nodes:
                         for node_id in reversed(result.executed_nodes):
@@ -119,23 +103,19 @@ async def show_results(self, session_id: str, verbose: bool = False) -> bool:
                     if final_output:
                         response["final_output"] = final_output
 
-                # Get last conversation message (if any)
                 try:
                     message_store = MessageStore(STATE_DB_PATH)
                     await message_store.initialize()
                     messages = await message_store.get_execution_messages(session_id)
                     if messages:
                         response["messages_count"] = len(messages)
-                        # Show last message
                         last_msg = messages[-1]
                         formatted = self._format_conversation([last_msg])
                         if formatted:
                             response["last_message"] = formatted[0]
                 except Exception as e:
                     logger.debug(f"Could not retrieve messages: {e}")
-                    # Messages table may not exist yet - not a critical error
 
-            # Always show LLM usage
             if hasattr(result, "llm_usage") and result.llm_usage:
                 response["llm_usage"] = {
                     "input_tokens": result.llm_usage.input
@@ -149,15 +129,25 @@ async def show_results(self, session_id: str, verbose: bool = False) -> bool:
                     else 0,
                 }
 
-            print(json.dumps(response, indent=2))
-            return True
+            return response
 
         except Exception as e:
             logger.error(f"Failed to query execution results: {e}")
-            print(
-                json.dumps({"error": f"Failed to query execution: {e!s}", "session_id": session_id})
-            )
-            return False
+            return {"error": f"Failed to query execution: {e!s}", "session_id": session_id}
+
+    async def show_results(self, session_id: str, verbose: bool = False) -> bool:
+        """Print the execution results for a session as indented JSON.
+
+        Args:
+            session_id: Execution/session ID (format: exec_[32-char-hex])
+            verbose: Forwarded to get_results_data to request detailed output
+
+        Returns:
+            True when the results carry no error value, False otherwise
+        """
+        results = await self.get_results_data(session_id, verbose)
+        print(json.dumps(results, indent=2))
+        return results.get("error") is None
 
     async def list_diagrams(
         self, output_json: bool = False, format_filter: str | None = None
diff --git a/apps/server/src/dipeo_server/cli/core/diagram_loader.py b/cli/diagram_loader.py
similarity index 97%
rename from apps/server/src/dipeo_server/cli/core/diagram_loader.py
rename to cli/diagram_loader.py
index c53ad7ecf..ee9b180d4 100644
--- a/apps/server/src/dipeo_server/cli/core/diagram_loader.py
+++ b/cli/diagram_loader.py
@@ -93,6 +93,7 @@ def resolve_diagram_path(self, diagram: str, format_type: str | None) -> str:
                     BASE_DIR / "examples",
                     BASE_DIR / "examples/simple_diagrams",
                     BASE_DIR / "projects",
+                    BASE_DIR / "projects/mcp-diagrams",
                     BASE_DIR / "files",
                 ]:
                     test_path = base_dir / f"{diagram}{ext}"
@@ -104,6 +105,7 @@ def resolve_diagram_path(self, diagram: str, format_type: str | None) -> str:
             BASE_DIR / "examples",
             BASE_DIR / "examples/simple_diagrams",
             BASE_DIR / "projects",
+            BASE_DIR / "projects/mcp-diagrams",
             BASE_DIR / "files",
         ]:
             test_path = base_dir / diagram
diff --git a/apps/server/src/dipeo_server/cli/dispatcher.py b/cli/dispatcher.py
similarity index 98%
rename from apps/server/src/dipeo_server/cli/dispatcher.py
rename to cli/dispatcher.py
index 9886b94a5..42e93e085 100644
--- a/apps/server/src/dipeo_server/cli/dispatcher.py
+++ b/cli/dispatcher.py
@@ -9,11 +9,11 @@
 import uuid
 import webbrowser
 
+from cli import CLIRunner
+from cli.server_manager import ServerManager
 from dipeo.application.bootstrap import init_resources, shutdown_resources
 from dipeo.infrastructure.logging_config import setup_logging
-from dipeo_server.app_context import create_server_container
-from dipeo_server.cli import CLIRunner
-from dipeo_server.cli.core import ServerManager
+from server.app_context import create_server_container
 
 
 async def run_cli_command(args: argparse.Namespace) -> bool:
@@ -174,7 +174,7 @@ def _handle_background_execution(args: argparse.Namespace) -> bool:
     cmd_args = [
         sys.executable,
         "-m",
-        "dipeo_server.cli.entry_point",
+        "cli.entry_point",
         "run",
         args.diagram,
     ]
diff --git a/apps/server/src/dipeo_server/cli/display/__init__.py b/cli/display/__init__.py
similarity index 100%
rename from apps/server/src/dipeo_server/cli/display/__init__.py
rename to cli/display/__init__.py
diff --git a/apps/server/src/dipeo_server/cli/display/display.py b/cli/display/display.py
similarity index 100%
rename from apps/server/src/dipeo_server/cli/display/display.py
rename to cli/display/display.py
diff --git a/apps/server/src/dipeo_server/cli/display/metrics_display.py b/cli/display/metrics_display.py
similarity index 100%
rename from apps/server/src/dipeo_server/cli/display/metrics_display.py
rename to cli/display/metrics_display.py
diff --git a/apps/server/src/dipeo_server/cli/display/metrics_manager.py b/cli/display/metrics_manager.py
similarity index 99%
rename from apps/server/src/dipeo_server/cli/display/metrics_manager.py
rename to cli/display/metrics_manager.py
index 3bd9292c2..6b55f8690 100644
--- a/apps/server/src/dipeo_server/cli/display/metrics_manager.py
+++ b/cli/display/metrics_manager.py
@@ -191,7 +191,7 @@ async def show_stats(self, diagram_path: str) -> bool:
             True if stats displayed successfully, False otherwise
         """
         try:
-            from ..core.diagram_loader import DiagramLoader
+            from ..diagram_loader import DiagramLoader
 
             loader = DiagramLoader()
             diagram_data, _ = await loader.load_diagram(diagram_path, None)
diff --git a/apps/server/src/dipeo_server/cli/entry_point.py b/cli/entry_point.py
similarity index 97%
rename from apps/server/src/dipeo_server/cli/entry_point.py
rename to cli/entry_point.py
index 8193c0693..cb3a58c53 100644
--- a/apps/server/src/dipeo_server/cli/entry_point.py
+++ b/cli/entry_point.py
@@ -48,7 +48,7 @@ def _run_server(args):
         console_output=True,
     )
 
-    from apps.server.main import app
+    from server.main import app
 
     uvicorn.run(
         app,
@@ -59,7 +59,7 @@ def _run_server(args):
 
 
 def _run_cli(args):
-    """Run as CLI."""
+    """Run as a CLI command."""
     from .dispatcher import run_cli_command
 
     try:
diff --git a/apps/server/src/dipeo_server/cli/handlers/event_forwarder.py b/cli/event_forwarder.py
similarity index 100%
rename from apps/server/src/dipeo_server/cli/handlers/event_forwarder.py
rename to cli/event_forwarder.py
diff --git a/apps/server/src/dipeo_server/cli/commands/integration_manager.py b/cli/integration_manager.py
similarity index 100%
rename from apps/server/src/dipeo_server/cli/commands/integration_manager.py
rename to cli/integration_manager.py
diff --git a/apps/server/src/dipeo_server/cli/handlers/interactive_handler.py b/cli/interactive_handler.py
similarity index 100%
rename from apps/server/src/dipeo_server/cli/handlers/interactive_handler.py
rename to cli/interactive_handler.py
diff --git a/apps/server/src/dipeo_server/cli/parser.py b/cli/parser.py
similarity index 100%
rename from apps/server/src/dipeo_server/cli/parser.py
rename to cli/parser.py
diff --git a/apps/server/src/dipeo_server/py.typed b/cli/py.typed
similarity index 100%
rename from apps/server/src/dipeo_server/py.typed
rename to cli/py.typed
diff --git a/cli/pyproject.toml b/cli/pyproject.toml
new file mode 100644
index 000000000..d59680023
--- /dev/null
+++ b/cli/pyproject.toml
@@ -0,0 +1,17 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "dipeo-cli"
+version = "1.0.0"
+description = "DiPeO Command-Line Interface"
+requires-python = ">=3.13"
+dependencies = []  # Dependencies managed by root pyproject.toml
+
+[project.scripts]
+dipeo = "cli.entry_point:main"
+dipeocc = "cli.entry_point:dipeocc_main"
+
+[tool.hatch.build.targets.wheel]
+packages = ["cli"]  # NOTE(review): this file lives inside cli/ - confirm the path resolves to the package and not cli/cli
diff --git a/apps/server/src/dipeo_server/cli/core/server_manager.py b/cli/server_manager.py
similarity index 99%
rename from apps/server/src/dipeo_server/cli/core/server_manager.py
rename to cli/server_manager.py
index 4ece3284a..715c9e7a9 100644
--- a/apps/server/src/dipeo_server/cli/core/server_manager.py
+++ b/cli/server_manager.py
@@ -44,7 +44,7 @@ async def start_async(self) -> None:
                 python_exe,
                 "-m",
                 "uvicorn",
-                "apps.server.main:app",
+                "server.main:app",
                 "--host",
                 self.host,
                 "--port",
@@ -96,7 +96,7 @@ async def start(self, timeout: int = 30) -> bool:
                 python_exe,
                 "-m",
                 "uvicorn",
-                "apps.server.main:app",
+                "server.main:app",
                 "--host",
                 self.host,
                 "--port",
diff --git a/apps/server/src/dipeo_server/cli/core/session_manager.py b/cli/session_manager.py
similarity index 100%
rename from apps/server/src/dipeo_server/cli/core/session_manager.py
rename to cli/session_manager.py
diff --git a/dipeo/diagram_generated/graphql/domain_types.py b/dipeo/diagram_generated/graphql/domain_types.py
index 43f735020..62ed1e580 100644
--- a/dipeo/diagram_generated/graphql/domain_types.py
+++ b/dipeo/diagram_generated/graphql/domain_types.py
@@ -925,6 +925,20 @@ class ExecutionStateType:
     @staticmethod
     def from_pydantic(obj: ExecutionState) -> "ExecutionStateType":
         """Convert from Pydantic model"""
+        # Convert metrics to a plain dict before passing it to the GraphQL type
+        metrics_serialized = None
+        if obj.metrics:
+            from dataclasses import asdict, is_dataclass
+            if hasattr(obj.metrics, 'model_dump'):
+                # Pydantic model - convert to dict
+                metrics_serialized = obj.metrics.model_dump()
+            elif is_dataclass(obj.metrics) and not isinstance(obj.metrics, type):
+                # Dataclass instance only: asdict() raises TypeError for anything else
+                metrics_serialized = asdict(obj.metrics)
+            else:
+                # Already a dict or another value that is passed through unchanged
+                metrics_serialized = obj.metrics
+
         return ExecutionStateType(
             id=obj.id,
             status=obj.status,
@@ -939,5 +953,5 @@ def from_pydantic(obj: ExecutionState) -> "ExecutionStateType":
             node_outputs={k: v.model_dump() for k, v in obj.node_outputs.items()},
             variables=obj.variables,
             exec_counts=obj.exec_counts,
-            metrics=obj.metrics,
+            metrics=metrics_serialized,
         )
diff --git a/dipeo/infrastructure/execution/messaging/base_message_router.py b/dipeo/infrastructure/execution/messaging/base_message_router.py
index e3a689db8..1a36eeb77 100644
--- a/dipeo/infrastructure/execution/messaging/base_message_router.py
+++ b/dipeo/infrastructure/execution/messaging/base_message_router.py
@@ -290,7 +290,7 @@ async def _flush_batch(self, execution_id: str) -> None:
 
         # Try to publish to streaming manager if available
         try:
-            from dipeo_server.api.graphql.subscriptions import publish_execution_update
+            from server.api.graphql.subscriptions import publish_execution_update
 
             await publish_execution_update(execution_id, batch_message)
         except ImportError:
@@ -386,18 +386,22 @@ async def handle(self, event: DomainEvent) -> None:
                 )
                 # Start with full payload data and add UI-specific fields
                 ui_data = payload.get("data", {}) if isinstance(payload.get("data"), dict) else {}
-                ui_data.update({
-                    "node_id": event.scope.node_id,
-                    "status": node_status,
-                    "timestamp": event.occurred_at.isoformat(),
-                })
+                ui_data.update(
+                    {
+                        "node_id": event.scope.node_id,
+                        "status": node_status,
+                        "timestamp": event.occurred_at.isoformat(),
+                    }
+                )
 
                 ui_payload = {
                     "type": event.type.value,
                     "event_type": event.type.value,
                     "execution_id": str(event.scope.execution_id),
                     "data": ui_data,
-                    "meta": payload.get("metadata", {}),  # Include metadata (person_id, model, etc.)
+                    "meta": payload.get(
+                        "metadata", {}
+                    ),  # Include metadata (person_id, model, etc.)
                     "timestamp": event.occurred_at.isoformat(),
                 }
 
diff --git a/dipeo/infrastructure/storage/__init__.py b/dipeo/infrastructure/storage/__init__.py
index cde85c4ea..2b935d7e3 100644
--- a/dipeo/infrastructure/storage/__init__.py
+++ b/dipeo/infrastructure/storage/__init__.py
@@ -5,17 +5,20 @@
 - FileSystemPort: POSIX-like file operations (local filesystem)
 - ArtifactStorePort: High-level artifact management
 - DBOperationsDomainService: JSON-based database-like storage operations
+- MessageStore: SQLite-based message persistence for executions
 """
 
 from .artifacts.artifact_adapter import ArtifactStoreAdapter
 from .cloud.s3_adapter import S3Adapter
 from .json_db import DBOperationsDomainService
 from .local.local_adapter import LocalBlobAdapter, LocalFileSystemAdapter
+from .message_store import MessageStore
 
 __all__ = [
     "ArtifactStoreAdapter",
     "DBOperationsDomainService",
     "LocalBlobAdapter",
     "LocalFileSystemAdapter",
+    "MessageStore",
     "S3Adapter",
 ]
diff --git a/apps/server/src/dipeo_server/infra/message_store.py b/dipeo/infrastructure/storage/message_store.py
similarity index 99%
rename from apps/server/src/dipeo_server/infra/message_store.py
rename to dipeo/infrastructure/storage/message_store.py
index af590af53..62225b594 100644
--- a/apps/server/src/dipeo_server/infra/message_store.py
+++ b/dipeo/infrastructure/storage/message_store.py
@@ -14,7 +14,7 @@ def __init__(self, db_path: Path):
 
     async def initialize(self):
         async with aiosqlite.connect(self.db_path) as db:
-            await db.execute("""
+            await db.executescript("""
                 CREATE TABLE IF NOT EXISTS messages (
                     id TEXT PRIMARY KEY,
                     execution_id TEXT NOT NULL,
diff --git a/docs/agents/backend-development.md b/docs/agents/backend-development.md
index eab7349f3..46ab5ff6d 100644
--- a/docs/agents/backend-development.md
+++ b/docs/agents/backend-development.md
@@ -1,50 +1,62 @@
 # Backend Development Guide
 
-**Scope**: FastAPI server, CLI, database, and MCP integration in `apps/server/`
+**Scope**: FastAPI server, CLI, database, and MCP integration in `server/` and `cli/`
 
 ## Overview {#overview}
 
-You are an expert backend engineer specializing in DiPeO's server infrastructure, command-line interface, database persistence, and MCP (Model Context Protocol) integration. You own all code in the apps/server/ directory.
+You are an expert backend engineer specializing in DiPeO's server infrastructure, command-line interface, database persistence, and MCP (Model Context Protocol) integration. You own all code in the `server/` and `cli/` directories.
 
 ## Your Domain of Expertise {#domain-of-expertise}
 
-You are responsible for all backend infrastructure in apps/server/:
+You are responsible for all backend infrastructure in `server/` and `cli/`:
 
 ### Server Structure {#server-structure}
 ```
-apps/server/
+server/                        # FastAPI server
 ├── main.py                    # FastAPI app initialization
-├── src/dipeo_server/
-│   ├── api/                   # API layer
-│   │   ├── router.py              # API routes (includes GraphQL endpoint)
-│   │   ├── mcp_sdk_server/        # MCP server implementation
-│   │   │   ├── __init__.py
-│   │   │   ├── config.py          # MCP configuration
-│   │   │   ├── discovery.py       # MCP discovery
-│   │   │   ├── resources.py       # MCP resources
-│   │   │   ├── routers.py         # MCP routing
-│   │   │   └── tools.py           # MCP tools
-│   │   └── mcp_utils.py           # MCP utilities
-│   ├── cli/                   # Command-line interface
-│   │   ├── cli_runner.py          # Execution logic
-│   │   ├── entry_point.py         # CLI entry point
-│   │   ├── parser.py              # Argument parsing
-│   │   ├── dispatcher.py          # Command dispatch
-│   │   ├── query.py               # Query commands
-│   │   ├── compilation.py         # Compilation commands
-│   │   ├── conversion.py          # Conversion utilities
-│   │   ├── execution.py           # Execution commands
-│   │   ├── commands/              # Command implementations
-│   │   ├── core/                  # Core utilities (diagram loader, server/session managers)
-│   │   ├── display/               # Display formatting
-│   │   └── handlers/              # Command handlers
-│   └── infra/                 # Infrastructure
-│       └── message_store.py       # Message persistence
+├── app_context.py             # Application context and container
+├── bootstrap.py               # Bootstrap utilities
+├── api/                       # API layer
+│   ├── router.py              # API routes (includes GraphQL endpoint)
+│   ├── context.py             # API context
+│   ├── middleware.py          # API middleware
+│   ├── webhooks.py            # Webhook handlers
+│   ├── mcp/                   # MCP server implementation
+│   │   ├── __init__.py
+│   │   ├── config.py          # MCP configuration
+│   │   ├── discovery.py       # MCP discovery
+│   │   ├── resources.py       # MCP resources
+│   │   ├── routers.py         # MCP routing
+│   │   └── tools.py           # MCP tools
+│   └── mcp_utils.py           # MCP utilities
+└── schema.graphql             # GraphQL schema
+
+cli/                           # Command-line interface
+├── entry_point.py             # CLI entry point (main)
+├── parser.py                  # Argument parsing
+├── dispatcher.py              # Command dispatch
+├── cli_runner.py              # CLI runner orchestration
+├── diagram_loader.py          # Diagram loading
+├── server_manager.py          # Server lifecycle management
+├── session_manager.py         # Session management
+├── claude_code_manager.py     # Claude Code integration
+├── integration_manager.py     # Integration management
+├── interactive_handler.py     # Interactive user input
+├── event_forwarder.py         # Event forwarding
+├── commands/                  # Command implementations
+│   ├── execution.py           # Execution commands
+│   ├── query.py               # Query commands
+│   ├── compilation.py         # Compilation commands
+│   └── conversion.py          # Conversion utilities
+└── display/                   # Display formatting
+    ├── display.py             # Display manager
+    ├── metrics_display.py     # Metrics formatting
+    └── metrics_manager.py     # Metrics management
 ```
 
 ## Your Core Responsibilities {#core-responsibilities}
 
-### 1. FastAPI Server (apps/server/main.py, api/) {#fastapi-server}
+### 1. FastAPI Server (server/main.py, server/api/) {#fastapi-server}
 **YOU OWN** the FastAPI application and all HTTP endpoints.
 
 **GraphQL Endpoint**: Configuration and initialization, Strawberry GraphQL integration, query/mutation execution, WebSocket subscriptions, and error handling.
@@ -55,7 +67,7 @@ apps/server/
 
 **Health & Monitoring**: Health check endpoints, server status reporting, performance monitoring, and error tracking.
 
-### 2. CLI System (apps/server/cli/) {#cli-system}
+### 2. CLI System (cli/) {#cli-system}
 **YOU OWN** all command-line interface commands and workflow.
 
 #### Core Commands {#cli-commands}
@@ -188,8 +200,8 @@ dipeo dipeocc watch
 
 **Background Execution**: The `--background` flag enables async execution via subprocess isolation. When used, the runner spawns a subprocess with a unique execution_id and returns immediately with session status. The subprocess persists state to the database, preventing blocking while allowing results retrieval via `dipeo results exec_id`.
 
-### 3. Database & Persistence (apps/server/infra/) {#database-persistence}
-**YOU OWN** the SQLite database schema and message store.
+### 3. Database & Persistence {#database-persistence}
+**YOU OWN** the SQLite database schema coordination and CLI-related database operations.
 
 #### Database Schema {#database-schema}
 
@@ -268,7 +280,7 @@ CREATE INDEX IF NOT EXISTS idx_node ON messages(node_id);
 
 **Database Migrations**: When modifying schema, coordinate with dipeo-package-maintainer to update `PersistenceManager.init_schema()` in `/dipeo/infrastructure/execution/state/persistence_manager.py`, test migration on existing data, document changes via `make schema-docs`, and consider backward compatibility.
 
-### 4. MCP Server Integration (apps/server/api/mcp_sdk_server/) {#mcp-server}
+### 4. MCP Server Integration (server/api/mcp/) {#mcp-server}
 **YOU OWN** the MCP (Model Context Protocol) server implementation.
 
 **MCP Architecture**: DiPeO exposes diagrams and executions as MCP tools and resources via the official Python SDK, allowing AI assistants to execute workflows and access results. The MCP server runs over HTTP (not stdio) via `/mcp` endpoint for broad access and external integration via ngrok.
@@ -295,7 +307,7 @@ def run_diagram(self, diagram_path: str, light: bool = False, background: bool =
 
 **Background Execution Pattern**: Use `subprocess.Popen()` to spawn CLI subprocess with unique execution_id and diagram path.
 ```python
-subprocess.Popen(["python", "-m", "dipeo_server.cli.entry_point", "run", diagram_path, "--execution-id", execution_id])
+subprocess.Popen([sys.executable, "-m", "cli.entry_point", "run", diagram_path, "--execution-id", execution_id])
 ```
 
 **Database Operation Pattern**: Use context manager for database connections, execute SQL with parameters, commit transactions.
@@ -355,12 +367,12 @@ Before completing any task:
 
 ## Key Files Reference {#key-files-reference}
 
-**Server**: `apps/server/main.py` (FastAPI app initialization), `apps/server/src/dipeo_server/api/router.py` (API routes with GraphQL endpoint)
+**Server**: `server/main.py` (FastAPI app initialization), `server/api/router.py` (API routes with GraphQL endpoint), `server/app_context.py` (application context)
 
-**CLI**: `apps/server/src/dipeo_server/cli/entry_point.py` (entry), `cli_runner.py` (execution logic), `parser.py` (argument parsing), `dispatcher.py` (command dispatch), `query.py` (query commands)
+**CLI**: `cli/entry_point.py` (main entry point), `cli/cli_runner.py` (execution logic), `cli/parser.py` (argument parsing), `cli/dispatcher.py` (command dispatch), `cli/commands/query.py` (query commands), `cli/commands/execution.py` (execution commands)
 
-**Database**: `/dipeo/infrastructure/execution/state/persistence_manager.py` (schema initialization - package-maintainer domain), `apps/server/src/dipeo_server/infra/message_store.py` (message persistence), `.dipeo/data/dipeo_state.db` (SQLite database)
+**Database**: `/dipeo/infrastructure/execution/state/persistence_manager.py` (schema initialization - package-maintainer domain), `.dipeo/data/dipeo_state.db` (SQLite database)
 
-**MCP**: `apps/server/src/dipeo_server/api/mcp_sdk_server/__init__.py` (server), `tools.py` (tools), `resources.py` (resources), `mcp_utils.py` (utilities)
+**MCP**: `server/api/mcp/__init__.py` (MCP server), `server/api/mcp/tools.py` (MCP tools), `server/api/mcp/resources.py` (MCP resources), `server/api/mcp_utils.py` (utilities)
 
 You are the guardian of DiPeO's backend infrastructure. Every CLI command, database operation, and API endpoint should be reliable, user-friendly, and well-documented. Your work directly impacts the developer experience and system reliability.
diff --git a/docs/agents/index.md b/docs/agents/index.md
index 9539c6f83..459658199 100644
--- a/docs/agents/index.md
+++ b/docs/agents/index.md
@@ -13,10 +13,10 @@ This directory contains detailed development guides for DiPeO's specialized Clau
 
 ### [Backend Development](backend-development.md) {#backend-development}
 **Agent**: `dipeo-backend`
-**Domain**: FastAPI server, CLI, database, and MCP integration in `apps/server/`
+**Domain**: FastAPI server, CLI, database, and MCP integration in `server/` and `cli/`
 **Key Areas**: FastAPI server, CLI commands, SQLite database, MCP server
 **Role**: Owns all backend infrastructure - server lifecycle, command-line interface, persistence, MCP protocol
-**Responsibilities**: GraphQL endpoint, dipeo run/results/metrics/compile/export commands, database schema, message store
+**Responsibilities**: GraphQL endpoint, dipeo run/results/metrics/compile/export commands, database coordination, MCP integration
 
 ### [Code Generation Pipeline](codegen-pipeline.md) {#code-generation-pipeline}
 **Agent**: `dipeo-codegen-pipeline`
diff --git a/docs/agents/package-maintainer.md b/docs/agents/package-maintainer.md
index a741e1999..9895c8977 100644
--- a/docs/agents/package-maintainer.md
+++ b/docs/agents/package-maintainer.md
@@ -49,9 +49,9 @@ You are responsible for runtime execution code in the /dipeo/ directory:
 - ❌ Code generation infrastructure (/dipeo/infrastructure/codegen/) → dipeo-codegen-pipeline
 - ❌ TypeScript model specifications (/dipeo/models/src/) → dipeo-codegen-pipeline
 - ❌ Generated code internals diagnosis → dipeo-codegen-pipeline
-- ❌ Backend server (apps/server/) → dipeo-backend
+- ❌ Backend server (server/ and cli/) → dipeo-backend
 - ❌ CLI commands → dipeo-backend
-- ❌ Database schema → dipeo-backend
+- ❌ Database schema coordination → dipeo-backend
 - ❌ MCP server → dipeo-backend
 
 ## Core Architectural Principles {#core-architectural-principles}
@@ -145,7 +145,7 @@ from dipeo.diagram_generated.generated_nodes import get_node_handler
 
 **To dipeo-codegen-pipeline**: Generated code doesn't provide expected APIs (IR builder/generation issues), generated code structure seems wrong (generation internals), need new node types or models (TypeScript specs), TypeScript spec questions (model design).
 
-**To dipeo-backend**: CLI command issues (apps/server/cli/), server startup/configuration (FastAPI server), database schema changes (apps/server/infra/), MCP server integration (MCP SDK server).
+**To dipeo-backend**: CLI command issues (cli/), server startup/configuration (FastAPI server at server/), database coordination, MCP server integration (server/api/mcp/).
 
 **To Architecture Docs**: Architecture questions (docs/architecture/), GraphQL layer questions (docs/architecture/detailed/graphql-layer.md).
 
diff --git a/docs/architecture/README.md b/docs/architecture/README.md
index be94d48f5..1dc3e13b2 100644
--- a/docs/architecture/README.md
+++ b/docs/architecture/README.md
@@ -9,7 +9,8 @@ DiPeO is an open-source platform that lets developers **design, run and monitor
 | Path                      | What it is                                         | Highlights                                                                              |
 | ------------------------- | -------------------------------------------------- | --------------------------------------------------------------------------------------- |
 | **`apps/web`**            | React 19 visual editor                             | Vite, TailwindCSS, @xyflow/react canvas, Apollo + GraphQL, TRPC, TanStack Query, Zustand state |
-| **`apps/server`**         | FastAPI / Strawberry-GraphQL backend + CLI         | Python 3.13, Hypercorn ASGI, GraphQL subscriptions, CLI at `src/dipeo_server/cli/`     |
+| **`server/`**             | FastAPI / Strawberry-GraphQL backend               | Python 3.13, Hypercorn ASGI, GraphQL subscriptions, MCP integration                    |
+| **`cli/`**                | Command-line tools (dipeo, dipeocc)                | User-facing CLI for diagram execution, compilation, and session conversion              |
 | **`dipeo/`**              | Core domain + application + infrastructure library | Execution engine, DI containers, adapters, code-gen output                              |
 | **`diagram_generated*/`** | Auto-generated code                                | Pydantic models, node handlers, GraphQL schema, TS hooks                                |
 
@@ -19,15 +20,15 @@ DiPeO is an open-source platform that lets developers **design, run and monitor
 
 DiPeO consists of three main applications that work together to provide a complete visual workflow platform:
 
-### Server (Backend API) - `apps/server`
+### Server (Backend API) - `server/`
 
 FastAPI server providing GraphQL and REST endpoints for diagram execution.
 
 **Architecture**:
-- **API Layer** (`src/dipeo_server/api/`): FastAPI/GraphQL adapters
-- **Infrastructure** (`src/dipeo_server/infra/`): State management, caching
+- **API Layer** (`api/`): FastAPI/GraphQL adapters, MCP integration
 - **Container** (`app_context.py`): Dependency injection configuration
 - **Entry Point**: `main.py` - FastAPI + Strawberry GraphQL server
+- **Bootstrap**: `bootstrap.py` - Infrastructure wiring and composition root
 
 **Key Features**:
 - **GraphQL API**: Strawberry-based with subscriptions at `/graphql`
@@ -48,14 +49,15 @@ React-based visual diagram editor. See @docs/agents/frontend-development.md for
 **Tech Stack**: React 19, XYFlow, Apollo Client, Zustand, TailwindCSS
 **Port**: 3000 (development)
 
-### CLI - `apps/server/src/dipeo_server/cli/`
+### CLI - `cli/`
 
-Command-line tool integrated into the server package.
+Command-line tools for diagram execution and management.
 
 **Key Components**:
-- **Server Manager**: Automatic backend lifecycle management
-- **Display System**: Rich terminal UI with GraphQL subscriptions
-- **Commands**: run, ask, claude_code (dipeocc), integrations, convert, metrics
+- **Server Manager**: Automatic backend lifecycle management for monitoring
+- **Display System**: Rich terminal UI with real-time execution updates
+- **Commands**: run, compile, results, metrics, export, convert
+- **DiPeOCC**: Claude Code session conversion tool (dipeocc command)
 
 **Usage**:
 ```bash
@@ -69,8 +71,9 @@ dipeo metrics --latest --breakdown
 
 | Layer                        | Purpose                                      | Key tech                                                                                                            |
 | ---------------------------- | -------------------------------------------- | ------------------------------------------------------------------------------------------------------------------- |
-| **Front-end**<br>`apps/web`  | Drag-and-drop diagram editor, run monitor    | *React 19*, Vite, @xyflow/react, Apollo Client + `graphql-ws`, TRPC (utilities only), Zustand, TanStack Query, TailwindCSS |
-| **Backend**<br>`apps/server` | Exposes GraphQL API, orchestrates runs, CLI  | *Python 3.13*, FastAPI, Strawberry GraphQL, GraphQL subscriptions, Hypercorn, Pydantic v2, CLI tools               |
+| **Frontend**<br>`apps/web`   | Drag-and-drop diagram editor, run monitor    | *React 19*, Vite, @xyflow/react, Apollo Client + `graphql-ws`, TRPC (utilities only), Zustand, TanStack Query, TailwindCSS |
+| **Server**<br>`server/`      | Exposes GraphQL API, MCP integration         | *Python 3.13*, FastAPI, Strawberry GraphQL, GraphQL subscriptions, Hypercorn, Pydantic v2, MCP SDK                 |
+| **CLI**<br>`cli/`            | Command-line tools for diagram operations    | *Python 3.13*, Click (argument parsing), Rich (terminal UI), DiPeOCC session converter                             |
 | **Core library**<br>`dipeo/` | Domain models, execution engine, memory      | Event-driven architecture, async runtime, Pydantic, DI service registry                                             |
 
 ---
@@ -271,7 +274,7 @@ make dev-all
 ### Production
 
 * Build SPA: `pnpm build` → serve via CDN or mount under FastAPI.
-* Serve API: `hypercorn apps/server.main:app -w 4 -k uvloop` (or Uvicorn/Gunicorn).
+* Serve API: `hypercorn server.main:app -w 4 -k uvloop` (or Uvicorn/Gunicorn).
 * For multi-worker deployments, Redis is required for GraphQL subscriptions to work across workers.
 * Container images & Helm charts are provided in `/deploy/`.
 
diff --git a/docs/database-schema.md b/docs/database-schema.md
index bd50e320f..9984eb4e1 100644
--- a/docs/database-schema.md
+++ b/docs/database-schema.md
@@ -50,7 +50,7 @@ DiPeO uses SQLite databases for persistence with the following schema:
 
 ### `messages`
 
-**Source**: `apps/server/src/dipeo_server/infra/message_store.py`
+**Source**: Message store implementation in DiPeO's infrastructure layer
 
 **Primary Key**: `id`
 
diff --git a/docs/features/mcp-server-integration.md b/docs/features/mcp-server-integration.md
index bb5e2a588..e309f1cc3 100644
--- a/docs/features/mcp-server-integration.md
+++ b/docs/features/mcp-server-integration.md
@@ -52,7 +52,7 @@ DiPeO's MCP server uses three distinct implementation patterns, each optimized f
 
 **Implementation approach:**
 ```python
-cmd_args = [sys.executable, "-m", "dipeo_server.cli.entry_point", "command", ...]
+cmd_args = [sys.executable, "-m", "cli.entry_point", "command", ...]
 proc = await asyncio.create_subprocess_exec(*cmd_args, ...)
 ```
 
@@ -235,7 +235,7 @@ See [ChatGPT MCP Integration Guide](./chatgpt-mcp-integration.md) for complete s
 
 ## Available Tools {#available-tools}
 
-DiPeO exposes 6 MCP tools for diagram execution and management. For detailed parameter documentation and examples, use the MCP `tools/list` method or inspect the tool descriptions in [`apps/server/src/dipeo_server/api/mcp_sdk_server/tools.py`](../../apps/server/src/dipeo_server/api/mcp_sdk_server/tools.py).
+DiPeO exposes 6 MCP tools for diagram execution and management. For detailed parameter documentation and examples, use the MCP `tools/list` method or inspect the tool descriptions in [`server/api/mcp/tools.py`](../../server/api/mcp/tools.py).
 
 ### Execution Tools
 
@@ -458,6 +458,6 @@ export DIPEO_LOG_LEVEL=DEBUG
 - [Comprehensive Light Diagram Guide](../formats/comprehensive_light_diagram_guide.md) - Creating diagrams
 
 **Developer Resources:**
-- [MCP Tools Implementation](../../apps/server/src/dipeo_server/api/mcp_sdk_server/tools.py) - Source code with detailed docstrings
+- [MCP Tools Implementation](../../server/api/mcp/tools.py) - Source code with detailed docstrings
 - [Model Context Protocol Specification](https://spec.modelcontextprotocol.io/) - Official MCP spec
 - [DiPeO CLI Documentation](../developer-guide.md) - CLI command reference
diff --git a/docs/projects/code-generation-guide.md b/docs/projects/code-generation-guide.md
index cae456a2a..f05f4067d 100644
--- a/docs/projects/code-generation-guide.md
+++ b/docs/projects/code-generation-guide.md
@@ -53,7 +53,7 @@ projects/codegen/
    ↓
 4. Apply Staged Changes (make apply-syntax-only)
    ↓
-5. Export GraphQL Schema → /apps/server/schema.graphql
+5. Export GraphQL Schema → /server/schema.graphql
    ↓
 6. Generate TypeScript Types (pnpm codegen)
 ```
@@ -125,15 +125,15 @@ Use `make apply-syntax-only` or `make apply` to move staged backend code to acti
 
 ### Stage 5: Export GraphQL Schema {#stage-5-export-graphql-schema}
 
-**Command**: `make graphql-schema`  
-**Output**: `/apps/server/schema.graphql`
+**Command**: `make graphql-schema`
+**Output**: `/server/schema.graphql`
 
 Exports the complete GraphQL schema from the application layer, capturing all types and operations from the generated Strawberry types.
 
 ### Stage 6: GraphQL TypeScript Generation {#stage-6-graphql-typescript-generation}
 
-**Source**: `/apps/web/src/__generated__/queries/*.graphql` + `/apps/server/schema.graphql`  
-**Output**: `/apps/web/src/__generated__/graphql.tsx`  
+**Source**: `/apps/web/src/__generated__/queries/*.graphql` + `/server/schema.graphql`
+**Output**: `/apps/web/src/__generated__/graphql.tsx`
 **Command**: Automatic via `pnpm codegen`
 
 Generates fully typed:
diff --git a/docs/projects/dipeocc-guide.md b/docs/projects/dipeocc-guide.md
index f7bcc2985..45debe6ed 100644
--- a/docs/projects/dipeocc-guide.md
+++ b/docs/projects/dipeocc-guide.md
@@ -366,7 +366,7 @@ dipeo run projects/claude_code/latest.light.yaml --debug
   - Better end-to-end diff application without manual fixes
 
 ### CLI Command {#cli-command}
-- **Location**: `apps/server/src/dipeo_server/cli/commands/claude_code_command.py`
+- **Location**: `cli/commands/conversion.py`
 - **Functionality**: Orchestrates conversion process and file management
 
 
diff --git a/docs/projects/korean/code-generation-guide.md b/docs/projects/korean/code-generation-guide.md
index d6ea81198..899380bbd 100644
--- a/docs/projects/korean/code-generation-guide.md
+++ b/docs/projects/korean/code-generation-guide.md
@@ -20,7 +20,7 @@ DiPeO는 다이어그램 기반의 다단계 코드 생성 파이프라인을 
    ↓
 4. 스테이징 변경 적용 (make apply-syntax-only)
    ↓
-5. GraphQL 스키마 내보내기 → /apps/server/schema.graphql
+5. GraphQL 스키마 내보내기 → /server/schema.graphql
    ↓
 6. TypeScript 타입 생성 (pnpm codegen)
 ```
@@ -85,13 +85,13 @@ DiPeO는 다이어그램 기반의 다단계 코드 생성 파이프라인을 
 ### 4단계: GraphQL 스키마 내보내기
 
 **명령**: `make graphql-schema`
-**출력**: `/apps/server/schema.graphql`
+**출력**: `/server/schema.graphql`
 
 애플리케이션 계층에서 전체 GraphQL 스키마를 내보내며, 생성된 Strawberry 타입의 모든 타입과 오퍼레이션을 반영합니다.
 
 ### 5단계: GraphQL TypeScript 생성
 
-**소스**: `/apps/web/src/__generated__/queries/*.graphql` + `/apps/server/schema.graphql`
+**소스**: `/apps/web/src/__generated__/queries/*.graphql` + `/server/schema.graphql`
 **출력**: `/apps/web/src/__generated__/graphql.tsx`
 **명령**: `pnpm codegen`으로 자동 실행
 
diff --git a/files/progressive_plan.md b/files/progressive_plan.md
deleted file mode 100644
index 27f7f77c6..000000000
--- a/files/progressive_plan.md
+++ /dev/null
@@ -1,64 +0,0 @@
-6. 
-Natural-Language-First Diagram Synthesis
-Problem
-• Non-technical stakeholders still struggle translating requirements into node graphs.
-Solution
-• Add an LLM-powered “Sketch-to-Diagram” flow: users describe intent in plain English → the system drafts a runnable diagram, complete with typed inputs/outputs and stubbed nodes.
-Technical Approach
-• Fine-tune a code-aware LLM on <prompt, diagram-spec> pairs.
-• Provide an interactive chat overlay that explains every generated node and lets the user accept / refine pieces (“replace DB node with DynamoDB variant”).
-Impact
-• Democratizes automation; massively wider user base; accelerates prototyping.
-7. 
-Real-Time Collaborative Diagrams via CRDTs
-Problem
-• Diagram files live in Git; collaboration is async and merge-conflict-heavy.
-Solution
-• Represent diagrams as Y.js / Automerge CRDT documents that sync through WebSockets, enabling Google-Docs-style co-editing with cursor presence, comments, and live validation.
-Technical Approach
-• Diagram editor embeds a CRDT layer; backend persists deltas in Postgres or S3.
-• Compile-time type checker (see plan.md §1) runs incrementally on CRDT updates, surfacing errors instantly to all collaborators.
-Impact
-• Eliminates “my branch is stale” friction; makes DiPeO viable for product teams and pair-programming sessions.
-8. 
-Secure, Polyglot, WebAssembly Node Runtime
-Problem
-• Python/TS code-job nodes execute with full host privileges; adding new languages is hard.
-Solution
-• Compile node code (Python/Rust/Go/JS) to WASI-compatible WebAssembly and run inside a lightweight, capability-scoped sandbox.
-Technical Approach
-• Leverage Wasmtime with wasi-keyvalue & wasi-http proposals for IO.
-• Provide SDKs that transpile user snippets to wasm32-wasi at build time.
-• Expose a declarative permission manifest per node (net, fs, env) checked at runtime.
-Impact
-• Strong isolation, near-native speed, language freedom; prepares DiPeO for untrusted plugins and serverless edge deployment.
-9. 
-Adaptive Graph Scheduler with Reinforcement Learning
-Problem
-• Current execution order is heuristics-based; doesn’t learn from historical runs.
-Solution
-• Train an RL agent (e.g., PPO) that observes node topology + runtime metrics and outputs scheduling decisions (batch size, parallelism degree, pre-fetching).
-Technical Approach
-• State = graph features + recent performance; Action = priority queue ordering; Reward = throughput / latency composite.
-• Start with simulation using recorded traces, then deploy in “shadow mode” before taking control.
-Impact
-• Continuous performance gains; paves the way for self-optimising diagrams to focus on macro-structure while the scheduler handles micro-timing.
-10. 
-Distributed, Fault-Tolerant Diagram Mesh (“Diagram Federation”)
-Problem
-• Large diagrams or multi-tenant workloads saturate a single cluster.
-Solution
-• Partition diagrams into sub-graphs that are deployed to geographically or functionally separate runtime clusters, communicating over gRPC streams with schema-generated contracts.
-Technical Approach
-• Extend DiagramType (§1 plan.md) with “affinity”/“shard” metadata.
-• Build a control plane that manages sub-graph placement, health checks, retries and cross-region data replication.
-• Provide transparent proxy nodes that bridge sub-graphs while preserving type guarantees.
-Impact
-• Planet-scale workflows, graceful degradation, and locality optimisation for latency-sensitive pipelines.
-
-These five initiatives complement the original roadmap:
-
-• #6 & #7 target usability and collaboration.
-• #8 fortifies security while unlocking polyglot extensibility.
-• #9 delivers intelligent, self-learning performance wins.
-• #10 elevates DiPeO from “single-cluster tool” to “distributed operating system for diagrams”.
diff --git a/projects/mcp-diagrams/hello_world.yaml b/projects/mcp-diagrams/hello_world.yaml
new file mode 100644
index 000000000..94a85c376
--- /dev/null
+++ b/projects/mcp-diagrams/hello_world.yaml
@@ -0,0 +1,14 @@
+version: light
+nodes:
+  - label: start
+    type: start
+    position: {x: 100, y: 100}
+    trigger_mode: manual
+  - label: end
+    type: endpoint
+    position: {x: 300, y: 100}
+    file_format: txt
+connections:
+  - from: start
+    to: end
+    content_type: raw_text
diff --git a/projects/mcp-diagrams/test_fix.light.yaml b/projects/mcp-diagrams/test_fix.light.yaml
new file mode 100644
index 000000000..8da48b7ef
--- /dev/null
+++ b/projects/mcp-diagrams/test_fix.light.yaml
@@ -0,0 +1,41 @@
+version: light
+nodes:
+- label: start
+  type: start
+  position:
+    x: 100
+    y: 100
+  trigger_mode: manual
+
+- label: test_job
+  type: person_job
+  position:
+    x: 300
+    y: 100
+  default_prompt: Say "Hello from MCP test!"
+  max_iteration: 1
+  memorize_to: ALL_MESSAGES
+  person: test_person
+
+- label: endpoint
+  type: endpoint
+  position:
+    x: 500
+    y: 100
+  file_format: txt
+  save_to_file: false
+
+connections:
+- from: start
+  to: test_job
+  content_type: raw_text
+
+- from: test_job
+  to: endpoint
+  content_type: raw_text
+
+persons:
+  test_person:
+    service: openai
+    model: gpt-5-nano-2025-08-07
+    api_key_id: APIKEY_52609F
diff --git a/pyproject.toml b/pyproject.toml
index 058ae7f63..37374b3e8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -110,8 +110,8 @@ dependencies = [
     "yarl==1.20.1",
     "datamodel-code-generator>=0.21.0",
     "ollama>=0.5.0",
-    "mcp>=1.16.0",  # Official MCP Python SDK
-    "claude-agent-sdk @ git+https://github.com/anthropics/claude-agent-sdk-python.git",
+    "mcp>=1.16.0", # Official MCP Python SDK
+    "claude-agent-sdk==0.1.6",
     "rich>=13.0.0",
     "psutil==7.1.0",
     # Workspace packages (automatically installed as editable by uv)
@@ -158,11 +158,12 @@ dev-dependencies = [
 ]
 
 [tool.uv.workspace]
-members = ["dipeo", "apps/server"]
+members = ["dipeo", "cli", "server"]
 
 # UV workspace members are automatically installed as editable
 
 [tool.uv.sources]
+dipeo-cli = { workspace = true }
 dipeo-server = { workspace = true }
 
 # Tool configurations (inherited from existing configs)
@@ -335,7 +336,7 @@ extend-exclude = '''
 [tool.isort]
 profile = "black"
 skip_glob = ["*/__generated__/*", "*/generated/*", "*/diagram_generated/*", "*/diagram_generated_staged/*"]
-known_first_party = ["dipeo", "dipeo_server"]
+known_first_party = ["dipeo", "cli", "server"]
 sections = ["FUTURE", "STDLIB", "THIRDPARTY", "FIRSTPARTY", "LOCALFOLDER"]
 multi_line_output = 3
 include_trailing_comma = true
@@ -344,7 +345,7 @@ use_parentheses = true
 line_length = 88
 
 [tool.pytest.ini_options]
-testpaths = ["tests", "apps/server/tests"]
+testpaths = ["tests"]
 python_files = ["test_*.py", "*_test.py"]
 addopts = "-v"
 asyncio_mode = "auto"
diff --git a/server/__init__.py b/server/__init__.py
new file mode 100644
index 000000000..77ad854eb
--- /dev/null
+++ b/server/__init__.py
@@ -0,0 +1,41 @@
+"""DiPeO Backend API Server.
+
+This package provides the API service for DiPeO:
+- FastAPI server with GraphQL endpoint
+- MCP (Model Context Protocol) integration
+- Webhook endpoints
+- Execution monitoring and management API
+
+The server is a consumer of the dipeo core library, providing
+an HTTP/GraphQL interface for diagram operations, execution
+management, and integration with external services.
+"""
+
+import sys
+from types import ModuleType
+
+__version__ = "1.0.0"
+
+# Backward compatibility: Make this module accessible as 'dipeo_server'
+# to support legacy imports (e.g., 'from dipeo_server import ...')
+# This allows a gradual migration from the old module structure
+_this_module = sys.modules[__name__]
+sys.modules["dipeo_server"] = _this_module
+
+# Replace that alias with a dipeo_server shim module that mirrors this package's
+# structure. This handles imports like 'from dipeo_server.api import ...'
+class _CompatModule(ModuleType):
+    """Legacy 'dipeo_server' shim that lazily resolves server.* submodules."""
+
+    def __getattr__(self, name):
+        # Reached only when normal lookup fails; non-empty fromlist returns the submodule.
+        try:
+            return __import__(f"server.{name}", fromlist=[name])
+        except ImportError as exc:
+            message = f"module 'dipeo_server' has no attribute '{name}'"
+            raise AttributeError(message) from exc
+
+# Set up the compatibility module
+_compat_module = _CompatModule("dipeo_server")
+_compat_module.__path__ = _this_module.__path__ if hasattr(_this_module, '__path__') else []
+sys.modules["dipeo_server"] = _compat_module
diff --git a/apps/server/src/dipeo_server/__main__.py b/server/__main__.py
similarity index 58%
rename from apps/server/src/dipeo_server/__main__.py
rename to server/__main__.py
index 41739c838..cd2906d4b 100644
--- a/apps/server/src/dipeo_server/__main__.py
+++ b/server/__main__.py
@@ -1,10 +1,6 @@
 """Entry point for dipeo-server when run as a module."""
 
-import sys
-from pathlib import Path
-
-sys.path.insert(0, str(Path(__file__).parent.parent.parent))
-from main import start
+from .main import start
 
 
 def main():
diff --git a/apps/server/src/dipeo_server/api/__init__.py b/server/api/__init__.py
similarity index 100%
rename from apps/server/src/dipeo_server/api/__init__.py
rename to server/api/__init__.py
diff --git a/apps/server/src/dipeo_server/api/context.py b/server/api/context.py
similarity index 97%
rename from apps/server/src/dipeo_server/api/context.py
rename to server/api/context.py
index 7f29ae544..7b0b6d87b 100644
--- a/apps/server/src/dipeo_server/api/context.py
+++ b/server/api/context.py
@@ -47,7 +47,7 @@ def can_read_api_keys(self) -> bool:
 
 def get_request_context(request_or_ws=None):
     """Create request context for HTTP or WebSocket connections."""
-    from dipeo_server.app_context import get_container
+    from server.app_context import get_container
 
     container = get_container()
 
diff --git a/apps/server/src/dipeo_server/api/mcp_sdk_server/__init__.py b/server/api/mcp/__init__.py
similarity index 100%
rename from apps/server/src/dipeo_server/api/mcp_sdk_server/__init__.py
rename to server/api/mcp/__init__.py
diff --git a/apps/server/src/dipeo_server/api/mcp_sdk_server/config.py b/server/api/mcp/config.py
similarity index 80%
rename from apps/server/src/dipeo_server/api/mcp_sdk_server/config.py
rename to server/api/mcp/config.py
index 37f68b385..f2bc14d49 100644
--- a/apps/server/src/dipeo_server/api/mcp_sdk_server/config.py
+++ b/server/api/mcp/config.py
@@ -7,7 +7,7 @@
 
 DEFAULT_MCP_TIMEOUT = int(os.environ.get("MCP_DEFAULT_TIMEOUT", "300"))
 
-PROJECT_ROOT = Path(__file__).parent.parent.parent.parent.parent.parent
+PROJECT_ROOT = Path(__file__).parent.parent.parent
 
 mcp_server = FastMCP(
     name="dipeo-mcp-server",
diff --git a/apps/server/src/dipeo_server/api/mcp_sdk_server/discovery.py b/server/api/mcp/discovery.py
similarity index 100%
rename from apps/server/src/dipeo_server/api/mcp_sdk_server/discovery.py
rename to server/api/mcp/discovery.py
diff --git a/apps/server/src/dipeo_server/api/mcp_sdk_server/resources.py b/server/api/mcp/resources.py
similarity index 100%
rename from apps/server/src/dipeo_server/api/mcp_sdk_server/resources.py
rename to server/api/mcp/resources.py
diff --git a/apps/server/src/dipeo_server/api/mcp_sdk_server/routers.py b/server/api/mcp/routers.py
similarity index 100%
rename from apps/server/src/dipeo_server/api/mcp_sdk_server/routers.py
rename to server/api/mcp/routers.py
diff --git a/apps/server/src/dipeo_server/api/mcp_sdk_server/tools.py b/server/api/mcp/tools.py
similarity index 93%
rename from apps/server/src/dipeo_server/api/mcp_sdk_server/tools.py
rename to server/api/mcp/tools.py
index 4727c2a9f..9560648d7 100644
--- a/apps/server/src/dipeo_server/api/mcp_sdk_server/tools.py
+++ b/server/api/mcp/tools.py
@@ -67,10 +67,14 @@ async def run_backend(
     if input_data is None:
         input_data = {}
 
+    # Default to mcp-diagrams directory if diagram is just a name (no path separators)
+    if "/" not in diagram and "\\" not in diagram:
+        diagram = f"projects/mcp-diagrams/{diagram}"
+
     cmd_args = [
         sys.executable,
         "-m",
-        "dipeo_server.cli.entry_point",
+        "cli.entry_point",
         "run",
         diagram,
         "--background",
@@ -99,7 +103,7 @@ async def run_backend(
         # Add timeout to prevent hanging
         try:
             stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=60.0)
-        except asyncio.TimeoutError:
+        except TimeoutError:
             proc.kill()
             await proc.wait()
             result = {
@@ -184,54 +188,22 @@ async def run_backend(
 """
 )
 async def see_result(session_id: str) -> list[TextContent]:
-    try:
-        cmd_args = [
-            sys.executable,
-            "-m",
-            "dipeo_server.cli.entry_point",
-            "results",
-            session_id,
-            "--verbose",
-        ]
-
-        proc = await asyncio.create_subprocess_exec(
-            *cmd_args,
-            stdout=asyncio.subprocess.PIPE,
-            stderr=asyncio.subprocess.PIPE,
-        )
-
-        # Add timeout to prevent hanging
-        try:
-            stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=30.0)
-        except asyncio.TimeoutError:
-            proc.kill()
-            await proc.wait()
-            error_result = {
-                "success": False,
-                "session_id": session_id,
-                "error": "Result retrieval timed out after 30 seconds",
-            }
-            return [TextContent(type="text", text=json.dumps(error_result, indent=2))]
+    from cli import CLIRunner
+    from server.app_context import get_container
 
-        output = stdout.decode().strip()
-        cli_result = json.loads(output)
+    try:
+        container = get_container()
+        cli = CLIRunner(container)
 
-        if "error" in cli_result:
-            result = {
-                "success": False,
-                "session_id": session_id,
-                "error": cli_result["error"],
-            }
-            return [TextContent(type="text", text=json.dumps(result, indent=2))]
+        result_data = await cli.query.get_results_data(session_id, verbose=True)
 
-        return [TextContent(type="text", text=json.dumps(cli_result, indent=2))]
+        return [TextContent(type="text", text=json.dumps(result_data, indent=2))]
 
     except Exception as e:
         logger.error(f"Error retrieving result for {session_id}: {e}", exc_info=True)
         error_result = {
-            "success": False,
-            "session_id": session_id,
             "error": f"Error retrieving result: {e!s}",
+            "session_id": session_id,
         }
         return [TextContent(type="text", text=json.dumps(error_result, indent=2))]
 
@@ -283,6 +255,10 @@ async def dipeo_run(
     if input_data is None:
         input_data = {}
 
+    # Resolve a bare diagram name (no "/" or "\" separators) to projects/mcp-diagrams/<name>
+    if "/" not in diagram and "\\" not in diagram:
+        diagram = f"projects/mcp-diagrams/{diagram}"
+
     arguments = {
         "diagram": diagram,
         "input_data": input_data,
@@ -367,7 +343,6 @@ async def _execute_diagram(arguments: dict[str, Any]) -> list[TextContent]:
 """
 )
 async def search(query: str) -> list[TextContent]:
-
     def search_diagrams(search_query: str):
         results = []
         search_lower = search_query.lower()
@@ -470,7 +445,6 @@ def search_diagrams(search_query: str):
 """
 )
 async def fetch(uri: str) -> list[TextContent]:
-
     def fetch_diagram_content(diagram_uri: str):
         if diagram_uri.startswith("dipeo://diagrams/"):
             diagram_name = diagram_uri.replace("dipeo://diagrams/", "")
@@ -617,7 +591,7 @@ async def compile_diagram(
         cmd_args = [
             sys.executable,
             "-m",
-            "dipeo_server.cli.entry_point",
+            "cli.entry_point",
             "compile",
             "--stdin",
             "--json",
@@ -645,7 +619,7 @@ async def compile_diagram(
             stdout, stderr = await asyncio.wait_for(
                 proc.communicate(input=diagram_content.encode()), timeout=30.0
             )
-        except asyncio.TimeoutError:
+        except TimeoutError:
             proc.kill()
             await proc.wait()
             result = {
diff --git a/apps/server/src/dipeo_server/api/mcp_utils.py b/server/api/mcp_utils.py
similarity index 97%
rename from apps/server/src/dipeo_server/api/mcp_utils.py
rename to server/api/mcp_utils.py
index cdb4532a4..394239f2d 100644
--- a/apps/server/src/dipeo_server/api/mcp_utils.py
+++ b/server/api/mcp_utils.py
@@ -10,9 +10,9 @@
 from dataclasses import dataclass
 from typing import Any, Optional
 
+from cli import CLIRunner
 from dipeo.config.base_logger import get_module_logger
-from dipeo_server.app_context import get_container
-from dipeo_server.cli import CLIRunner
+from server.app_context import get_container
 
 logger = get_module_logger(__name__)
 
diff --git a/apps/server/src/dipeo_server/api/middleware.py b/server/api/middleware.py
similarity index 100%
rename from apps/server/src/dipeo_server/api/middleware.py
rename to server/api/middleware.py
diff --git a/apps/server/src/dipeo_server/api/router.py b/server/api/router.py
similarity index 92%
rename from apps/server/src/dipeo_server/api/router.py
rename to server/api/router.py
index 1b7835577..c659afb59 100644
--- a/apps/server/src/dipeo_server/api/router.py
+++ b/server/api/router.py
@@ -10,13 +10,13 @@
 from dipeo.application.graphql import create_schema
 
 from .context import get_request_context
-from .mcp_sdk_server import create_info_router, create_messages_router
+from .mcp import create_info_router, create_messages_router
 from .webhooks import router as webhook_router
 
 
 def create_graphql_router(context_getter=None, container=None):
     """Create a GraphQL router with monitoring stream support."""
-    from dipeo_server.app_context import get_container
+    from server.app_context import get_container
 
     if not container:
         container = get_container()
diff --git a/apps/server/src/dipeo_server/api/webhooks.py b/server/api/webhooks.py
similarity index 98%
rename from apps/server/src/dipeo_server/api/webhooks.py
rename to server/api/webhooks.py
index 40341d68b..3ade4e9b6 100644
--- a/apps/server/src/dipeo_server/api/webhooks.py
+++ b/server/api/webhooks.py
@@ -209,7 +209,7 @@ async def receive_webhook(provider: str, request: Request, response: Response) -
     """Receive and process provider webhooks with signature validation and event emission."""
     try:
         from dipeo.application.registry.keys import EVENT_BUS, PROVIDER_REGISTRY
-        from dipeo_server.app_context import get_container
+        from server.app_context import get_container
 
         container = get_container()
         registry = container.registry.resolve(PROVIDER_REGISTRY)
@@ -261,7 +261,7 @@ async def receive_webhook(provider: str, request: Request, response: Response) -
 async def test_webhook_endpoint(provider: str) -> dict[str, Any]:
     """Test endpoint returning webhook configuration and supported events."""
     try:
-        from dipeo_server.app_context import get_container
+        from server.app_context import get_container
 
         container = get_container()
         registry = container.registry.get("provider_registry")
diff --git a/apps/server/src/dipeo_server/app_context.py b/server/app_context.py
similarity index 99%
rename from apps/server/src/dipeo_server/app_context.py
rename to server/app_context.py
index f08cb4255..e274a9de7 100644
--- a/apps/server/src/dipeo_server/app_context.py
+++ b/server/app_context.py
@@ -22,11 +22,6 @@ async def create_server_container() -> Container:
     settings = get_settings()
     container = Container(settings)
 
-    from apps.server.bootstrap import (
-        bootstrap_services,
-        execute_event_subscriptions,
-        wire_feature_flags,
-    )
     from dipeo.application.registry.keys import (
         CLI_SESSION_SERVICE,
         EVENT_BUS,
@@ -35,6 +30,12 @@ async def create_server_container() -> Container:
         STATE_STORE,
     )
 
+    from .bootstrap import (
+        bootstrap_services,
+        execute_event_subscriptions,
+        wire_feature_flags,
+    )
+
     bootstrap_services(container.registry, redis_client=None)
 
     features = os.getenv("DIPEO_FEATURES", "").split(",") if os.getenv("DIPEO_FEATURES") else []
diff --git a/apps/server/bootstrap.py b/server/bootstrap.py
similarity index 100%
rename from apps/server/bootstrap.py
rename to server/bootstrap.py
diff --git a/apps/server/main.py b/server/main.py
similarity index 96%
rename from apps/server/main.py
rename to server/main.py
index 882da8f0d..bbc141b62 100644
--- a/apps/server/main.py
+++ b/server/main.py
@@ -4,15 +4,15 @@
 from contextlib import asynccontextmanager
 from pathlib import Path
 
-from dipeo_server.api.middleware import setup_middleware
-from dipeo_server.api.router import setup_routes
-from dipeo_server.app_context import initialize_container_async
 from dotenv import load_dotenv
 from fastapi import FastAPI, Request
 from fastapi.responses import Response
 
 from dipeo.application.bootstrap import init_resources, shutdown_resources
 from dipeo.infrastructure.logging_config import setup_logging
+from server.api.middleware import setup_middleware
+from server.api.router import setup_routes
+from server.app_context import initialize_container_async
 
 
 def setup_bundled_paths():
diff --git a/server/py.typed b/server/py.typed
new file mode 100644
index 000000000..e69de29bb
diff --git a/apps/server/pyproject.toml b/server/pyproject.toml
similarity index 64%
rename from apps/server/pyproject.toml
rename to server/pyproject.toml
index 252d5a4bb..83d1ac40a 100644
--- a/apps/server/pyproject.toml
+++ b/server/pyproject.toml
@@ -10,9 +10,7 @@ requires-python = ">=3.13"
 dependencies = []  # Dependencies managed by root pyproject.toml
 
 [project.scripts]
-dipeo-server = "dipeo_server.__main__:main"
-dipeo = "dipeo_server.cli.entry_point:main"
-dipeocc = "dipeo_server.cli.entry_point:dipeocc_main"
+dipeo-server = "server.__main__:main"
 
 [tool.hatch.build.targets.wheel]
-packages = ["src/dipeo_server"]
+packages = ["server"]
diff --git a/apps/server/schema.graphql b/server/schema.graphql
similarity index 100%
rename from apps/server/schema.graphql
rename to server/schema.graphql
diff --git a/tests/api/__init__.py b/tests/api/__init__.py
deleted file mode 100644
index 7e5331416..000000000
--- a/tests/api/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-"""Tests for API modules."""
diff --git a/tests/api/auth/__init__.py b/tests/api/auth/__init__.py
deleted file mode 100644
index c7c9789e5..000000000
--- a/tests/api/auth/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-"""Tests for API auth modules."""
diff --git a/tests/api/auth/test_dependencies.py b/tests/api/auth/test_dependencies.py
deleted file mode 100644
index 9a30b01c7..000000000
--- a/tests/api/auth/test_dependencies.py
+++ /dev/null
@@ -1,360 +0,0 @@
-"""Tests for FastAPI authentication dependencies.
-
-These tests verify the authentication dependency behavior including:
-- Optional authentication
-- Required authentication
-- Bearer token authentication
-- API key authentication
-"""
-
-import os
-import time
-from unittest.mock import AsyncMock, Mock, patch
-
-import jwt
-import pytest
-from fastapi import HTTPException, Request
-
-from dipeo_server.api.auth.dependencies import (
-    get_current_user,
-    optional_authentication,
-    require_authentication,
-)
-from dipeo_server.api.auth.oauth import TokenData
-
-
-@pytest.fixture
-def mock_request():
-    """Create a mock FastAPI request."""
-    request = Mock(spec=Request)
-    request.headers = {}
-    return request
-
-
-@pytest.fixture
-def hs256_secret():
-    """HS256 secret key for testing."""
-    return "test-secret-key-for-hs256"
-
-
-@pytest.fixture
-def valid_jwt_token(hs256_secret):
-    """Create a valid JWT token."""
-    payload = {
-        "sub": "test-user",
-        "email": "test@example.com",
-        "exp": int(time.time()) + 3600,
-    }
-    return jwt.encode(payload, hs256_secret, algorithm="HS256")
-
-
-class TestGetCurrentUser:
-    """Test get_current_user dependency."""
-
-    @pytest.mark.asyncio
-    async def test_auth_disabled(self, mock_request):
-        """Test that authentication is bypassed when disabled."""
-        with patch.dict(os.environ, {"MCP_AUTH_ENABLED": "false"}, clear=True):
-            import dipeo_server.api.auth.oauth as oauth_module
-
-            oauth_module._oauth_config = None
-
-            user = await get_current_user(mock_request, None)
-            assert user is None
-
-    @pytest.mark.asyncio
-    async def test_valid_jwt_token(self, mock_request, valid_jwt_token, hs256_secret):
-        """Test authentication with valid JWT token."""
-        # Create bearer credentials mock
-        credentials = Mock()
-        credentials.credentials = valid_jwt_token
-
-        with patch.dict(
-            os.environ,
-            {
-                "MCP_AUTH_ENABLED": "true",
-                "MCP_JWT_ENABLED": "true",
-                "MCP_JWT_ALGORITHM": "HS256",
-                "MCP_JWT_SECRET": hs256_secret,
-            },
-            clear=True,
-        ):
-            import dipeo_server.api.auth.oauth as oauth_module
-
-            oauth_module._oauth_config = None
-
-            user = await get_current_user(mock_request, credentials)
-            assert user is not None
-            assert user.sub == "test-user"
-            assert user.email == "test@example.com"
-
-    @pytest.mark.asyncio
-    async def test_valid_api_key(self, mock_request):
-        """Test authentication with valid API key."""
-        test_api_key = "test-api-key-12345"
-        mock_request.headers = {"X-API-Key": test_api_key}
-
-        with patch.dict(
-            os.environ,
-            {
-                "MCP_AUTH_ENABLED": "true",
-                "MCP_API_KEY_ENABLED": "true",
-                "MCP_API_KEYS": test_api_key,
-            },
-            clear=True,
-        ):
-            import dipeo_server.api.auth.oauth as oauth_module
-
-            oauth_module._oauth_config = None
-
-            user = await get_current_user(mock_request, None)
-            assert user is not None
-            assert user.sub == "api_key_user"
-
-    @pytest.mark.asyncio
-    async def test_invalid_jwt_optional_auth(
-        self, mock_request, hs256_secret
-    ):
-        """Test that invalid JWT is ignored when auth is optional."""
-        credentials = Mock()
-        credentials.credentials = "invalid.jwt.token"
-
-        with patch.dict(
-            os.environ,
-            {
-                "MCP_AUTH_ENABLED": "true",
-                "MCP_AUTH_REQUIRED": "false",  # Optional
-                "MCP_JWT_ENABLED": "true",
-                "MCP_JWT_ALGORITHM": "HS256",
-                "MCP_JWT_SECRET": hs256_secret,
-            },
-            clear=True,
-        ):
-            import dipeo_server.api.auth.oauth as oauth_module
-
-            oauth_module._oauth_config = None
-
-            # Should return None, not raise exception
-            user = await get_current_user(mock_request, credentials)
-            assert user is None
-
-    @pytest.mark.asyncio
-    async def test_invalid_jwt_required_auth(
-        self, mock_request, hs256_secret
-    ):
-        """Test that invalid JWT raises exception when auth is required."""
-        credentials = Mock()
-        credentials.credentials = "invalid.jwt.token"
-
-        with patch.dict(
-            os.environ,
-            {
-                "MCP_AUTH_ENABLED": "true",
-                "MCP_AUTH_REQUIRED": "true",  # Required
-                "MCP_JWT_ENABLED": "true",
-                "MCP_JWT_ALGORITHM": "HS256",
-                "MCP_JWT_SECRET": hs256_secret,
-            },
-            clear=True,
-        ):
-            import dipeo_server.api.auth.oauth as oauth_module
-
-            oauth_module._oauth_config = None
-
-            with pytest.raises(HTTPException) as exc_info:
-                await get_current_user(mock_request, credentials)
-
-            assert exc_info.value.status_code == 401
-
-    @pytest.mark.asyncio
-    async def test_invalid_api_key_optional_auth(self, mock_request):
-        """Test that invalid API key is ignored when auth is optional."""
-        mock_request.headers = {"X-API-Key": "invalid-key"}
-
-        with patch.dict(
-            os.environ,
-            {
-                "MCP_AUTH_ENABLED": "true",
-                "MCP_AUTH_REQUIRED": "false",  # Optional
-                "MCP_API_KEY_ENABLED": "true",
-                "MCP_API_KEYS": "valid-key",
-            },
-            clear=True,
-        ):
-            import dipeo_server.api.auth.oauth as oauth_module
-
-            oauth_module._oauth_config = None
-
-            user = await get_current_user(mock_request, None)
-            assert user is None
-
-    @pytest.mark.asyncio
-    async def test_invalid_api_key_required_auth(self, mock_request):
-        """Test that invalid API key raises exception when auth is required."""
-        mock_request.headers = {"X-API-Key": "invalid-key"}
-
-        with patch.dict(
-            os.environ,
-            {
-                "MCP_AUTH_ENABLED": "true",
-                "MCP_AUTH_REQUIRED": "true",  # Required
-                "MCP_API_KEY_ENABLED": "true",
-                "MCP_API_KEYS": "valid-key",
-            },
-            clear=True,
-        ):
-            import dipeo_server.api.auth.oauth as oauth_module
-
-            oauth_module._oauth_config = None
-
-            with pytest.raises(HTTPException) as exc_info:
-                await get_current_user(mock_request, None)
-
-            assert exc_info.value.status_code == 401
-            assert "Invalid API key" in exc_info.value.detail
-
-    @pytest.mark.asyncio
-    async def test_no_credentials_optional_auth(self, mock_request):
-        """Test that no credentials is accepted when auth is optional."""
-        with patch.dict(
-            os.environ,
-            {
-                "MCP_AUTH_ENABLED": "true",
-                "MCP_AUTH_REQUIRED": "false",  # Optional
-            },
-            clear=True,
-        ):
-            import dipeo_server.api.auth.oauth as oauth_module
-
-            oauth_module._oauth_config = None
-
-            user = await get_current_user(mock_request, None)
-            assert user is None
-
-    @pytest.mark.asyncio
-    async def test_no_credentials_required_auth(self, mock_request):
-        """Test that no credentials raises exception when auth is required."""
-        with patch.dict(
-            os.environ,
-            {
-                "MCP_AUTH_ENABLED": "true",
-                "MCP_AUTH_REQUIRED": "true",  # Required
-            },
-            clear=True,
-        ):
-            import dipeo_server.api.auth.oauth as oauth_module
-
-            oauth_module._oauth_config = None
-
-            with pytest.raises(HTTPException) as exc_info:
-                await get_current_user(mock_request, None)
-
-            assert exc_info.value.status_code == 401
-            assert "Authentication required" in exc_info.value.detail
-
-    @pytest.mark.asyncio
-    async def test_www_authenticate_header(self, mock_request):
-        """Test that WWW-Authenticate header is included in 401 responses."""
-        with patch.dict(
-            os.environ,
-            {
-                "MCP_AUTH_ENABLED": "true",
-                "MCP_AUTH_REQUIRED": "true",
-                "MCP_OAUTH_SERVER_URL": "https://auth.example.com",
-            },
-            clear=True,
-        ):
-            import dipeo_server.api.auth.oauth as oauth_module
-
-            oauth_module._oauth_config = None
-
-            with pytest.raises(HTTPException) as exc_info:
-                await get_current_user(mock_request, None)
-
-            assert exc_info.value.status_code == 401
-            assert "WWW-Authenticate" in exc_info.value.headers
-            assert "Bearer" in exc_info.value.headers["WWW-Authenticate"]
-            assert (
-                "auth.example.com" in exc_info.value.headers["WWW-Authenticate"]
-            )
-
-    @pytest.mark.asyncio
-    async def test_jwt_takes_precedence_over_api_key(
-        self, mock_request, valid_jwt_token, hs256_secret
-    ):
-        """Test that JWT authentication is tried before API key."""
-        test_api_key = "test-api-key"
-        mock_request.headers = {"X-API-Key": test_api_key}
-
-        credentials = Mock()
-        credentials.credentials = valid_jwt_token
-
-        with patch.dict(
-            os.environ,
-            {
-                "MCP_AUTH_ENABLED": "true",
-                "MCP_JWT_ENABLED": "true",
-                "MCP_JWT_ALGORITHM": "HS256",
-                "MCP_JWT_SECRET": hs256_secret,
-                "MCP_API_KEY_ENABLED": "true",
-                "MCP_API_KEYS": test_api_key,
-            },
-            clear=True,
-        ):
-            import dipeo_server.api.auth.oauth as oauth_module
-
-            oauth_module._oauth_config = None
-
-            user = await get_current_user(mock_request, credentials)
-            # Should authenticate via JWT, not API key
-            assert user.sub == "test-user"
-            assert user.sub != "api_key_user"
-
-
-class TestOptionalAuthentication:
-    """Test optional_authentication dependency."""
-
-    @pytest.mark.asyncio
-    async def test_optional_authentication_with_user(self):
-        """Test optional authentication when user is authenticated."""
-        mock_user = TokenData(sub="test-user")
-
-        result = await optional_authentication(mock_user)
-        assert result == mock_user
-
-    @pytest.mark.asyncio
-    async def test_optional_authentication_without_user(self):
-        """Test optional authentication when user is not authenticated."""
-        result = await optional_authentication(None)
-        assert result is None
-
-
-class TestRequireAuthentication:
-    """Test require_authentication dependency."""
-
-    @pytest.mark.asyncio
-    async def test_require_authentication_with_user(self):
-        """Test required authentication when user is authenticated."""
-        mock_user = TokenData(sub="test-user")
-
-        result = await require_authentication(mock_user)
-        assert result == mock_user
-
-    @pytest.mark.asyncio
-    async def test_require_authentication_without_user(self):
-        """Test required authentication when user is not authenticated."""
-        with patch.dict(
-            os.environ,
-            {"MCP_OAUTH_SERVER_URL": "https://auth.example.com"},
-            clear=True,
-        ):
-            import dipeo_server.api.auth.oauth as oauth_module
-
-            oauth_module._oauth_config = None
-
-            with pytest.raises(HTTPException) as exc_info:
-                await require_authentication(None)
-
-            assert exc_info.value.status_code == 401
-            assert "Authentication required" in exc_info.value.detail
-            assert "WWW-Authenticate" in exc_info.value.headers
diff --git a/tests/api/auth/test_oauth.py b/tests/api/auth/test_oauth.py
deleted file mode 100644
index 3e8ce557b..000000000
--- a/tests/api/auth/test_oauth.py
+++ /dev/null
@@ -1,559 +0,0 @@
-"""Tests for OAuth 2.1 authentication module.
-
-These tests verify the security-critical authentication functionality including:
-- API key validation with timing attack resistance
-- JWT token validation
-- OAuth configuration
-- Error handling
-"""
-
-import os
-import secrets
-import time
-from typing import Any
-from unittest.mock import Mock, patch
-
-import jwt
-import pytest
-from fastapi import HTTPException
-
-from dipeo_server.api.auth.oauth import (
-    MCPOAuthConfig,
-    TokenData,
-    get_oauth_config,
-    verify_api_key,
-    verify_jwt_token,
-)
-
-
-class TestMCPOAuthConfig:
-    """Test OAuth configuration."""
-
-    def test_from_env_defaults(self):
-        """Test default configuration values."""
-        with patch.dict(os.environ, {}, clear=True):
-            config = MCPOAuthConfig.from_env()
-
-            assert config.enabled is True
-            assert config.require_auth is False
-            assert config.api_key_enabled is True
-            assert config.jwt_enabled is True
-            assert config.jwt_algorithm == "RS256"
-            assert len(config.api_keys) == 0
-
-    def test_from_env_custom_values(self):
-        """Test configuration from environment variables."""
-        env_vars = {
-            "MCP_AUTH_ENABLED": "false",
-            "MCP_AUTH_REQUIRED": "true",
-            "MCP_API_KEY_ENABLED": "false",
-            "MCP_API_KEYS": "key1, key2, key3",
-            "MCP_JWT_ENABLED": "false",
-            "MCP_JWT_ALGORITHM": "HS256",
-            "MCP_JWT_SECRET": "test-secret",
-            "MCP_JWT_AUDIENCE": "test-audience",
-            "MCP_JWT_ISSUER": "test-issuer",
-            "MCP_OAUTH_SERVER_URL": "https://auth.example.com",
-        }
-
-        with patch.dict(os.environ, env_vars, clear=True):
-            config = MCPOAuthConfig.from_env()
-
-            assert config.enabled is False
-            assert config.require_auth is True
-            assert config.api_key_enabled is False
-            assert config.api_keys == {"key1", "key2", "key3"}
-            assert config.jwt_enabled is False
-            assert config.jwt_algorithm == "HS256"
-            assert config.jwt_secret == "test-secret"
-            assert config.jwt_audience == "test-audience"
-            assert config.jwt_issuer == "test-issuer"
-            assert config.authorization_server_url == "https://auth.example.com"
-
-    def test_from_env_api_keys_parsing(self):
-        """Test API keys parsing with various formats."""
-        test_cases = [
-            ("key1,key2,key3", {"key1", "key2", "key3"}),
-            ("key1, key2, key3", {"key1", "key2", "key3"}),
-            ("key1,  key2  ,  key3  ", {"key1", "key2", "key3"}),
-            ("", set()),
-            ("single-key", {"single-key"}),
-            (",,,", set()),  # Only commas
-            ("key1,,key2", {"key1", "key2"}),  # Empty values
-        ]
-
-        for api_keys_str, expected_keys in test_cases:
-            with patch.dict(os.environ, {"MCP_API_KEYS": api_keys_str}, clear=True):
-                config = MCPOAuthConfig.from_env()
-                assert config.api_keys == expected_keys
-
-    def test_public_key_file_loading(self, tmp_path):
-        """Test loading JWT public key from file."""
-        # Create a temporary key file
-        key_file = tmp_path / "test_key.pem"
-        test_key = "-----BEGIN PUBLIC KEY-----\ntest-key-content\n-----END PUBLIC KEY-----"
-        key_file.write_text(test_key)
-
-        with patch.dict(
-            os.environ, {"MCP_JWT_PUBLIC_KEY_FILE": str(key_file)}, clear=True
-        ):
-            config = MCPOAuthConfig.from_env()
-            assert config.jwt_public_key == test_key
-
-    def test_public_key_file_error_handling(self, tmp_path):
-        """Test error handling when key file cannot be read."""
-        # Non-existent file
-        with patch.dict(
-            os.environ, {"MCP_JWT_PUBLIC_KEY_FILE": "/nonexistent/file.pem"}, clear=True
-        ):
-            config = MCPOAuthConfig.from_env()
-            assert config.jwt_public_key is None
-
-    def test_public_key_direct_vs_file(self, tmp_path):
-        """Test that direct key takes precedence over file key."""
-        key_file = tmp_path / "test_key.pem"
-        key_file.write_text("file-key")
-
-        with patch.dict(
-            os.environ,
-            {
-                "MCP_JWT_PUBLIC_KEY": "direct-key",
-                "MCP_JWT_PUBLIC_KEY_FILE": str(key_file),
-            },
-            clear=True,
-        ):
-            config = MCPOAuthConfig.from_env()
-            # File key should override direct key in current implementation
-            assert config.jwt_public_key == "file-key"
-
-
-class TestAPIKeyVerification:
-    """Test API key verification."""
-
-    def test_verify_api_key_valid(self):
-        """Test verification of valid API key."""
-        test_key = "test-api-key-12345"
-
-        with patch.dict(
-            os.environ,
-            {"MCP_API_KEY_ENABLED": "true", "MCP_API_KEYS": test_key},
-            clear=True,
-        ):
-            # Reset global config
-            import dipeo_server.api.auth.oauth as oauth_module
-
-            oauth_module._oauth_config = None
-
-            assert verify_api_key(test_key) is True
-
-    def test_verify_api_key_invalid(self):
-        """Test verification of invalid API key."""
-        with patch.dict(
-            os.environ,
-            {"MCP_API_KEY_ENABLED": "true", "MCP_API_KEYS": "valid-key"},
-            clear=True,
-        ):
-            import dipeo_server.api.auth.oauth as oauth_module
-
-            oauth_module._oauth_config = None
-
-            assert verify_api_key("invalid-key") is False
-
-    def test_verify_api_key_disabled(self):
-        """Test that verification fails when API key auth is disabled."""
-        with patch.dict(
-            os.environ,
-            {"MCP_API_KEY_ENABLED": "false", "MCP_API_KEYS": "test-key"},
-            clear=True,
-        ):
-            import dipeo_server.api.auth.oauth as oauth_module
-
-            oauth_module._oauth_config = None
-
-            assert verify_api_key("test-key") is False
-
-    def test_verify_api_key_no_keys_configured(self):
-        """Test that verification fails when no keys are configured."""
-        with patch.dict(
-            os.environ, {"MCP_API_KEY_ENABLED": "true", "MCP_API_KEYS": ""}, clear=True
-        ):
-            import dipeo_server.api.auth.oauth as oauth_module
-
-            oauth_module._oauth_config = None
-
-            assert verify_api_key("any-key") is False
-
-    def test_api_key_timing_attack_resistance(self):
-        """Test that API key verification resists timing attacks."""
-        # This test verifies that we use constant-time comparison
-        valid_key = "a" * 32
-        invalid_key_short = "b" * 2
-        invalid_key_long = "b" * 32
-
-        with patch.dict(
-            os.environ,
-            {"MCP_API_KEY_ENABLED": "true", "MCP_API_KEYS": valid_key},
-            clear=True,
-        ):
-            import dipeo_server.api.auth.oauth as oauth_module
-
-            oauth_module._oauth_config = None
-
-            # Measure timing for short invalid key
-            iterations = 1000
-            start = time.perf_counter()
-            for _ in range(iterations):
-                verify_api_key(invalid_key_short)
-            time_short = time.perf_counter() - start
-
-            # Measure timing for long invalid key
-            start = time.perf_counter()
-            for _ in range(iterations):
-                verify_api_key(invalid_key_long)
-            time_long = time.perf_counter() - start
-
-            # The times should be similar (within 50% of each other)
-            # This is a weak test but helps ensure we're using constant-time comparison
-            ratio = max(time_short, time_long) / min(time_short, time_long)
-            assert ratio < 1.5, f"Timing difference too large: {ratio}"
-
-    def test_api_key_multiple_keys(self):
-        """Test verification with multiple configured keys."""
-        keys = "key1,key2,key3"
-
-        with patch.dict(
-            os.environ, {"MCP_API_KEY_ENABLED": "true", "MCP_API_KEYS": keys}, clear=True
-        ):
-            import dipeo_server.api.auth.oauth as oauth_module
-
-            oauth_module._oauth_config = None
-
-            assert verify_api_key("key1") is True
-            assert verify_api_key("key2") is True
-            assert verify_api_key("key3") is True
-            assert verify_api_key("key4") is False
-
-
-class TestJWTVerification:
-    """Test JWT token verification."""
-
-    @pytest.fixture
-    def hs256_secret(self):
-        """HS256 secret key for testing."""
-        return "test-secret-key-for-hs256"
-
-    @pytest.fixture
-    def rs256_keys(self):
-        """RS256 key pair for testing."""
-        # Generate a test RSA key pair
-        from cryptography.hazmat.backends import default_backend
-        from cryptography.hazmat.primitives import serialization
-        from cryptography.hazmat.primitives.asymmetric import rsa
-
-        private_key = rsa.generate_private_key(
-            public_exponent=65537, key_size=2048, backend=default_backend()
-        )
-
-        private_pem = private_key.private_bytes(
-            encoding=serialization.Encoding.PEM,
-            format=serialization.PrivateFormat.PKCS8,
-            encryption_algorithm=serialization.NoEncryption(),
-        ).decode()
-
-        public_key = private_key.public_key()
-        public_pem = public_key.public_bytes(
-            encoding=serialization.Encoding.PEM,
-            format=serialization.PublicFormat.SubjectPublicKeyInfo,
-        ).decode()
-
-        return {"private": private_pem, "public": public_pem}
-
-    def test_verify_jwt_valid_hs256(self, hs256_secret):
-        """Test verification of valid HS256 JWT."""
-        payload = {
-            "sub": "test-user",
-            "iss": "test-issuer",
-            "aud": "test-audience",
-            "exp": int(time.time()) + 3600,
-        }
-        token = jwt.encode(payload, hs256_secret, algorithm="HS256")
-
-        with patch.dict(
-            os.environ,
-            {
-                "MCP_JWT_ENABLED": "true",
-                "MCP_JWT_ALGORITHM": "HS256",
-                "MCP_JWT_SECRET": hs256_secret,
-                "MCP_JWT_ISSUER": "test-issuer",
-                "MCP_JWT_AUDIENCE": "test-audience",
-            },
-            clear=True,
-        ):
-            import dipeo_server.api.auth.oauth as oauth_module
-
-            oauth_module._oauth_config = None
-
-            token_data = verify_jwt_token(token)
-            assert token_data.sub == "test-user"
-            assert token_data.iss == "test-issuer"
-            assert token_data.aud == "test-audience"
-
-    def test_verify_jwt_valid_rs256(self, rs256_keys):
-        """Test verification of valid RS256 JWT."""
-        payload = {
-            "sub": "test-user",
-            "exp": int(time.time()) + 3600,
-        }
-        token = jwt.encode(payload, rs256_keys["private"], algorithm="RS256")
-
-        with patch.dict(
-            os.environ,
-            {
-                "MCP_JWT_ENABLED": "true",
-                "MCP_JWT_ALGORITHM": "RS256",
-                "MCP_JWT_PUBLIC_KEY": rs256_keys["public"],
-            },
-            clear=True,
-        ):
-            import dipeo_server.api.auth.oauth as oauth_module
-
-            oauth_module._oauth_config = None
-
-            token_data = verify_jwt_token(token)
-            assert token_data.sub == "test-user"
-
-    def test_verify_jwt_expired(self, hs256_secret):
-        """Test that expired tokens are rejected."""
-        payload = {
-            "sub": "test-user",
-            "exp": int(time.time()) - 3600,  # Expired 1 hour ago
-        }
-        token = jwt.encode(payload, hs256_secret, algorithm="HS256")
-
-        with patch.dict(
-            os.environ,
-            {
-                "MCP_JWT_ENABLED": "true",
-                "MCP_JWT_ALGORITHM": "HS256",
-                "MCP_JWT_SECRET": hs256_secret,
-            },
-            clear=True,
-        ):
-            import dipeo_server.api.auth.oauth as oauth_module
-
-            oauth_module._oauth_config = None
-
-            with pytest.raises(HTTPException) as exc_info:
-                verify_jwt_token(token)
-
-            assert exc_info.value.status_code == 401
-            assert "expired" in exc_info.value.detail.lower()
-
-    def test_verify_jwt_missing_sub_claim(self, hs256_secret):
-        """Test that tokens missing sub claim are rejected with 401."""
-        payload = {
-            "iss": "test-issuer",
-            "exp": int(time.time()) + 3600,
-            # Missing 'sub' claim
-        }
-        token = jwt.encode(payload, hs256_secret, algorithm="HS256")
-
-        with patch.dict(
-            os.environ,
-            {
-                "MCP_JWT_ENABLED": "true",
-                "MCP_JWT_ALGORITHM": "HS256",
-                "MCP_JWT_SECRET": hs256_secret,
-            },
-            clear=True,
-        ):
-            import dipeo_server.api.auth.oauth as oauth_module
-
-            oauth_module._oauth_config = None
-
-            with pytest.raises(HTTPException) as exc_info:
-                verify_jwt_token(token)
-
-            # Should return 401, not 500
-            assert exc_info.value.status_code == 401
-            assert "sub" in exc_info.value.detail.lower()
-
-    def test_verify_jwt_invalid_sub_type(self, hs256_secret):
-        """Test that tokens with invalid sub type are rejected with 401."""
-        payload = {
-            "sub": 12345,  # Should be string, not int
-            "exp": int(time.time()) + 3600,
-        }
-        token = jwt.encode(payload, hs256_secret, algorithm="HS256")
-
-        with patch.dict(
-            os.environ,
-            {
-                "MCP_JWT_ENABLED": "true",
-                "MCP_JWT_ALGORITHM": "HS256",
-                "MCP_JWT_SECRET": hs256_secret,
-            },
-            clear=True,
-        ):
-            import dipeo_server.api.auth.oauth as oauth_module
-
-            oauth_module._oauth_config = None
-
-            with pytest.raises(HTTPException) as exc_info:
-                verify_jwt_token(token)
-
-            # Should return 401, not 500
-            assert exc_info.value.status_code == 401
-
-    def test_verify_jwt_invalid_audience(self, hs256_secret):
-        """Test that tokens with invalid audience are rejected."""
-        payload = {
-            "sub": "test-user",
-            "aud": "wrong-audience",
-            "exp": int(time.time()) + 3600,
-        }
-        token = jwt.encode(payload, hs256_secret, algorithm="HS256")
-
-        with patch.dict(
-            os.environ,
-            {
-                "MCP_JWT_ENABLED": "true",
-                "MCP_JWT_ALGORITHM": "HS256",
-                "MCP_JWT_SECRET": hs256_secret,
-                "MCP_JWT_AUDIENCE": "test-audience",
-            },
-            clear=True,
-        ):
-            import dipeo_server.api.auth.oauth as oauth_module
-
-            oauth_module._oauth_config = None
-
-            with pytest.raises(HTTPException) as exc_info:
-                verify_jwt_token(token)
-
-            assert exc_info.value.status_code == 401
-            assert "audience" in exc_info.value.detail.lower()
-
-    def test_verify_jwt_invalid_issuer(self, hs256_secret):
-        """Test that tokens with invalid issuer are rejected."""
-        payload = {
-            "sub": "test-user",
-            "iss": "wrong-issuer",
-            "exp": int(time.time()) + 3600,
-        }
-        token = jwt.encode(payload, hs256_secret, algorithm="HS256")
-
-        with patch.dict(
-            os.environ,
-            {
-                "MCP_JWT_ENABLED": "true",
-                "MCP_JWT_ALGORITHM": "HS256",
-                "MCP_JWT_SECRET": hs256_secret,
-                "MCP_JWT_ISSUER": "test-issuer",
-            },
-            clear=True,
-        ):
-            import dipeo_server.api.auth.oauth as oauth_module
-
-            oauth_module._oauth_config = None
-
-            with pytest.raises(HTTPException) as exc_info:
-                verify_jwt_token(token)
-
-            assert exc_info.value.status_code == 401
-            assert "issuer" in exc_info.value.detail.lower()
-
-    def test_verify_jwt_invalid_signature(self, hs256_secret):
-        """Test that tokens with invalid signature are rejected."""
-        payload = {
-            "sub": "test-user",
-            "exp": int(time.time()) + 3600,
-        }
-        # Sign with wrong secret
-        token = jwt.encode(payload, "wrong-secret", algorithm="HS256")
-
-        with patch.dict(
-            os.environ,
-            {
-                "MCP_JWT_ENABLED": "true",
-                "MCP_JWT_ALGORITHM": "HS256",
-                "MCP_JWT_SECRET": hs256_secret,
-            },
-            clear=True,
-        ):
-            import dipeo_server.api.auth.oauth as oauth_module
-
-            oauth_module._oauth_config = None
-
-            with pytest.raises(HTTPException) as exc_info:
-                verify_jwt_token(token)
-
-            assert exc_info.value.status_code == 401
-
-    def test_verify_jwt_disabled(self, hs256_secret):
-        """Test that JWT verification fails when disabled."""
-        payload = {
-            "sub": "test-user",
-            "exp": int(time.time()) + 3600,
-        }
-        token = jwt.encode(payload, hs256_secret, algorithm="HS256")
-
-        with patch.dict(os.environ, {"MCP_JWT_ENABLED": "false"}, clear=True):
-            import dipeo_server.api.auth.oauth as oauth_module
-
-            oauth_module._oauth_config = None
-
-            with pytest.raises(HTTPException) as exc_info:
-                verify_jwt_token(token)
-
-            assert exc_info.value.status_code == 401
-            assert "not enabled" in exc_info.value.detail.lower()
-
-    def test_verify_jwt_malformed_token(self):
-        """Test that malformed tokens are rejected."""
-        with patch.dict(
-            os.environ,
-            {"MCP_JWT_ENABLED": "true", "MCP_JWT_ALGORITHM": "HS256"},
-            clear=True,
-        ):
-            import dipeo_server.api.auth.oauth as oauth_module
-
-            oauth_module._oauth_config = None
-
-            with pytest.raises(HTTPException) as exc_info:
-                verify_jwt_token("not.a.valid.jwt.token")
-
-            assert exc_info.value.status_code == 401
-
-
-class TestThreadSafety:
-    """Test thread safety of singleton pattern."""
-
-    def test_get_oauth_config_thread_safety(self):
-        """Test that config initialization is thread-safe."""
-        import threading
-
-        import dipeo_server.api.auth.oauth as oauth_module
-
-        # Reset global config
-        oauth_module._oauth_config = None
-
-        results = []
-
-        def get_config():
-            config = get_oauth_config()
-            results.append(id(config))
-
-        # Create multiple threads that all try to get the config
-        threads = [threading.Thread(target=get_config) for _ in range(10)]
-
-        # Start all threads
-        for thread in threads:
-            thread.start()
-
-        # Wait for all threads to complete
-        for thread in threads:
-            thread.join()
-
-        # All threads should have gotten the same config instance
-        assert len(set(results)) == 1
diff --git a/tests/domain/__init__.py b/tests/domain/__init__.py
deleted file mode 100644
index 0ed1b84ef..000000000
--- a/tests/domain/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-"""Domain layer tests."""
diff --git a/tests/domain/execution/__init__.py b/tests/domain/execution/__init__.py
deleted file mode 100644
index 779e02cfb..000000000
--- a/tests/domain/execution/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-"""Execution domain tests."""
diff --git a/tests/domain/execution/state/__init__.py b/tests/domain/execution/state/__init__.py
deleted file mode 100644
index e22bbb587..000000000
--- a/tests/domain/execution/state/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-"""State tracking tests."""
diff --git a/tests/domain/execution/state/test_unified_state_tracker.py b/tests/domain/execution/state/test_unified_state_tracker.py
deleted file mode 100644
index 62d75e548..000000000
--- a/tests/domain/execution/state/test_unified_state_tracker.py
+++ /dev/null
@@ -1,767 +0,0 @@
-"""Comprehensive tests for UnifiedStateTracker.
-
-Tests cover:
-- State transitions
-- Execution history tracking
-- Iteration limits
-- Metadata management
-- Thread safety
-- Backward compatibility
-- Error handling
-"""
-
-import threading
-import time
-from datetime import datetime
-
-import pytest
-
-from dipeo.diagram_generated import NodeID, Status
-from dipeo.diagram_generated.enums import CompletionStatus
-from dipeo.domain.execution.messaging.envelope import EnvelopeFactory
-from dipeo.domain.execution.state.unified_state_tracker import (
-    NodeExecutionRecord,
-    NodeState,
-    UnifiedStateTracker,
-)
-
-
-class TestStateTransitions:
-    """Test state transition methods."""
-
-    def test_initialize_node(self):
-        """Test node initialization to PENDING state."""
-        tracker = UnifiedStateTracker()
-        node_id = NodeID("node-1")
-
-        tracker.initialize_node(node_id)
-
-        state = tracker.get_node_state(node_id)
-        assert state is not None
-        assert state.status == Status.PENDING
-        assert state.error is None
-
-    def test_initialize_node_idempotent(self):
-        """Test that initializing an already initialized node is idempotent."""
-        tracker = UnifiedStateTracker()
-        node_id = NodeID("node-1")
-
-        tracker.initialize_node(node_id)
-        tracker.initialize_node(node_id)
-
-        state = tracker.get_node_state(node_id)
-        assert state.status == Status.PENDING
-
-    def test_transition_to_running(self):
-        """Test transition to RUNNING state."""
-        tracker = UnifiedStateTracker()
-        node_id = NodeID("node-1")
-        tracker.initialize_node(node_id)
-
-        exec_count = tracker.transition_to_running(node_id, epoch=0)
-
-        assert exec_count == 1
-        state = tracker.get_node_state(node_id)
-        assert state.status == Status.RUNNING
-        assert tracker.get_execution_count(node_id) == 1
-
-    def test_transition_to_completed(self):
-        """Test transition to COMPLETED state."""
-        tracker = UnifiedStateTracker()
-        node_id = NodeID("node-1")
-        tracker.initialize_node(node_id)
-        tracker.transition_to_running(node_id, epoch=0)
-
-        output = EnvelopeFactory.create(body="result")
-        token_usage = {"input": 100, "output": 50, "cached": 10}
-        tracker.transition_to_completed(node_id, output=output, token_usage=token_usage)
-
-        state = tracker.get_node_state(node_id)
-        assert state.status == Status.COMPLETED
-        assert tracker.get_last_output(node_id) == output
-
-        history = tracker.get_node_execution_history(node_id)
-        assert len(history) == 1
-        assert history[0].status == CompletionStatus.SUCCESS
-        assert history[0].token_usage == token_usage
-
-    def test_transition_to_failed(self):
-        """Test transition to FAILED state."""
-        tracker = UnifiedStateTracker()
-        node_id = NodeID("node-1")
-        tracker.initialize_node(node_id)
-        tracker.transition_to_running(node_id, epoch=0)
-
-        error_msg = "Connection timeout"
-        tracker.transition_to_failed(node_id, error=error_msg)
-
-        state = tracker.get_node_state(node_id)
-        assert state.status == Status.FAILED
-        assert state.error == error_msg
-
-        history = tracker.get_node_execution_history(node_id)
-        assert len(history) == 1
-        assert history[0].status == CompletionStatus.FAILED
-        assert history[0].error == error_msg
-
-    def test_transition_to_maxiter(self):
-        """Test transition to MAXITER_REACHED state."""
-        tracker = UnifiedStateTracker()
-        node_id = NodeID("node-1")
-        tracker.initialize_node(node_id)
-        tracker.transition_to_running(node_id, epoch=0)
-
-        output = EnvelopeFactory.create(body="final_result")
-        tracker.transition_to_maxiter(node_id, output=output)
-
-        state = tracker.get_node_state(node_id)
-        assert state.status == Status.MAXITER_REACHED
-        assert tracker.get_last_output(node_id) == output
-
-        history = tracker.get_node_execution_history(node_id)
-        assert len(history) == 1
-        assert history[0].status == CompletionStatus.MAX_ITER
-
-    def test_transition_to_skipped(self):
-        """Test transition to SKIPPED state."""
-        tracker = UnifiedStateTracker()
-        node_id = NodeID("node-1")
-        tracker.initialize_node(node_id)
-        tracker.transition_to_running(node_id, epoch=0)
-
-        tracker.transition_to_skipped(node_id)
-
-        state = tracker.get_node_state(node_id)
-        assert state.status == Status.SKIPPED
-
-        history = tracker.get_node_execution_history(node_id)
-        assert len(history) == 1
-        assert history[0].status == CompletionStatus.SKIPPED
-
-    def test_reset_node(self):
-        """Test resetting node to PENDING state."""
-        tracker = UnifiedStateTracker()
-        node_id = NodeID("node-1")
-        tracker.initialize_node(node_id)
-        tracker.transition_to_running(node_id, epoch=0)
-        tracker.transition_to_completed(node_id)
-
-        tracker.reset_node(node_id)
-
-        state = tracker.get_node_state(node_id)
-        assert state.status == Status.PENDING
-        # Execution count should NOT be reset
-        assert tracker.get_execution_count(node_id) == 1
-
-    def test_complete_execution_without_start_raises_error(self):
-        """Test that completing without starting raises ValueError."""
-        tracker = UnifiedStateTracker()
-        node_id = NodeID("node-1")
-
-        with pytest.raises(ValueError, match="No execution started"):
-            tracker.transition_to_completed(node_id)
-
-    def test_complete_execution_twice_raises_error(self):
-        """Test that completing twice raises ValueError."""
-        tracker = UnifiedStateTracker()
-        node_id = NodeID("node-1")
-        tracker.initialize_node(node_id)
-        tracker.transition_to_running(node_id, epoch=0)
-        tracker.transition_to_completed(node_id)
-
-        with pytest.raises(ValueError, match="execution already completed"):
-            tracker.transition_to_completed(node_id)
-
-
-class TestStateQueries:
-    """Test state query methods."""
-
-    def test_get_node_state_nonexistent(self):
-        """Test getting state of non-existent node returns None."""
-        tracker = UnifiedStateTracker()
-        node_id = NodeID("node-1")
-
-        state = tracker.get_node_state(node_id)
-        assert state is None
-
-    def test_get_all_node_states(self):
-        """Test getting all node states."""
-        tracker = UnifiedStateTracker()
-        node_ids = [NodeID(f"node-{i}") for i in range(3)]
-
-        for node_id in node_ids:
-            tracker.initialize_node(node_id)
-
-        all_states = tracker.get_all_node_states()
-        assert len(all_states) == 3
-        for node_id in node_ids:
-            assert node_id in all_states
-            assert all_states[node_id].status == Status.PENDING
-
-    def test_get_completed_nodes(self):
-        """Test getting all completed nodes."""
-        tracker = UnifiedStateTracker()
-
-        # Create multiple nodes in different states
-        tracker.initialize_node(NodeID("node-1"))
-        tracker.transition_to_running(NodeID("node-1"), epoch=0)
-        tracker.transition_to_completed(NodeID("node-1"))
-
-        tracker.initialize_node(NodeID("node-2"))
-        tracker.transition_to_running(NodeID("node-2"), epoch=0)
-
-        tracker.initialize_node(NodeID("node-3"))
-        tracker.transition_to_running(NodeID("node-3"), epoch=0)
-        tracker.transition_to_completed(NodeID("node-3"))
-
-        completed = tracker.get_completed_nodes()
-        assert len(completed) == 2
-        assert NodeID("node-1") in completed
-        assert NodeID("node-3") in completed
-
-    def test_get_running_nodes(self):
-        """Test getting all running nodes."""
-        tracker = UnifiedStateTracker()
-
-        tracker.initialize_node(NodeID("node-1"))
-        tracker.transition_to_running(NodeID("node-1"), epoch=0)
-
-        tracker.initialize_node(NodeID("node-2"))
-        tracker.transition_to_running(NodeID("node-2"), epoch=0)
-        tracker.transition_to_completed(NodeID("node-2"))
-
-        running = tracker.get_running_nodes()
-        assert len(running) == 1
-        assert NodeID("node-1") in running
-
-    def test_get_failed_nodes(self):
-        """Test getting all failed nodes."""
-        tracker = UnifiedStateTracker()
-
-        tracker.initialize_node(NodeID("node-1"))
-        tracker.transition_to_running(NodeID("node-1"), epoch=0)
-        tracker.transition_to_failed(NodeID("node-1"), error="Error")
-
-        tracker.initialize_node(NodeID("node-2"))
-        tracker.transition_to_running(NodeID("node-2"), epoch=0)
-        tracker.transition_to_completed(NodeID("node-2"))
-
-        failed = tracker.get_failed_nodes()
-        assert len(failed) == 1
-        assert NodeID("node-1") in failed
-
-    def test_has_running_nodes(self):
-        """Test checking if any nodes are running."""
-        tracker = UnifiedStateTracker()
-
-        assert not tracker.has_running_nodes()
-
-        tracker.initialize_node(NodeID("node-1"))
-        assert not tracker.has_running_nodes()
-
-        tracker.transition_to_running(NodeID("node-1"), epoch=0)
-        assert tracker.has_running_nodes()
-
-        tracker.transition_to_completed(NodeID("node-1"))
-        assert not tracker.has_running_nodes()
-
-
-class TestExecutionHistory:
-    """Test execution history methods."""
-
-    def test_execution_count_increments(self):
-        """Test that execution count increments properly."""
-        tracker = UnifiedStateTracker()
-        node_id = NodeID("node-1")
-
-        for i in range(5):
-            tracker.transition_to_running(node_id, epoch=0)
-            assert tracker.get_execution_count(node_id) == i + 1
-            tracker.transition_to_completed(node_id)
-            tracker.reset_node(node_id)
-
-    def test_has_executed(self):
-        """Test has_executed check."""
-        tracker = UnifiedStateTracker()
-        node_id = NodeID("node-1")
-
-        assert not tracker.has_executed(node_id)
-
-        tracker.transition_to_running(node_id, epoch=0)
-        assert tracker.has_executed(node_id)
-
-    def test_get_last_output(self):
-        """Test getting last output."""
-        tracker = UnifiedStateTracker()
-        node_id = NodeID("node-1")
-
-        assert tracker.get_last_output(node_id) is None
-
-        tracker.transition_to_running(node_id, epoch=0)
-        output1 = EnvelopeFactory.create(body="result1")
-        tracker.transition_to_completed(node_id, output=output1)
-
-        assert tracker.get_last_output(node_id) == output1
-
-        # Second execution with different output
-        tracker.reset_node(node_id)
-        tracker.transition_to_running(node_id, epoch=0)
-        output2 = EnvelopeFactory.create(body="result2")
-        tracker.transition_to_completed(node_id, output=output2)
-
-        assert tracker.get_last_output(node_id) == output2
-
-    def test_get_node_result(self):
-        """Test getting node result with metadata."""
-        tracker = UnifiedStateTracker()
-        node_id = NodeID("node-1")
-
-        assert tracker.get_node_result(node_id) is None
-
-        tracker.transition_to_running(node_id, epoch=0)
-        output = EnvelopeFactory.create(body="result", meta={"key": "value"})
-        tracker.transition_to_completed(node_id, output=output)
-
-        result = tracker.get_node_result(node_id)
-        assert result is not None
-        assert result["value"] == "result"
-        assert result["metadata"]["key"] == "value"
-
-    def test_get_node_execution_history(self):
-        """Test getting full execution history."""
-        tracker = UnifiedStateTracker()
-        node_id = NodeID("node-1")
-
-        # Execute 3 times
-        for i in range(3):
-            tracker.transition_to_running(node_id, epoch=0)
-            output = EnvelopeFactory.create(body=f"result{i}")
-            tracker.transition_to_completed(node_id, output=output)
-            tracker.reset_node(node_id)
-
-        history = tracker.get_node_execution_history(node_id)
-        assert len(history) == 3
-        for i, record in enumerate(history):
-            assert record.execution_number == i + 1
-            assert record.is_complete()
-            assert record.was_successful()
-
-    def test_execution_record_timing(self):
-        """Test that execution records track timing correctly."""
-        tracker = UnifiedStateTracker()
-        node_id = NodeID("node-1")
-
-        start_time = datetime.now()
-        tracker.transition_to_running(node_id, epoch=0)
-        time.sleep(0.01)  # Small delay
-        tracker.transition_to_completed(node_id)
-        end_time = datetime.now()
-
-        history = tracker.get_node_execution_history(node_id)
-        record = history[0]
-
-        assert record.started_at >= start_time
-        assert record.ended_at <= end_time
-        assert record.duration > 0
-
-    def test_get_execution_summary(self):
-        """Test execution summary generation."""
-        tracker = UnifiedStateTracker()
-
-        # Successful execution
-        tracker.transition_to_running(NodeID("node-1"), epoch=0)
-        tracker.transition_to_completed(
-            NodeID("node-1"), token_usage={"input": 100, "output": 50, "cached": 10}
-        )
-
-        # Failed execution
-        tracker.transition_to_running(NodeID("node-2"), epoch=0)
-        tracker.transition_to_failed(NodeID("node-2"), error="Error")
-
-        # Another successful execution
-        tracker.transition_to_running(NodeID("node-3"), epoch=0)
-        tracker.transition_to_completed(
-            NodeID("node-3"), token_usage={"input": 200, "output": 100, "cached": 20}
-        )
-
-        summary = tracker.get_execution_summary()
-        assert summary["total_executions"] == 3
-        assert summary["successful_executions"] == 2
-        assert summary["failed_executions"] == 1
-        assert summary["success_rate"] == pytest.approx(2 / 3)
-        assert summary["total_tokens"]["input"] == 300
-        assert summary["total_tokens"]["output"] == 150
-        assert summary["total_tokens"]["cached"] == 30
-        assert summary["nodes_executed"] == 3
-        assert len(summary["execution_order"]) == 3
-
-    def test_get_execution_order(self):
-        """Test execution order tracking."""
-        tracker = UnifiedStateTracker()
-
-        node_ids = [NodeID(f"node-{i}") for i in range(3)]
-        for node_id in node_ids:
-            tracker.transition_to_running(node_id, epoch=0)
-            tracker.transition_to_completed(node_id)
-
-        execution_order = tracker.get_execution_order()
-        assert execution_order == node_ids
-
-
-class TestIterationLimits:
-    """Test iteration limit enforcement."""
-
-    def test_can_execute_in_loop_default_limit(self):
-        """Test iteration limit with default max."""
-        tracker = UnifiedStateTracker()
-        node_id = NodeID("node-1")
-        epoch = 0
-
-        # Execute up to default limit (100)
-        for _i in range(100):
-            assert tracker.can_execute_in_loop(node_id, epoch)
-            tracker.transition_to_running(node_id, epoch)
-            tracker.transition_to_completed(node_id)
-            tracker.reset_node(node_id)
-
-        # 101st should fail
-        assert not tracker.can_execute_in_loop(node_id, epoch)
-
-    def test_can_execute_in_loop_custom_limit(self):
-        """Test iteration limit with custom max."""
-        tracker = UnifiedStateTracker()
-        node_id = NodeID("node-1")
-        epoch = 0
-        max_iteration = 5
-
-        # Execute up to custom limit
-        for _i in range(5):
-            assert tracker.can_execute_in_loop(node_id, epoch, max_iteration)
-            tracker.transition_to_running(node_id, epoch)
-            tracker.transition_to_completed(node_id)
-            tracker.reset_node(node_id)
-
-        # 6th should fail
-        assert not tracker.can_execute_in_loop(node_id, epoch, max_iteration)
-
-    def test_iteration_limits_per_epoch(self):
-        """Test that iteration limits are tracked per epoch."""
-        tracker = UnifiedStateTracker()
-        node_id = NodeID("node-1")
-        max_iteration = 3
-
-        # Epoch 0: Execute 3 times
-        for _i in range(3):
-            assert tracker.can_execute_in_loop(node_id, 0, max_iteration)
-            tracker.transition_to_running(node_id, epoch=0)
-            tracker.transition_to_completed(node_id)
-            tracker.reset_node(node_id)
-
-        assert not tracker.can_execute_in_loop(node_id, 0, max_iteration)
-
-        # Epoch 1: Should allow 3 more times
-        for _i in range(3):
-            assert tracker.can_execute_in_loop(node_id, 1, max_iteration)
-            tracker.transition_to_running(node_id, epoch=1)
-            tracker.transition_to_completed(node_id)
-            tracker.reset_node(node_id)
-
-        assert not tracker.can_execute_in_loop(node_id, 1, max_iteration)
-
-    def test_get_iterations_in_epoch(self):
-        """Test getting iteration count per epoch."""
-        tracker = UnifiedStateTracker()
-        node_id = NodeID("node-1")
-
-        assert tracker.get_iterations_in_epoch(node_id, 0) == 0
-
-        # Execute twice in epoch 0
-        tracker.transition_to_running(node_id, epoch=0)
-        tracker.transition_to_completed(node_id)
-        tracker.reset_node(node_id)
-
-        tracker.transition_to_running(node_id, epoch=0)
-        tracker.transition_to_completed(node_id)
-
-        assert tracker.get_iterations_in_epoch(node_id, 0) == 2
-        assert tracker.get_iterations_in_epoch(node_id, 1) == 0
-
-
-class TestMetadata:
-    """Test metadata management."""
-
-    def test_get_node_metadata_empty(self):
-        """Test getting metadata for node with no metadata."""
-        tracker = UnifiedStateTracker()
-        node_id = NodeID("node-1")
-
-        metadata = tracker.get_node_metadata(node_id)
-        assert metadata == {}
-
-    def test_set_and_get_node_metadata(self):
-        """Test setting and getting node metadata."""
-        tracker = UnifiedStateTracker()
-        node_id = NodeID("node-1")
-
-        tracker.set_node_metadata(node_id, "key1", "value1")
-        tracker.set_node_metadata(node_id, "key2", 42)
-
-        metadata = tracker.get_node_metadata(node_id)
-        assert metadata["key1"] == "value1"
-        assert metadata["key2"] == 42
-
-    def test_set_node_metadata_overwrites(self):
-        """Test that setting metadata overwrites previous value."""
-        tracker = UnifiedStateTracker()
-        node_id = NodeID("node-1")
-
-        tracker.set_node_metadata(node_id, "key", "value1")
-        tracker.set_node_metadata(node_id, "key", "value2")
-
-        metadata = tracker.get_node_metadata(node_id)
-        assert metadata["key"] == "value2"
-
-    def test_get_node_metadata_returns_copy(self):
-        """Test that get_node_metadata returns a copy."""
-        tracker = UnifiedStateTracker()
-        node_id = NodeID("node-1")
-
-        tracker.set_node_metadata(node_id, "key", "value")
-        metadata1 = tracker.get_node_metadata(node_id)
-        metadata1["key"] = "modified"
-
-        metadata2 = tracker.get_node_metadata(node_id)
-        assert metadata2["key"] == "value"  # Should not be modified
-
-
-class TestThreadSafety:
-    """Test thread safety of concurrent operations."""
-
-    def test_concurrent_transitions(self):
-        """Test concurrent state transitions."""
-        tracker = UnifiedStateTracker()
-        node_id = NodeID("node-1")
-        tracker.initialize_node(node_id)
-
-        errors = []
-
-        def worker():
-            try:
-                for _ in range(10):
-                    tracker.transition_to_running(node_id, epoch=0)
-                    time.sleep(0.001)  # Small delay
-                    tracker.transition_to_completed(node_id)
-            except Exception as e:
-                errors.append(e)
-
-        threads = [threading.Thread(target=worker) for _ in range(5)]
-        for t in threads:
-            t.start()
-        for t in threads:
-            t.join()
-
-        # Should have 50 executions total (5 threads * 10 executions)
-        # Note: This will likely fail because we can't transition_to_running
-        # multiple times without reset. Let's adjust the test.
-        assert len(errors) > 0  # Expect errors due to invalid state transitions
-
-    def test_concurrent_execution_with_reset(self):
-        """Test concurrent executions with proper reset."""
-        tracker = UnifiedStateTracker()
-        num_threads = 5
-        iterations_per_thread = 10
-
-        def worker(thread_id: int):
-            node_id = NodeID(f"node-{thread_id}")
-            tracker.initialize_node(node_id)
-
-            for _ in range(iterations_per_thread):
-                tracker.transition_to_running(node_id, epoch=0)
-                output = EnvelopeFactory.create(body=f"result-{thread_id}")
-                tracker.transition_to_completed(node_id, output=output)
-                tracker.reset_node(node_id)
-
-        threads = [threading.Thread(target=worker, args=(i,)) for i in range(num_threads)]
-        for t in threads:
-            t.start()
-        for t in threads:
-            t.join()
-
-        # Verify all nodes executed the correct number of times
-        for i in range(num_threads):
-            node_id = NodeID(f"node-{i}")
-            assert tracker.get_execution_count(node_id) == iterations_per_thread
-
-    def test_concurrent_metadata_access(self):
-        """Test concurrent metadata access."""
-        tracker = UnifiedStateTracker()
-        node_id = NodeID("node-1")
-
-        def writer(key: str, value: int):
-            for i in range(100):
-                tracker.set_node_metadata(node_id, key, value + i)
-
-        def reader(key: str):
-            for _ in range(100):
-                tracker.get_node_metadata(node_id)
-
-        threads = []
-        threads.extend(
-            [threading.Thread(target=writer, args=(f"key{i}", i * 1000)) for i in range(3)]
-        )
-        threads.extend([threading.Thread(target=reader, args=(f"key{i}",)) for i in range(3)])
-
-        for t in threads:
-            t.start()
-        for t in threads:
-            t.join()
-
-        # No assertions - just verify no crashes
-
-
-class TestPersistence:
-    """Test persistence methods."""
-
-    def test_load_states(self):
-        """Test loading persisted states."""
-        tracker = UnifiedStateTracker()
-
-        # Create some initial state
-        node_states = {
-            NodeID("node-1"): NodeState(status=Status.COMPLETED),
-            NodeID("node-2"): NodeState(status=Status.FAILED, error="Error"),
-        }
-        execution_counts = {
-            NodeID("node-1"): 3,
-            NodeID("node-2"): 1,
-        }
-
-        tracker.load_states(node_states, execution_counts=execution_counts)
-
-        assert tracker.get_node_state(NodeID("node-1")).status == Status.COMPLETED
-        assert tracker.get_node_state(NodeID("node-2")).status == Status.FAILED
-        assert tracker.get_execution_count(NodeID("node-1")) == 3
-        assert tracker.get_execution_count(NodeID("node-2")) == 1
-
-    def test_clear_history(self):
-        """Test clearing all history."""
-        tracker = UnifiedStateTracker()
-        node_id = NodeID("node-1")
-
-        # Create some state
-        tracker.initialize_node(node_id)
-        tracker.transition_to_running(node_id, epoch=0)
-        tracker.transition_to_completed(node_id)
-        tracker.set_node_metadata(node_id, "key", "value")
-
-        tracker.clear_history()
-
-        assert tracker.get_node_state(node_id) is None
-        assert tracker.get_execution_count(node_id) == 0
-        assert tracker.get_last_output(node_id) is None
-        assert tracker.get_node_metadata(node_id) == {}
-        assert len(tracker.get_execution_order()) == 0
-
-
-class TestBackwardCompatibility:
-    """Test backward compatibility methods."""
-
-    def test_get_tracker(self):
-        """Test get_tracker returns self."""
-        tracker = UnifiedStateTracker()
-        assert tracker.get_tracker() is tracker
-
-    def test_get_node_execution_count_alias(self):
-        """Test get_node_execution_count alias."""
-        tracker = UnifiedStateTracker()
-        node_id = NodeID("node-1")
-
-        tracker.transition_to_running(node_id, epoch=0)
-        tracker.transition_to_completed(node_id)
-
-        # Both methods should return same value
-        assert tracker.get_node_execution_count(node_id) == tracker.get_execution_count(node_id)
-        assert tracker.get_node_execution_count(node_id) == 1
-
-    def test_get_node_output_alias(self):
-        """Test get_node_output alias."""
-        tracker = UnifiedStateTracker()
-        node_id = NodeID("node-1")
-
-        tracker.transition_to_running(node_id, epoch=0)
-        output = EnvelopeFactory.create(body="result")
-        tracker.transition_to_completed(node_id, output=output)
-
-        # Both methods should return same value
-        assert tracker.get_node_output(node_id) == tracker.get_last_output(node_id)
-        assert tracker.get_node_output(node_id) == output
-
-
-class TestEdgeCases:
-    """Test edge cases and error conditions."""
-
-    def test_execution_record_immutability(self):
-        """Test that execution records maintain immutability semantics."""
-        tracker = UnifiedStateTracker()
-        node_id = NodeID("node-1")
-
-        tracker.transition_to_running(node_id, epoch=0)
-        tracker.transition_to_completed(node_id)
-
-        history1 = tracker.get_node_execution_history(node_id)
-        history2 = tracker.get_node_execution_history(node_id)
-
-        # Should be different list instances
-        assert history1 is not history2
-        # But contain equal records
-        assert len(history1) == len(history2)
-        assert history1[0].execution_number == history2[0].execution_number
-
-    def test_state_copy_semantics(self):
-        """Test that get_all_node_states returns a copy."""
-        tracker = UnifiedStateTracker()
-        node_id = NodeID("node-1")
-        tracker.initialize_node(node_id)
-
-        states1 = tracker.get_all_node_states()
-        states1[NodeID("fake-node")] = NodeState(status=Status.FAILED)
-
-        states2 = tracker.get_all_node_states()
-        assert NodeID("fake-node") not in states2
-
-    def test_execution_with_no_output(self):
-        """Test completing execution without output."""
-        tracker = UnifiedStateTracker()
-        node_id = NodeID("node-1")
-
-        tracker.transition_to_running(node_id, epoch=0)
-        tracker.transition_to_completed(node_id, output=None)
-
-        assert tracker.get_last_output(node_id) is None
-        assert tracker.get_node_result(node_id) is None
-
-    def test_multiple_epochs_same_node(self):
-        """Test node executing in multiple epochs."""
-        tracker = UnifiedStateTracker()
-        node_id = NodeID("node-1")
-
-        # Execute in epoch 0
-        tracker.transition_to_running(node_id, epoch=0)
-        tracker.transition_to_completed(node_id)
-        tracker.reset_node(node_id)
-
-        # Execute in epoch 1
-        tracker.transition_to_running(node_id, epoch=1)
-        tracker.transition_to_completed(node_id)
-        tracker.reset_node(node_id)
-
-        # Execute in epoch 2
-        tracker.transition_to_running(node_id, epoch=2)
-        tracker.transition_to_completed(node_id)
-
-        # Total execution count should be 3
-        assert tracker.get_execution_count(node_id) == 3
-        # But iterations per epoch should be 1
-        assert tracker.get_iterations_in_epoch(node_id, 0) == 1
-        assert tracker.get_iterations_in_epoch(node_id, 1) == 1
-        assert tracker.get_iterations_in_epoch(node_id, 2) == 1
diff --git a/tests/infrastructure/execution/rules/__init__.py b/tests/infrastructure/execution/rules/__init__.py
deleted file mode 100644
index 87cd2be2f..000000000
--- a/tests/infrastructure/execution/rules/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-"""Tests for execution rule infrastructure."""
diff --git a/tests/infrastructure/execution/rules/test_backward_compat.py b/tests/infrastructure/execution/rules/test_backward_compat.py
deleted file mode 100644
index 74e5980a4..000000000
--- a/tests/infrastructure/execution/rules/test_backward_compat.py
+++ /dev/null
@@ -1,253 +0,0 @@
-"""Tests for backward compatibility layer."""
-
-from unittest.mock import Mock
-
-import pytest
-
-from dipeo.diagram_generated import NodeType
-from dipeo.diagram_generated.generated_nodes import PersonJobNode
-from dipeo.domain.execution.rules import DataTransformRules, NodeConnectionRules
-from dipeo.infrastructure.execution.rules.compat import reset_default_registry
-
-
-class TestNodeConnectionRulesBackwardCompat:
-    """Test backward compatibility of NodeConnectionRules."""
-
-    def setup_method(self):
-        """Reset registry before each test."""
-        reset_default_registry()
-
-    def test_can_connect_start_no_input(self):
-        """Test that START nodes cannot receive input."""
-        # Any node -> START should be False
-        assert not NodeConnectionRules.can_connect(NodeType.PERSON_JOB, NodeType.START)
-        assert not NodeConnectionRules.can_connect(NodeType.CODE_JOB, NodeType.START)
-        assert not NodeConnectionRules.can_connect(NodeType.ENDPOINT, NodeType.START)
-
-    def test_can_connect_endpoint_no_output(self):
-        """Test that ENDPOINT nodes cannot send output."""
-        # ENDPOINT -> any node should be False
-        assert not NodeConnectionRules.can_connect(NodeType.ENDPOINT, NodeType.PERSON_JOB)
-        assert not NodeConnectionRules.can_connect(NodeType.ENDPOINT, NodeType.CODE_JOB)
-        assert not NodeConnectionRules.can_connect(NodeType.ENDPOINT, NodeType.START)
-
-    def test_can_connect_valid_connections(self):
-        """Test valid connections."""
-        # START -> other nodes
-        assert NodeConnectionRules.can_connect(NodeType.START, NodeType.PERSON_JOB)
-        assert NodeConnectionRules.can_connect(NodeType.START, NodeType.CODE_JOB)
-        assert NodeConnectionRules.can_connect(NodeType.START, NodeType.ENDPOINT)
-
-        # Normal nodes -> other nodes (except START)
-        assert NodeConnectionRules.can_connect(NodeType.PERSON_JOB, NodeType.CODE_JOB)
-        assert NodeConnectionRules.can_connect(NodeType.CODE_JOB, NodeType.PERSON_JOB)
-        assert NodeConnectionRules.can_connect(NodeType.PERSON_JOB, NodeType.ENDPOINT)
-
-    def test_can_connect_output_capable_nodes(self):
-        """Test output-capable node connections."""
-        output_capable = [
-            NodeType.PERSON_JOB,
-            NodeType.CONDITION,
-            NodeType.CODE_JOB,
-            NodeType.API_JOB,
-            NodeType.START,
-        ]
-
-        for node_type in output_capable:
-            # Should be able to connect to non-START nodes
-            assert NodeConnectionRules.can_connect(node_type, NodeType.PERSON_JOB)
-            assert NodeConnectionRules.can_connect(node_type, NodeType.ENDPOINT)
-
-            # Should NOT be able to connect to START
-            assert not NodeConnectionRules.can_connect(node_type, NodeType.START)
-
-    def test_get_connection_constraints_start(self):
-        """Test connection constraints for START node."""
-        constraints = NodeConnectionRules.get_connection_constraints(NodeType.START)
-
-        # START cannot receive from anyone
-        assert constraints["can_receive_from"] == []
-
-        # START can send to everyone except itself
-        assert NodeType.START not in constraints["can_send_to"]
-        assert NodeType.PERSON_JOB in constraints["can_send_to"]
-        assert NodeType.ENDPOINT in constraints["can_send_to"]
-
-    def test_get_connection_constraints_endpoint(self):
-        """Test connection constraints for ENDPOINT node."""
-        constraints = NodeConnectionRules.get_connection_constraints(NodeType.ENDPOINT)
-
-        # ENDPOINT cannot send to anyone
-        assert constraints["can_send_to"] == []
-
-        # ENDPOINT can receive from everyone except itself
-        assert NodeType.ENDPOINT not in constraints["can_receive_from"]
-        assert NodeType.PERSON_JOB in constraints["can_receive_from"]
-        assert NodeType.START in constraints["can_receive_from"]
-
-    def test_get_connection_constraints_regular_node(self):
-        """Test connection constraints for regular nodes."""
-        constraints = NodeConnectionRules.get_connection_constraints(NodeType.PERSON_JOB)
-
-        # Can receive from non-ENDPOINT nodes
-        assert NodeType.START in constraints["can_receive_from"]
-        assert NodeType.CODE_JOB in constraints["can_receive_from"]
-        assert NodeType.ENDPOINT not in constraints["can_receive_from"]
-
-        # Can send to non-START nodes
-        assert NodeType.ENDPOINT in constraints["can_send_to"]
-        assert NodeType.CODE_JOB in constraints["can_send_to"]
-        assert NodeType.START not in constraints["can_send_to"]
-
-    def test_get_registry(self):
-        """Test accessing the underlying registry."""
-        registry = NodeConnectionRules.get_registry()
-        assert registry is not None
-
-        # Should be able to use registry methods
-        rules = registry.list_rules()
-        assert len(rules) > 0
-
-
-class TestDataTransformRulesBackwardCompat:
-    """Test backward compatibility of DataTransformRules."""
-
-    def setup_method(self):
-        """Reset registry before each test."""
-        reset_default_registry()
-
-    def test_get_data_transform_personjob_with_tools(self):
-        """Test transform rule for PersonJob with tools."""
-        # Use mock with tools attribute
-        source = Mock(spec=PersonJobNode)
-        source.tools = ["calculator", "web_search"]
-        target = Mock()
-
-        transforms = DataTransformRules.get_data_transform(source, target)
-        assert transforms.get("extract_tool_results") is True
-
-    def test_get_data_transform_personjob_without_tools(self):
-        """Test transform rule for PersonJob without tools."""
-        # Use mock without tools
-        source = Mock(spec=PersonJobNode)
-        source.tools = None
-        target = Mock()
-
-        transforms = DataTransformRules.get_data_transform(source, target)
-        assert "extract_tool_results" not in transforms
-
-    def test_get_data_transform_non_personjob(self):
-        """Test transform rule for non-PersonJob nodes."""
-        # Use simple mocks (not PersonJob)
-        source = Mock()
-        target = Mock()
-
-        transforms = DataTransformRules.get_data_transform(source, target)
-        assert "extract_tool_results" not in transforms
-
-    def test_merge_transforms_edge_priority(self):
-        """Test that edge transforms have priority over type transforms."""
-        edge_transform = {"custom": "edge_value", "priority": "edge"}
-        type_transform = {"priority": "type", "default": "type_value"}
-
-        merged = DataTransformRules.merge_transforms(edge_transform, type_transform)
-
-        # Edge values should take precedence
-        assert merged["priority"] == "edge"
-        assert merged["custom"] == "edge_value"
-        assert merged["default"] == "type_value"
-
-    def test_merge_transforms_empty_dicts(self):
-        """Test merging empty transform dictionaries."""
-        merged = DataTransformRules.merge_transforms({}, {})
-        assert merged == {}
-
-        merged = DataTransformRules.merge_transforms({"key": "value"}, {})
-        assert merged == {"key": "value"}
-
-        merged = DataTransformRules.merge_transforms({}, {"key": "value"})
-        assert merged == {"key": "value"}
-
-    def test_get_registry(self):
-        """Test accessing the underlying registry."""
-        registry = DataTransformRules.get_registry()
-        assert registry is not None
-
-        # Should be able to use registry methods
-        rules = registry.list_rules()
-        assert len(rules) > 0
-
-
-class TestRegistryExtensibility:
-    """Test that the registry can be extended with custom rules."""
-
-    def setup_method(self):
-        """Reset registry before each test."""
-        reset_default_registry()
-
-    def test_register_custom_connection_rule(self):
-        """Test registering a custom connection rule."""
-        from dipeo.infrastructure.execution.rules import (
-            BaseConnectionRule,
-            RuleCategory,
-            RuleKey,
-            RulePriority,
-        )
-
-        class CustomRule(BaseConnectionRule):
-            def __init__(self):
-                super().__init__("custom_rule", "Custom test rule", RulePriority.HIGH)
-
-            def can_connect(self, source_type, target_type):
-                # Block all CODE_JOB to CODE_JOB connections
-                return not (source_type == NodeType.CODE_JOB and target_type == NodeType.CODE_JOB)
-
-        registry = NodeConnectionRules.get_registry()
-        rule = CustomRule()
-        key = RuleKey(
-            name=rule.name,
-            category=RuleCategory.CONNECTION,
-            priority=rule.priority,
-        )
-        registry.register_connection_rule(key, rule)
-
-        # Custom rule should be enforced
-        assert not NodeConnectionRules.can_connect(NodeType.CODE_JOB, NodeType.CODE_JOB)
-        # Other connections should still work
-        assert NodeConnectionRules.can_connect(NodeType.CODE_JOB, NodeType.PERSON_JOB)
-
-    def test_register_custom_transform_rule(self):
-        """Test registering a custom transform rule."""
-        from dipeo.diagram_generated.generated_nodes import CodeJobNode
-        from dipeo.infrastructure.execution.rules import (
-            BaseTransformRule,
-            RuleCategory,
-            RuleKey,
-            RulePriority,
-        )
-
-        class CustomTransformRule(BaseTransformRule):
-            def __init__(self):
-                super().__init__("custom_transform", "Custom transform", RulePriority.HIGH)
-
-            def applies_to(self, source, target):
-                return isinstance(source, CodeJobNode)
-
-            def get_transform(self, source, target):
-                return {"custom_transform": "applied"}
-
-        registry = DataTransformRules.get_registry()
-        rule = CustomTransformRule()
-        key = RuleKey(
-            name=rule.name,
-            category=RuleCategory.TRANSFORM,
-            priority=rule.priority,
-        )
-        registry.register_transform_rule(key, rule)
-
-        # Custom rule should be applied - use mocks
-        source = Mock(spec=CodeJobNode)
-        target = Mock(spec=CodeJobNode)
-
-        transforms = DataTransformRules.get_data_transform(source, target)
-        assert transforms.get("custom_transform") == "applied"
diff --git a/tests/infrastructure/execution/rules/test_registry.py b/tests/infrastructure/execution/rules/test_registry.py
deleted file mode 100644
index c49699411..000000000
--- a/tests/infrastructure/execution/rules/test_registry.py
+++ /dev/null
@@ -1,344 +0,0 @@
-"""Unit tests for ExecutionRuleRegistry."""
-
-from unittest.mock import Mock
-
-import pytest
-
-from dipeo.diagram_generated import NodeType
-from dipeo.infrastructure.execution.rules import (
-    BaseConnectionRule,
-    BaseTransformRule,
-    ExecutionRuleRegistry,
-    RuleCategory,
-    RuleKey,
-    RulePriority,
-)
-
-
-class MockConnectionRule(BaseConnectionRule):
-    """Mock connection rule for testing."""
-
-    def __init__(self, name: str = "test_rule", allow_all: bool = True):
-        super().__init__(name, "Test rule", RulePriority.NORMAL)
-        self.allow_all = allow_all
-
-    def can_connect(self, source_type: NodeType, target_type: NodeType) -> bool:
-        return self.allow_all
-
-
-class MockTransformRule(BaseTransformRule):
-    """Mock transform rule for testing."""
-
-    def __init__(self, name: str = "test_transform", transform_dict: dict | None = None):
-        super().__init__(name, "Test transform", RulePriority.NORMAL)
-        self.transform_dict = transform_dict or {}
-
-    def applies_to(self, source, target):
-        return True
-
-    def get_transform(self, source, target):
-        return self.transform_dict
-
-
-class TestExecutionRuleRegistry:
-    """Test ExecutionRuleRegistry functionality."""
-
-    def test_create_registry(self):
-        """Test creating a registry."""
-        registry = ExecutionRuleRegistry(allow_override=True, enable_audit=False)
-        assert registry is not None
-        assert registry.list_rules() == []
-
-    def test_register_connection_rule(self):
-        """Test registering a connection rule."""
-        registry = ExecutionRuleRegistry(allow_override=True, enable_audit=False)
-        rule = MockConnectionRule("test_rule")
-        key = RuleKey(
-            name="test_rule",
-            category=RuleCategory.CONNECTION,
-            priority=RulePriority.NORMAL,
-        )
-
-        registry.register_connection_rule(key, rule)
-
-        rules = registry.list_rules(RuleCategory.CONNECTION)
-        assert len(rules) == 1
-        assert str(key) in rules
-
-    def test_register_transform_rule(self):
-        """Test registering a transform rule."""
-        registry = ExecutionRuleRegistry(allow_override=True, enable_audit=False)
-        rule = MockTransformRule("test_transform", {"test": True})
-        key = RuleKey(
-            name="test_transform",
-            category=RuleCategory.TRANSFORM,
-            priority=RulePriority.NORMAL,
-        )
-
-        registry.register_transform_rule(key, rule)
-
-        rules = registry.list_rules(RuleCategory.TRANSFORM)
-        assert len(rules) == 1
-        assert str(key) in rules
-
-    def test_can_connect_with_rules(self):
-        """Test can_connect with registered rules."""
-        registry = ExecutionRuleRegistry(allow_override=True, enable_audit=False)
-
-        # Register a rule that blocks START as target
-        rule = MockConnectionRule("no_start_target", allow_all=False)
-        key = RuleKey(
-            name="no_start_target",
-            category=RuleCategory.CONNECTION,
-            priority=RulePriority.HIGH,
-        )
-        registry.register_connection_rule(key, rule)
-
-        # Should block all connections
-        assert not registry.can_connect(NodeType.PERSON_JOB, NodeType.START)
-
-        # Register an allow-all rule with lower priority
-        rule2 = MockConnectionRule("allow_all", allow_all=True)
-        key2 = RuleKey(
-            name="allow_all",
-            category=RuleCategory.CONNECTION,
-            priority=RulePriority.LOW,
-        )
-        registry.register_connection_rule(key2, rule2)
-
-        # High priority rule should still block
-        assert not registry.can_connect(NodeType.PERSON_JOB, NodeType.START)
-
-    def test_get_data_transform(self):
-        """Test getting data transforms."""
-        registry = ExecutionRuleRegistry(allow_override=True, enable_audit=False)
-
-        # Register a transform rule
-        rule = MockTransformRule("extract_tools", {"extract_tool_results": True})
-        key = RuleKey(
-            name="extract_tools",
-            category=RuleCategory.TRANSFORM,
-            priority=RulePriority.NORMAL,
-        )
-        registry.register_transform_rule(key, rule)
-
-        # Use mock nodes
-        source = Mock()
-        target = Mock()
-
-        transforms = registry.get_data_transform(source, target)
-        assert transforms == {"extract_tool_results": True}
-
-    def test_merge_transforms(self):
-        """Test merging transforms."""
-        registry = ExecutionRuleRegistry(allow_override=True, enable_audit=False)
-
-        edge_transform = {"custom": "value", "priority": "edge"}
-        type_transform = {"priority": "type", "default": "value"}
-
-        merged = registry.merge_transforms(edge_transform, type_transform)
-
-        # Edge transforms should take precedence
-        assert merged["priority"] == "edge"
-        assert merged["custom"] == "value"
-        assert merged["default"] == "value"
-
-    def test_rule_priority_ordering(self):
-        """Test that rules are applied in priority order."""
-        registry = ExecutionRuleRegistry(allow_override=True, enable_audit=False)
-
-        # Register low priority rule first
-        rule1 = MockTransformRule("low_priority", {"value": "low"})
-        key1 = RuleKey(
-            name="low_priority",
-            category=RuleCategory.TRANSFORM,
-            priority=RulePriority.LOW,
-        )
-        registry.register_transform_rule(key1, rule1)
-
-        # Register high priority rule
-        rule2 = MockTransformRule("high_priority", {"value": "high"})
-        key2 = RuleKey(
-            name="high_priority",
-            category=RuleCategory.TRANSFORM,
-            priority=RulePriority.HIGH,
-        )
-        registry.register_transform_rule(key2, rule2)
-
-        # Use mock nodes
-        source = Mock()
-        target = Mock()
-
-        # High priority rule should override low priority
-        transforms = registry.get_data_transform(source, target)
-        assert transforms["value"] == "high"
-
-    def test_unregister_rule(self):
-        """Test unregistering a rule."""
-        registry = ExecutionRuleRegistry(allow_override=True, enable_audit=False)
-
-        rule = MockConnectionRule("test_rule")
-        key = RuleKey(
-            name="test_rule",
-            category=RuleCategory.CONNECTION,
-            priority=RulePriority.NORMAL,
-        )
-        registry.register_connection_rule(key, rule)
-
-        assert len(registry.list_rules(RuleCategory.CONNECTION)) == 1
-
-        registry.unregister(key)
-        assert len(registry.list_rules(RuleCategory.CONNECTION)) == 0
-
-    def test_immutable_rule_protection(self):
-        """Test that immutable rules cannot be overridden."""
-        registry = ExecutionRuleRegistry(allow_override=True, enable_audit=False)
-
-        rule = MockConnectionRule("immutable_rule")
-        key = RuleKey(
-            name="immutable_rule",
-            category=RuleCategory.CONNECTION,
-            priority=RulePriority.NORMAL,
-            immutable=True,
-        )
-        registry.register_connection_rule(key, rule)
-
-        # Try to register another rule with same key
-        rule2 = MockConnectionRule("immutable_rule", allow_all=False)
-        with pytest.raises(RuntimeError, match="Cannot override immutable rule"):
-            registry.register_connection_rule(key, rule2, override=True)
-
-    def test_freeze_registry(self):
-        """Test freezing the registry."""
-        registry = ExecutionRuleRegistry(allow_override=True, enable_audit=False)
-
-        rule = MockConnectionRule("test_rule")
-        key = RuleKey(
-            name="test_rule",
-            category=RuleCategory.CONNECTION,
-            priority=RulePriority.NORMAL,
-        )
-        registry.register_connection_rule(key, rule)
-
-        registry.freeze()
-        assert registry.is_frozen()
-
-        # Try to override existing rule (should fail when frozen)
-        rule2 = MockConnectionRule("test_rule", allow_all=False)
-        with pytest.raises(RuntimeError, match="Registry is frozen"):
-            registry.register_connection_rule(key, rule2, override=True)
-
-    def test_temporary_override(self):
-        """Test temporary rule override context manager."""
-        registry = ExecutionRuleRegistry(allow_override=True, enable_audit=False)
-        registry._environment = "testing"  # Enable temporary overrides
-
-        rule = MockConnectionRule("test_rule", allow_all=True)
-        key = RuleKey(
-            name="test_rule",
-            category=RuleCategory.CONNECTION,
-            priority=RulePriority.NORMAL,
-        )
-        registry.register_connection_rule(key, rule)
-
-        # Original behavior
-        assert registry.can_connect(NodeType.PERSON_JOB, NodeType.START)
-
-        # Temporary override
-        override_rule = MockConnectionRule("test_rule", allow_all=False)
-        with registry.temporary_override({key: override_rule}):
-            assert not registry.can_connect(NodeType.PERSON_JOB, NodeType.START)
-
-        # Should be restored
-        assert registry.can_connect(NodeType.PERSON_JOB, NodeType.START)
-
-    def test_get_rule_info(self):
-        """Test getting rule information."""
-        registry = ExecutionRuleRegistry(allow_override=True, enable_audit=False)
-
-        rule = MockConnectionRule("test_rule")
-        key = RuleKey(
-            name="test_rule",
-            category=RuleCategory.CONNECTION,
-            priority=RulePriority.HIGH,
-            description="Test description",
-        )
-        registry.register_connection_rule(key, rule)
-
-        info = registry.get_rule_info(key)
-        assert info is not None
-        assert info["name"] == "test_rule"
-        assert info["category"] == "connection"
-        assert info["priority"] == "HIGH"
-        assert info["description"] == "Test description"
-
-    def test_connection_constraints(self):
-        """Test getting connection constraints for a node type."""
-        registry = ExecutionRuleRegistry(allow_override=True, enable_audit=False)
-
-        # Register a simple rule
-        rule = MockConnectionRule("simple", allow_all=True)
-        key = RuleKey(
-            name="simple",
-            category=RuleCategory.CONNECTION,
-            priority=RulePriority.NORMAL,
-        )
-        registry.register_connection_rule(key, rule)
-
-        constraints = registry.get_connection_constraints(NodeType.PERSON_JOB)
-
-        assert "can_receive_from" in constraints
-        assert "can_send_to" in constraints
-        assert isinstance(constraints["can_receive_from"], list)
-        assert isinstance(constraints["can_send_to"], list)
-
-    def test_audit_trail(self):
-        """Test audit trail recording."""
-        registry = ExecutionRuleRegistry(allow_override=True, enable_audit=True)
-
-        rule = MockConnectionRule("test_rule")
-        key = RuleKey(
-            name="test_rule",
-            category=RuleCategory.CONNECTION,
-            priority=RulePriority.NORMAL,
-        )
-        registry.register_connection_rule(key, rule)
-
-        trail = registry.get_audit_trail()
-        assert len(trail) > 0
-
-        last_record = trail[-1]
-        assert last_record.rule_key == str(key)
-        assert last_record.action == "register"
-        assert last_record.success is True
-
-    def test_override_without_permission(self):
-        """Test that overriding without permission fails."""
-        registry = ExecutionRuleRegistry(allow_override=False, enable_audit=False)
-
-        rule = MockConnectionRule("test_rule")
-        key = RuleKey(
-            name="test_rule",
-            category=RuleCategory.CONNECTION,
-            priority=RulePriority.NORMAL,
-        )
-        registry.register_connection_rule(key, rule)
-
-        # Try to register another rule with same name
-        rule2 = MockConnectionRule("test_rule", allow_all=False)
-        with pytest.raises(RuntimeError, match="without override=True"):
-            registry.register_connection_rule(key, rule2)
-
-    def test_wrong_category_error(self):
-        """Test that using wrong category raises error."""
-        registry = ExecutionRuleRegistry(allow_override=True, enable_audit=False)
-
-        rule = MockConnectionRule("test_rule")
-        key = RuleKey(
-            name="test_rule",
-            category=RuleCategory.TRANSFORM,  # Wrong category for connection rule
-            priority=RulePriority.NORMAL,
-        )
-
-        with pytest.raises(ValueError, match="category must be CONNECTION"):
-            registry.register_connection_rule(key, rule)
diff --git a/uv.lock b/uv.lock
index d47ea2d9a..24eccc4ee 100644
--- a/uv.lock
+++ b/uv.lock
@@ -9,6 +9,7 @@ resolution-markers = [
 [manifest]
 members = [
     "dipeo",
+    "dipeo-cli",
     "dipeo-monorepo",
     "dipeo-server",
 ]
@@ -418,12 +419,16 @@ wheels = [
 
 [[package]]
 name = "claude-agent-sdk"
-version = "0.1.0"
-source = { git = "https://github.com/anthropics/claude-agent-sdk-python.git#2a9693e2585228e956ab240c1fad557d4eb510d4" }
+version = "0.1.6"
+source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "anyio" },
     { name = "mcp" },
 ]
+sdist = { url = "https://files.pythonhosted.org/packages/4f/b6/b73279eb875333fcc3e14c28fc080f815abaf35d2a65b132be0c8b05851c/claude_agent_sdk-0.1.6.tar.gz", hash = "sha256:3090a595896d65a5d951e158e191b462759aafc97399e700e4f857d5265a8f23", size = 49328, upload-time = "2025-10-31T05:15:55.803Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/cc/12/38e4e9f7f79f2c04c7be34cd995ef4a56681f8266e382a5288ce815ede71/claude_agent_sdk-0.1.6-py3-none-any.whl", hash = "sha256:54227b096e8c7cfb60fc8b570082fce1f91ea060413092f65a08a2824cb9cb4b", size = 36369, upload-time = "2025-10-31T05:15:54.767Z" },
+]
 
 [[package]]
 name = "click"
@@ -656,6 +661,11 @@ name = "dipeo"
 version = "1.0.0"
 source = { editable = "dipeo" }
 
+[[package]]
+name = "dipeo-cli"
+version = "1.0.0"
+source = { editable = "cli" }
+
 [[package]]
 name = "dipeo-monorepo"
 version = "1.0.0"
@@ -797,12 +807,12 @@ requires-dist = [
     { name = "cachetools", specifier = "==5.5.2" },
     { name = "certifi", specifier = "==2025.6.15" },
     { name = "charset-normalizer", specifier = "==3.4.2" },
-    { name = "claude-agent-sdk", git = "https://github.com/anthropics/claude-agent-sdk-python.git" },
+    { name = "claude-agent-sdk", specifier = "==0.1.6" },
     { name = "click", specifier = "~=8.2.1" },
     { name = "datamodel-code-generator", specifier = ">=0.21.0" },
     { name = "dependency-injector", specifier = "==4.48.1" },
     { name = "dill", specifier = "==0.3.8" },
-    { name = "dipeo-server", editable = "apps/server" },
+    { name = "dipeo-server", editable = "server" },
     { name = "distro", specifier = "==1.9.0" },
     { name = "fastapi", specifier = "==0.115.13" },
     { name = "filelock", specifier = "==3.18.0" },
@@ -904,7 +914,7 @@ dev = []
 [[package]]
 name = "dipeo-server"
 version = "1.0.0"
-source = { editable = "apps/server" }
+source = { editable = "server" }
 
 [[package]]
 name = "distlib"