feat: Fault Proof support (Phase 1 & 2)

theo-learner · theo-learner · commit e656abafc83e · 2026-02-20T02:52:27.000+09:00
Phase 1 - Challenger EOA Monitoring:
- Add op-challenger component to OP Stack topology
- Implement Challenger EOA balance monitoring
- Add auto-refill playbooks for critical balance
- Update component dependencies and K8s configs

Phase 2 - Dispute Game & Bond Management:
- Implement DisputeGameMonitor for on-chain game tracking
- Implement BondManager for automatic bond claims
- Add playbooks: deadline alerts, unclaimed bonds, proof timeouts
- Add dispute game metrics and monitoring

New Playbooks:
- challenger-balance-low (Phase 1)
- challenger-balance-critical with auto-refill (Phase 1)
- op-challenger-failure detection (Phase 1)
- dispute-game-deadline-near (Phase 2)
- unclaimed-bond-alert with auto-claim (Phase 2)
- proof-generation-timeout (Phase 2)

Configuration:
- Add FAULT_PROOF_ENABLED flag
- Add CHALLENGER_EOA_ADDRESS / CHALLENGER_PRIVATE_KEY
- Add DISPUTE_GAME_FACTORY_ADDRESS
- Add bond and deadline threshold configs

AI Prompts:
- Update RCA system prompt with op-challenger component
- Add Fault Proof failure patterns

Note: Phase 2 contracts require DisputeGameFactory ABI
for full on-chain integration (marked as TODO).
diff --git a/.env.local.sample b/.env.local.sample
@@ -49,10 +49,12 @@ AWS_CLUSTER_NAME=my-cluster-name
 # 1. Manual EOA addresses (if you know the addresses)
 #    BATCHER_EOA_ADDRESS=0x...     # Batcher EOA address to monitor
 #    PROPOSER_EOA_ADDRESS=0x...    # Proposer EOA address to monitor
+#    CHALLENGER_EOA_ADDRESS=0x...  # Challenger EOA address (Fault Proof only)
 #
 # 2. Private key derivation (derive EOA from private key, no L1 RPC required)
 #    BATCHER_PRIVATE_KEY=0x...     # Batcher private key → derive EOA address
 #    PROPOSER_PRIVATE_KEY=0x...    # Proposer private key → derive EOA address
+#    CHALLENGER_PRIVATE_KEY=0x...  # Challenger private key (Fault Proof only)
 #
 # Auto-refill Configuration (requires treasury wallet):
 # TREASURY_PRIVATE_KEY=0x...    # Treasury wallet private key for auto-refill (omit for monitor-only)
@@ -115,6 +117,26 @@ AWS_CLUSTER_NAME=my-cluster-name
 # LLM_TEST_PARALLELISM_DEFAULT=5                                    # Default concurrent requests
 # LLM_TEST_OUTPUT_DIR=src/lib/__tests__/llm-stress-test/output      # Result output directory
 
+# === Fault Proof (Optional - Phase 1 & 2) ===
+# Enable Fault Proof monitoring for OP Stack deployments with dispute game system.
+# Requires op-challenger component to be deployed.
+#
+# Phase 1: Challenger EOA monitoring + auto-refill
+# FAULT_PROOF_ENABLED=true                   # Enable Fault Proof features
+# CHALLENGER_EOA_ADDRESS=0x...               # Challenger EOA to monitor (see EOA section above)
+# CHALLENGER_PRIVATE_KEY=0x...               # For bond claims + EOA address derivation
+#
+# Phase 2: Dispute Game monitoring + Bond management
+# DISPUTE_GAME_FACTORY_ADDRESS=0x...         # L1 DisputeGameFactory contract address
+# GAME_DEADLINE_ALERT_HOURS=24               # Alert when game deadline < N hours (default: 24)
+# AUTO_BOND_CLAIM=true                       # Auto-claim bonds from won games (default: false)
+#
+# Bond thresholds (Challenger-specific):
+# CHALLENGER_BOND_MIN_ETH=0.8                # Minimum total balance for bond participation (10 games × 0.08)
+# CHALLENGER_BOND_REFILL_ETH=1.0             # Auto-refill amount when balance critical
+# CHALLENGER_BALANCE_WARNING_ETH=0.5         # Warning threshold
+# CHALLENGER_BALANCE_CRITICAL_ETH=0.1        # Critical threshold (triggers auto-refill)
+
 # === Display (Optional) ===
 # NEXT_PUBLIC_NETWORK_NAME=Thanos Sepolia   # Network name shown in dashboard header
 
diff --git a/src/chains/thanos/components.ts b/src/chains/thanos/components.ts
@@ -13,7 +13,7 @@ import type {
 // Component Names
 // ============================================================
 
-export const OP_COMPONENTS = ['op-geth', 'op-node', 'op-batcher', 'op-proposer'] as const;
+export const OP_COMPONENTS = ['op-geth', 'op-node', 'op-batcher', 'op-proposer', 'op-challenger'] as const;
 export type OptimismComponent = (typeof OP_COMPONENTS)[number];
 
 export const META_COMPONENTS = ['l1', 'system'] as const;
@@ -29,7 +29,7 @@ export const DEPENDENCY_GRAPH: Record<string, ComponentDependency> = {
   },
   'op-node': {
     dependsOn: ['l1'],
-    feeds: ['op-geth', 'op-batcher', 'op-proposer'],
+    feeds: ['op-geth', 'op-batcher', 'op-proposer', 'op-challenger'],
   },
   'op-batcher': {
     dependsOn: ['op-node', 'l1'],
@@ -39,13 +39,17 @@ export const DEPENDENCY_GRAPH: Record<string, ComponentDependency> = {
     dependsOn: ['op-node', 'l1'],
     feeds: [],
   },
+  'op-challenger': {
+    dependsOn: ['op-node', 'l1'],
+    feeds: [],
+  },
   'l1': {
     dependsOn: [],
-    feeds: ['op-node', 'op-batcher', 'op-proposer'],
+    feeds: ['op-node', 'op-batcher', 'op-proposer', 'op-challenger'],
   },
   'system': {
     dependsOn: [],
-    feeds: ['op-geth', 'op-node', 'op-batcher', 'op-proposer'],
+    feeds: ['op-geth', 'op-node', 'op-batcher', 'op-proposer', 'op-challenger'],
   },
 };
 
@@ -62,6 +66,8 @@ export const COMPONENT_ALIASES: Record<string, string> = {
   'batcher': 'op-batcher',
   'op-proposer': 'op-proposer',
   'proposer': 'op-proposer',
+  'op-challenger': 'op-challenger',
+  'challenger': 'op-challenger',
   'l1': 'l1',
   'system': 'system',
 };
@@ -98,6 +104,13 @@ export const K8S_COMPONENTS: K8sComponentConfig[] = [
     l1RpcEnvVar: 'OP_PROPOSER_L1_ETH_RPC',
     isPrimaryExecution: false,
   },
+  {
+    component: 'op-challenger',
+    labelSuffix: 'challenger',
+    statefulSetSuffix: 'op-challenger',
+    l1RpcEnvVar: 'OP_CHALLENGER_L1_ETH_RPC',
+    isPrimaryExecution: false,
+  },
 ];
 
 // ============================================================
@@ -115,7 +128,12 @@ export const EOA_CONFIGS: EOAConfig[] = [
     addressEnvVar: 'PROPOSER_EOA_ADDRESS',
     displayName: 'Proposer',
   },
+  {
+    role: 'challenger',
+    addressEnvVar: 'CHALLENGER_EOA_ADDRESS',
+    displayName: 'Challenger',
+  },
 ];
 
 /** Balance metrics for anomaly detection */
-export const BALANCE_METRICS = ['batcherBalance', 'proposerBalance'];
+export const BALANCE_METRICS = ['batcherBalance', 'proposerBalance', 'challengerBalance'];
diff --git a/src/chains/thanos/index.ts b/src/chains/thanos/index.ts
@@ -77,6 +77,9 @@ export class ThanosPlugin implements ChainPlugin {
     if (metric.includes('proposer') || metric === 'proposerBalance') {
       return 'op-proposer';
     }
+    if (metric.includes('challenger') || metric === 'challengerBalance') {
+      return 'op-challenger';
+    }
     return 'system';
   }
 
diff --git a/src/chains/thanos/playbooks.ts b/src/chains/thanos/playbooks.ts
@@ -222,4 +222,177 @@ export const THANOS_PLAYBOOKS: Playbook[] = [
     ],
     maxAttempts: 0, // Immediate escalation
   },
+
+  // Playbook 8: Challenger EOA Balance Low (Phase 1)
+  {
+    name: 'challenger-balance-low',
+    description: 'Challenger EOA balance below warning threshold',
+    trigger: {
+      component: 'op-challenger',
+      indicators: [
+        { type: 'metric', condition: 'challengerBalance < warning' },
+      ],
+    },
+    actions: [
+      {
+        type: 'escalate_operator',
+        safetyLevel: 'safe',
+        params: { urgency: 'medium', message: 'Challenger EOA balance low. Refill recommended.' },
+      },
+    ],
+    maxAttempts: 0, // Alert only
+  },
+
+  // Playbook 9: Challenger EOA Balance Critical (Phase 1)
+  {
+    name: 'challenger-balance-critical',
+    description: 'Challenger EOA balance critically low — auto-refill for dispute game participation',
+    trigger: {
+      component: 'op-challenger',
+      indicators: [
+        { type: 'metric', condition: 'challengerBalance < critical' },
+      ],
+    },
+    actions: [
+      {
+        type: 'check_treasury_balance',
+        safetyLevel: 'safe',
+      },
+      {
+        type: 'check_l1_gas_price',
+        safetyLevel: 'safe',
+      },
+      {
+        type: 'refill_eoa',
+        safetyLevel: 'guarded',
+        params: { role: 'challenger', amount: '1.0' },
+        waitAfterMs: 30000,
+      },
+      {
+        type: 'verify_balance_restored',
+        safetyLevel: 'safe',
+        params: { role: 'challenger' },
+      },
+    ],
+    fallback: [
+      {
+        type: 'escalate_operator',
+        safetyLevel: 'safe',
+        params: { urgency: 'critical', message: 'Challenger EOA refill failed. Cannot participate in dispute games — security risk!' },
+      },
+    ],
+    maxAttempts: 1,
+  },
+
+  // Playbook 10: op-challenger Component Failure (Phase 1)
+  {
+    name: 'op-challenger-failure',
+    description: 'op-challenger pod crash or proof generation failure',
+    trigger: {
+      component: 'op-challenger',
+      indicators: [
+        { type: 'log_pattern', condition: 'proof generation failed|panic|fatal error' },
+        { type: 'metric', condition: 'pod restart count > 3' },
+      ],
+    },
+    actions: [
+      {
+        type: 'collect_logs',
+        safetyLevel: 'safe',
+        target: 'op-challenger',
+        params: { lines: 500 },
+      },
+      {
+        type: 'escalate_operator',
+        safetyLevel: 'safe',
+        params: { urgency: 'critical', message: 'op-challenger failure detected. Dispute resolution compromised.' },
+      },
+    ],
+    maxAttempts: 0, // Manual intervention required
+  },
+
+  // Playbook 11: Dispute Game Deadline Approaching (Phase 2)
+  {
+    name: 'dispute-game-deadline-near',
+    description: 'Dispute game deadline approaching — requires immediate action',
+    trigger: {
+      component: 'op-challenger',
+      indicators: [
+        { type: 'metric', condition: 'gameDeadlineProximity < 1h' },
+      ],
+    },
+    actions: [
+      {
+        type: 'escalate_operator',
+        safetyLevel: 'safe',
+        params: { urgency: 'critical', message: 'Dispute game deadline < 1 hour. Immediate review required.' },
+      },
+      {
+        type: 'collect_logs',
+        safetyLevel: 'safe',
+        target: 'op-challenger',
+        params: { lines: 200 },
+      },
+    ],
+    maxAttempts: 0, // Alert only
+  },
+
+  // Playbook 12: Unclaimed Bond Alert (Phase 2)
+  {
+    name: 'unclaimed-bond-alert',
+    description: 'Bond from resolved game not claimed after 24h',
+    trigger: {
+      component: 'op-challenger',
+      indicators: [
+        { type: 'metric', condition: 'unclaimedBonds > 0 && unclaimedAge > 24h' },
+      ],
+    },
+    actions: [
+      {
+        type: 'claim_bond',
+        safetyLevel: 'guarded',
+        params: { auto: true },
+      },
+      {
+        type: 'escalate_operator',
+        safetyLevel: 'safe',
+        params: { urgency: 'medium', message: 'Auto-claiming unclaimed bonds from resolved games.' },
+      },
+    ],
+    fallback: [
+      {
+        type: 'escalate_operator',
+        safetyLevel: 'safe',
+        params: { urgency: 'high', message: 'Bond claim failed. Manual intervention required.' },
+      },
+    ],
+    maxAttempts: 2,
+  },
+
+  // Playbook 13: Proof Generation Timeout (Phase 2)
+  {
+    name: 'proof-generation-timeout',
+    description: 'Fault proof generation taking too long or stalled',
+    trigger: {
+      component: 'op-challenger',
+      indicators: [
+        { type: 'metric', condition: 'proofGenerationLatency > 300s' },
+        { type: 'log_pattern', condition: 'proof generation timeout|MIPS execution stalled' },
+      ],
+    },
+    actions: [
+      {
+        type: 'collect_logs',
+        safetyLevel: 'safe',
+        target: 'op-challenger',
+        params: { lines: 500 },
+      },
+      {
+        type: 'escalate_operator',
+        safetyLevel: 'safe',
+        params: { urgency: 'high', message: 'Proof generation timeout. Check op-program logs and game deadline.' },
+      },
+    ],
+    maxAttempts: 0, // Requires manual investigation
+  },
 ];
diff --git a/src/chains/thanos/prompts.ts b/src/chains/thanos/prompts.ts
@@ -39,10 +39,18 @@ const RCA_SYSTEM_PROMPT = `You are performing Root Cause Analysis (RCA) for a Th
    - Depends on op-node for state data and L1 for submission
    - If proposer fails: withdrawals delayed, but L2 continues operating
 
+6. **op-challenger (Fault Proof / Dispute Game)**
+   - Monitors state root proposals on L1
+   - Participates in dispute games to challenge invalid proposals
+   - Generates fault proofs via MIPS execution (op-program)
+   - Depends on op-node for L2 state verification and L1 for game participation
+   - If challenger fails: SECURITY RISK — invalid proposals may be accepted
+
 == Component Dependency Graph ==
 L1 -> op-node -> op-geth
                 -> op-batcher -> L1
                 -> op-proposer -> L1
+                -> op-challenger -> L1
 
 == Common Thanos Failure Patterns ==
 
@@ -51,6 +59,9 @@ L1 -> op-node -> op-geth
 3. **op-geth Crash / OOM**: CPU/Memory anomalies, connection refused errors
 4. **Batcher Backlog**: txpool monotonically increasing, no batch submissions
 5. **Network Partition / P2P Issues**: Peer disconnections, gossip failures
+6. **Challenger EOA Low Balance**: Cannot participate in dispute games, bond deposit fails — SECURITY CRITICAL
+7. **Proof Generation Timeout**: op-program MIPS execution stalls, game deadline missed
+8. **Dispute Game Deadline Missed**: Invalid proposal accepted, withdrawal security compromised
 
 == Your Task ==
 
diff --git a/src/lib/bond-manager.ts b/src/lib/bond-manager.ts
diff --git a/src/lib/dispute-game-monitor.ts b/src/lib/dispute-game-monitor.ts

Original file line number	Diff line number	Diff line change
`@@ -77,6 +77,9 @@ export class ThanosPlugin implements ChainPlugin {`
`77`	`77`	`if (metric.includes('proposer') || metric === 'proposerBalance') {`
`78`	`78`	`return 'op-proposer';`
`79`	`79`	`}`
	`80`	`+ if (metric.includes('challenger') || metric === 'challengerBalance') {`
	`81`	`+ return 'op-challenger';`
	`82`	`+ }`
`80`	`83`	`return 'system';`
`81`	`84`	`}`
`82`	`85`