PostHog
diff --git a/bin.ts b/bin.ts
Lines changed: 13 additions & 2 deletions
diff --git a/src/lib/__tests__/agent-interface.test.ts b/src/lib/__tests__/agent-interface.test.ts
Lines changed: 1 addition & 0 deletions
diff --git a/src/lib/agent-interface.ts b/src/lib/agent-interface.ts
Lines changed: 21 additions & 2 deletions
diff --git a/src/lib/agent-runner.ts b/src/lib/agent-runner.ts
Lines changed: 6 additions & 2 deletions
diff --git a/src/lib/middleware/benchmark.ts b/src/lib/middleware/benchmark.ts
Lines changed: 105 additions & 0 deletions
@@ -18,7 +18,7 @@ if (!satisfies(process.version, NODE_VERSION_RANGE)) {
 }
 
 import { runMCPInstall, runMCPRemove } from './src/mcp';
-import type { CloudRegion, WizardOptions } from './src/utils/types';
+import type { CloudRegion } from './src/utils/types';
 import { runWizard } from './src/run';
 import { isNonInteractiveEnvironment } from './src/utils/environment';
 import clack from './src/utils/clack';
@@ -79,6 +79,11 @@ yargs(hideBin(process.argv))
         'PostHog personal API key (phx_xxx) for authentication\nenv: POSTHOG_WIZARD_API_KEY',
       type: 'string',
     },
+    'project-id': {
+      describe:
+        'PostHog project ID to use (optional; when not set, uses default from API key or OAuth)\nenv: POSTHOG_WIZARD_PROJECT_ID',
+      type: 'string',
+    },
   })
   .command(
     ['$0'],
@@ -115,6 +120,12 @@ yargs(hideBin(process.argv))
             'Show menu for manual integration selection instead of auto-detecting\nenv: POSTHOG_WIZARD_MENU',
           type: 'boolean',
         },
+        benchmark: {
+          default: false,
+          describe:
+            'Run in benchmark mode with per-phase token tracking\nenv: POSTHOG_WIZARD_BENCHMARK',
+          type: 'boolean',
+        },
       });
     },
     (argv) => {
@@ -155,7 +166,7 @@ yargs(hideBin(process.argv))
         process.exit(1);
       }
 
-      void runWizard(options as unknown as WizardOptions);
+      void runWizard(options as unknown as Parameters<typeof runWizard>[0]);
     },
   )
   .command('mcp <command>', 'MCP server management commands', (yargs) => {
 
@@ -32,6 +32,7 @@ describe('runAgent', () => {
     localMcp: false,
     ci: false,
     menu: false,
+    benchmark: false,
   };
 
   const defaultAgentConfig = {
 
@@ -5,7 +5,7 @@
 
 import path from 'path';
 import clack from '../utils/clack';
-import { debug, logToFile, initLogFile, LOG_FILE_PATH } from '../utils/debug';
+import { debug, logToFile, initLogFile, getLogFilePath } from '../utils/debug';
 import type { WizardOptions } from '../utils/types';
 import { analytics } from '../utils/analytics';
 import {
@@ -50,6 +50,8 @@ export const AgentSignals = {
   ERROR_RESOURCE_MISSING: '[ERROR-RESOURCE-MISSING]',
   /** Signal emitted when the agent provides a remark about its run */
   WIZARD_REMARK: '[WIZARD-REMARK]',
+  /** Signal prefix for benchmark logging */
+  BENCHMARK: '[BENCHMARK]',
 } as const;
 
 export type AgentSignal = (typeof AgentSignals)[keyof typeof AgentSignals];
@@ -393,7 +395,7 @@ export async function initializeAgent(
       });
     }
 
-    clack.log.step(`Verbose logs: ${LOG_FILE_PATH}`);
+    clack.log.step(`Verbose logs: ${getLogFilePath()}`);
     clack.log.success("Agent initialized. Let's get cooking!");
     return agentRunConfig;
   } catch (error) {
@@ -421,6 +423,10 @@ export async function runAgent(
     successMessage?: string;
     errorMessage?: string;
   },
+  middleware?: {
+    onMessage(message: any): void;
+    finalize(resultMessage: any, totalDurationMs: number): any;
+  },
 ): Promise<{ error?: AgentErrorType; message?: string }> {
   const {
     estimatedDurationMinutes = 8,
@@ -446,6 +452,7 @@ export async function runAgent(
   const collectedText: string[] = [];
   // Track if we received a successful result (before any cleanup errors)
   let receivedSuccessResult = false;
+  let lastResultMessage: any = null;
 
   // Workaround for SDK bug: stdin closes before canUseTool responses can be sent.
   // The fix is to use an async generator for the prompt that stays open until
@@ -505,6 +512,11 @@ export async function runAgent(
       duration_ms: durationMs,
       duration_seconds: durationSeconds,
     });
+    try {
+      middleware?.finalize(lastResultMessage, durationMs);
+    } catch (e) {
+      logToFile(`${AgentSignals.BENCHMARK} Middleware finalize error:`, e);
+    }
     spinner.stop(successMessage);
     return {};
   };
@@ -604,12 +616,19 @@ export async function runAgent(
         receivedSuccessResult,
       );
 
+      try {
+        middleware?.onMessage(message);
+      } catch (e) {
+        logToFile(`${AgentSignals.BENCHMARK} Middleware onMessage error:`, e);
+      }
+
       // Signal completion when result received
       if (message.type === 'result') {
         // Track successful results before any potential cleanup errors
         // The SDK may emit a second error result during cleanup due to a race condition
         if (message.subtype === 'success' && !message.is_error) {
           receivedSuccessResult = true;
+          lastResultMessage = message;
         }
         signalDone!();
       }
 
@@ -35,6 +35,7 @@ import {
 } from '../steps';
 import { checkAnthropicStatusWithPrompt } from '../utils/anthropic-status';
 import { enableDebugLogs } from '../utils/debug';
+import { createBenchmarkPipeline } from './middleware/benchmark';
 
 /**
  * Universal agent-powered wizard runner.
@@ -159,7 +160,6 @@ export async function runAgentWizard(
     analytics.setTag(key, value);
   });
 
-  // Build integration prompt
   const integrationPrompt = buildIntegrationPrompt(
     config,
     {
@@ -197,6 +197,10 @@ export async function runAgentWizard(
     options,
   );
 
+  const middleware = options.benchmark
+    ? createBenchmarkPipeline(spinner, options)
+    : undefined;
+
   const agentResult = await runAgent(
     agent,
     integrationPrompt,
@@ -208,6 +212,7 @@ export async function runAgentWizard(
       successMessage: config.ui.successMessage,
       errorMessage: 'Integration failed',
     },
+    middleware,
   );
 
   // Handle error cases detected in agent output
@@ -355,7 +360,6 @@ ${chalk.dim(`How did this work for you? Drop us a line: wizard@posthog.com`)}`;
 
 /**
  * Build the integration prompt for the agent.
- * Uses shared base prompt with optional framework-specific addendum.
  */
 function buildIntegrationPrompt(
   config: FrameworkConfig,
 
@@ -0,0 +1,105 @@
+/**
+ * Benchmark tracking for wizard runs.
+ *
+ * Usage:
+ *   const pipeline = createBenchmarkPipeline(spinner, options);
+ *   pipeline.onMessage(message);
+ *   pipeline.finalize(resultMessage, durationMs);
+ */
+
+import chalk from 'chalk';
+import clack from '../../utils/clack';
+import { logToFile, getLogFilePath, configureLogFile } from '../../utils/debug';
+import { MiddlewarePipeline } from './pipeline';
+import { PhaseDetector } from './phase-detector';
+import { loadBenchmarkConfig } from './config';
+import { createPluginsFromConfig } from './benchmarks';
+import type { BenchmarkConfig } from './config';
+import type { WizardOptions } from '../../utils/types';
+import { AgentSignals } from '../agent-interface';
+
+// ── Types ──────────────────────────────────────────────────────────────
+
+export interface StepUsage { // Usage record for one named step; BenchmarkData.steps holds an array of these.
+  name: string; // Label identifying the step.
+  usage: { // Token counters (snake_case keys, unlike the rest of this interface — presumably mirrors an upstream usage payload; TODO confirm).
+    input_tokens: number;
+    output_tokens: number;
+    cache_creation_input_tokens: number;
+    cache_read_input_tokens: number;
+    cache_creation?: { // Optional breakdown of cache-creation tokens; the 5m/1h suffixes presumably denote cache TTL buckets — confirm.
+      ephemeral_5m_input_tokens: number;
+      ephemeral_1h_input_tokens: number;
+    };
+  };
+  modelUsage: Record<string, unknown>; // String-keyed map whose value shape is left unconstrained here.
+  totalCostUsd: number; // Presumably US dollars, per the name.
+  durationMs: number; // Presumably milliseconds, per the name.
+  durationApiMs: number; // Presumably the API-side share of the duration, in milliseconds — confirm against the producer.
+  numTurns: number;
+  contextTokensIn?: number; // The remaining fields are optional; which producer sets them is not shown in this file.
+  contextTokensOut?: number;
+  compactions?: number;
+  compactionPreTokens?: number[]; // NOTE(review): looks like one pre-compaction token count per compaction — confirm.
+}
+
+export interface BenchmarkData { // Top-level benchmark record: a timestamp, the per-step records, and aggregate totals.
+  timestamp: string; // String format is not fixed by this type (presumably ISO 8601 — confirm against the writer).
+  steps: StepUsage[]; // One StepUsage entry per recorded step.
+  totals: { // Aggregate figures; how they are derived from `steps` is not shown in this file.
+    totalCostUsd: number;
+    durationMs: number;
+    inputTokens: number;
+    outputTokens: number;
+    numTurns: number;
+    totalCompactions: number;
+    totalCacheReadTokens: number;
+    totalCacheCreation5mTokens: number; // The 5m/1h pair parallels StepUsage.usage.cache_creation's two fields.
+    totalCacheCreation1hTokens: number;
+  };
+}
+
+// ── Factory ────────────────────────────────────────────────────────────
+
+/**
+ * Create a middleware pipeline configured for benchmarking.
+ * Loads .benchmark-config.json from the install dir, falls back to defaults.
+ * (The file lookup and the fallback live in loadBenchmarkConfig, which is not
+ * defined in this file.)
+ *
+ * Side effects, in order: reconfigures the log file from config.output,
+ * optionally prints the log path and the benchmark output path via clack, and
+ * writes one "[BENCHMARK]" line through logToFile.
+ *
+ * @param spinner - clack spinner handed to the plugins created from the config.
+ * @param options - Wizard options; only `installDir` is read here, and only
+ *   when no `configOverride` is given.
+ * @param configOverride - Config to use instead of calling loadBenchmarkConfig.
+ * @returns A MiddlewarePipeline over the configured plugins, constructed with
+ *   a fresh PhaseDetector and `autoDetectPhases: true`.
+ */
+export function createBenchmarkPipeline(
+  spinner: ReturnType<typeof clack.spinner>,
+  options: WizardOptions,
+  configOverride?: BenchmarkConfig,
+): MiddlewarePipeline {
+  const config = configOverride ?? loadBenchmarkConfig(options.installDir); // An explicit override wins; otherwise load for options.installDir.
+
+  configureLogFile({ // Runs before getLogFilePath() is read below — presumably so the printed path reflects this config (confirm in utils/debug).
+    path: config.output.logPath,
+    enabled: config.output.logEnabled,
+  });
+
+  const plugins = createPluginsFromConfig(config, {
+    spinner,
+    phased: false,
+    outputPath: config.output.benchmarkPath,
+  });
+
+  if (!config.output.suppressWizardLogs) { // The two console announcements can be silenced from the config.
+    clack.log.info(
+      `${chalk.cyan(AgentSignals.BENCHMARK)} Verbose logs: ${getLogFilePath()}`,
+    );
+    clack.log.info(
+      `${chalk.cyan(
+        AgentSignals.BENCHMARK,
+      )} Benchmark data will be written to: ${config.output.benchmarkPath}`,
+    );
+  }
+
+  logToFile( // Written regardless of suppressWizardLogs.
+    `${AgentSignals.BENCHMARK} Tracking enabled, starting with setup phase`,
+  );
+
+  return new MiddlewarePipeline(plugins, {
+    phaseDetector: new PhaseDetector(),
+    autoDetectPhases: true,
+  });
+}