wip first crack at scenario and runner

pcarleton · pcarleton · commit 98bf5aaf87d2 · 2025-10-29T14:38:11.000Z
diff --git a/.gitignore b/.gitignore
@@ -1 +1,2 @@
 node_modules
+results/
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -15,6 +15,8 @@
     "typescript": "^5.3.0"
   },
   "dependencies": {
-    "@modelcontextprotocol/sdk": "^1.20.1"
+    "@modelcontextprotocol/sdk": "^1.20.1",
+    "express": "^5.1.0",
+    "zod": "^3.25.76"
   }
 }
diff --git a/src/runner.ts b/src/runner.ts
@@ -1,19 +1,4 @@
-import { createClientInitializationCheck, createServerInfoCheck } from './checks.ts';
+import { runConformanceTest } from './runner/index.js';
 
-const initializeRequest = {
-  protocolVersion: '2025-06-18',
-  clientInfo: {
-    name: 'ExampleClient',
-    version: '1.0.0'
-  }
-};
-
-const checks = [
-  createClientInitializationCheck(initializeRequest),
-  createServerInfoCheck({
-    name: 'ExampleMCPServer',
-    version: '1.0.0'
-  })
-];
-
-console.log(JSON.stringify(checks, null, 2));
+// Public surface of the package: the programmatic runner entry point.
+export { runConformanceTest };
+// Shape of the `clientOutput` value that `runConformanceTest` resolves with.
+export type { ClientExecutionResult } from './runner/index.js';
diff --git a/src/runner/DESIGN.md b/src/runner/DESIGN.md
@@ -0,0 +1,91 @@
+# Conformance Test Runner Design
+
+## Overview
+
+The conformance test runner is a framework for testing MCP client implementations against the MCP specification. It executes client implementations in controlled scenarios and validates their behavior through a series of conformance checks.
+
+## Architecture
+
+### Components
+
+1. **Runner** - Orchestrates test execution
+2. **Test Server** - MCP server (and optionally auth server for auth scenarios)
+3. **Client Process** - The MCP client implementation under test
+4. **Scenario** - A specific test case with expected behaviors
+5. **Checks** - Validation functions that produce ConformanceCheck results
+
+### Execution Flow
+
+```
+1. Runner starts test server(s) on available port(s)
+2. Runner spawns client process with server URL as final argument
+3. Server captures MCP interactions
+4. Runner captures client stdout/stderr
+5. Scenario-specific checks are executed
+6. Results are written to results/<scenario>-<timestamp>/
+```
+
+## CLI Interface
+
+```bash
+npm run start -- --command "tsx examples/clients/typescript/test1.ts --verbose" --scenario initialize
+```
+
+### Arguments
+
+- `--command` - The command that launches the client; it may include the client's own flags (e.g. `--verbose`)
+- `--scenario` - The scenario to test (e.g., "initialize", "list-tools", "call-tool")
+
+The runner will append the server URL as the final argument to the command.
+
+## Scenarios
+
+A scenario represents a specific test case that validates one or more aspects of MCP client behavior. Each scenario:
+
+- Configures the test server with expected behavior
+- May run multiple conformance checks
+- Validates both client behavior and server responses
+
+Example scenarios:
+- `initialize` - Tests client initialization handshake
+- `list-tools` - Tests tool discovery
+- `call-tool` - Tests tool invocation
+- `auth-flow` - Tests OAuth/authorization flows (requires auth server)
+
+## Output Structure
+
+Results are written to: `results/<scenario>-<timestamp>/`
+
+Files:
+- `checks.json` - Array of ConformanceCheck objects with validation results
+- `stdout.txt` - Complete stdout from the client process
+- `stderr.txt` - Complete stderr from the client process
+
+### checks.json Format
+
+```json
+[
+  {
+    "id": "mcp-client-initialization",
+    "name": "MCPClientInitialization",
+    "description": "Validates that MCP client properly initializes with server",
+    "status": "SUCCESS",
+    "timestamp": "2024-10-29T14:30:00.000Z",
+    "specReferences": [
+      {
+        "id": "MCP-Lifecycle",
+        "url": "https://modelcontextprotocol.io/specification/2025-06-18/basic/lifecycle"
+      }
+    ],
+    "details": { ... }
+  }
+]
+```
+
+## Future Enhancements
+
+- **Test Suites** - Group multiple scenarios for convenience
+- **Configurable Timeouts** - Per-scenario timeout configuration
+- **Parallel Execution** - Run multiple scenarios concurrently
+- **Custom Servers** - Allow custom MCP server implementations for advanced scenarios
+- **Report Formats** - HTML, Markdown, or other human-readable formats
diff --git a/src/runner/index.ts b/src/runner/index.ts
@@ -0,0 +1,188 @@
+import { spawn } from 'child_process';
+import { promises as fs } from 'fs';
+import path from 'path';
+import { ConformanceCheck } from '../types.js';
+import { getScenario } from '../scenarios/index.js';
+
+/**
+ * Outcome of one run of the client process, as resolved by `executeClient`.
+ */
+export interface ClientExecutionResult {
+  /** Exit code reported for the client; -1 when the child process emitted an 'error' event. */
+  exitCode: number;
+  /** Everything the client wrote to stdout, concatenated as text. */
+  stdout: string;
+  /** Everything the client wrote to stderr; a "Process error: ..." line is appended when the child emitted an 'error' event. */
+  stderr: string;
+  /** True when the timeout fired and the runner called kill() on the client. */
+  timedOut: boolean;
+}
+
+/**
+ * Makes sure the top-level `results` directory exists under the current
+ * working directory, creating it (and any missing parents) when needed.
+ *
+ * @returns The path of the results directory (cwd joined with `results`).
+ */
+async function ensureResultsDir(): Promise<string> {
+  const root = path.join(process.cwd(), 'results');
+  // `recursive: true` makes this a no-op when the directory already exists.
+  return fs.mkdir(root, { recursive: true }).then(() => root);
+}
+
+/**
+ * Builds the (relative) directory path for one run of a scenario:
+ * `results/<scenario>-<timestamp>`. Nothing is created on disk here.
+ *
+ * @param scenario - Scenario name used as the directory prefix.
+ * @returns The relative path of the per-run result directory.
+ */
+function createResultDir(scenario: string): string {
+  // The ISO timestamp contains ':' and '.'; swap both for '-' in the directory name.
+  const stamp = new Date().toISOString().split(/[:.]/).join('-');
+  const dirName = `${scenario}-${stamp}`;
+  return path.join('results', dirName);
+}
+
+/**
+ * Runs the client under test and collects its output.
+ *
+ * The command string is split on single spaces and `serverUrl` is appended as
+ * the final argument; the child is spawned with `shell: true`. The returned
+ * promise never rejects: spawn failures and timeouts are reported through the
+ * fields of the result instead.
+ *
+ * @param command - Command line that launches the client (may include flags).
+ * @param serverUrl - URL of the test server, passed to the client as its last argument.
+ * @param timeout - Milliseconds to wait before calling kill() on the client (default 30000).
+ * @returns Exit code, captured stdout/stderr and whether the timeout fired.
+ *   `exitCode` is -1 when the child emitted an 'error' event or ended without
+ *   an exit code (i.e. it was terminated by a signal, including the timeout kill).
+ */
+async function executeClient(
+  command: string,
+  serverUrl: string,
+  timeout: number = 30000
+): Promise<ClientExecutionResult> {
+  const [executable, ...commandArgs] = command.split(' ');
+  const args = [...commandArgs, serverUrl];
+
+  let stdout = '';
+  let stderr = '';
+  let timedOut = false;
+
+  return new Promise((resolve) => {
+    // Named `child` so the Node global `process` is not shadowed in this scope.
+    const child = spawn(executable, args, {
+      shell: true,
+      stdio: 'pipe'
+    });
+
+    const timeoutHandle = setTimeout(() => {
+      timedOut = true;
+      child.kill();
+    }, timeout);
+
+    child.stdout?.on('data', (data) => {
+      stdout += data.toString();
+    });
+
+    child.stderr?.on('data', (data) => {
+      stderr += data.toString();
+    });
+
+    child.on('close', (code) => {
+      clearTimeout(timeoutHandle);
+      resolve({
+        // `code` is null when the child was terminated by a signal (for
+        // example by the timeout kill above); report that as -1 rather than
+        // letting it masquerade as a clean exit with code 0.
+        exitCode: code ?? -1,
+        stdout,
+        stderr,
+        timedOut
+      });
+    });
+
+    child.on('error', (error) => {
+      clearTimeout(timeoutHandle);
+      resolve({
+        exitCode: -1,
+        stdout,
+        stderr: stderr + `\nProcess error: ${error.message}`,
+        timedOut
+      });
+    });
+  });
+}
+
+/**
+ * Runs one conformance scenario against a client command and stores the
+ * results on disk.
+ *
+ * Steps: look up the scenario, create `results/<scenario>-<timestamp>/`,
+ * start the scenario, run the client with the scenario's server URL, collect
+ * the scenario's checks, then write `checks.json`, `stdout.txt` and
+ * `stderr.txt`. The scenario is stopped even when running the client or
+ * writing the results fails.
+ *
+ * @param clientCommand - Command line that launches the client under test.
+ * @param scenarioName - Name of a registered scenario.
+ * @param timeout - Milliseconds the client may run before it is killed (default 30000).
+ * @returns The checks, the captured client output and the result directory path.
+ * @throws Error when no scenario is registered under `scenarioName`.
+ */
+export async function runConformanceTest(
+  clientCommand: string,
+  scenarioName: string,
+  timeout: number = 30000
+): Promise<{
+  checks: ConformanceCheck[];
+  clientOutput: ClientExecutionResult;
+  resultDir: string;
+}> {
+  // Resolve the scenario before touching the filesystem, so a mistyped
+  // scenario name does not leave an empty result directory behind.
+  const scenario = getScenario(scenarioName);
+  if (!scenario) {
+    throw new Error(`Unknown scenario: ${scenarioName}`);
+  }
+
+  await ensureResultsDir();
+  const resultDir = createResultDir(scenarioName);
+  await fs.mkdir(resultDir, { recursive: true });
+
+  console.log(`Starting scenario: ${scenarioName}`);
+  const urls = await scenario.start();
+
+  console.log(`Executing client: ${clientCommand} ${urls.serverUrl}`);
+
+  try {
+    const clientOutput = await executeClient(clientCommand, urls.serverUrl, timeout);
+
+    // Checks are read only after the client has finished running.
+    const checks = scenario.getChecks();
+
+    await fs.writeFile(
+      path.join(resultDir, 'checks.json'),
+      JSON.stringify(checks, null, 2)
+    );
+
+    await fs.writeFile(
+      path.join(resultDir, 'stdout.txt'),
+      clientOutput.stdout
+    );
+
+    await fs.writeFile(
+      path.join(resultDir, 'stderr.txt'),
+      clientOutput.stderr
+    );
+
+    console.log(`Results saved to ${resultDir}`);
+
+    return {
+      checks,
+      clientOutput,
+      resultDir
+    };
+  } finally {
+    await scenario.stop();
+  }
+}
+
+/**
+ * Command-line entry point.
+ *
+ * Reads `--command` and `--scenario` from the process arguments, runs the
+ * conformance test, prints a pass/fail summary and exits with status 1 when
+ * any check failed or the runner itself threw, 0 otherwise.
+ */
+async function main(): Promise<void> {
+  const argv = process.argv.slice(2);
+  let command: string | null = null;
+  let scenario: string | null = null;
+
+  // A flag is only honoured when a value follows it; the value is consumed
+  // together with the flag. Anything else is skipped.
+  let idx = 0;
+  while (idx < argv.length) {
+    const flag = argv[idx];
+    const hasValue = idx + 1 < argv.length;
+    if (hasValue && flag === '--command') {
+      command = argv[idx + 1];
+      idx += 2;
+    } else if (hasValue && flag === '--scenario') {
+      scenario = argv[idx + 1];
+      idx += 2;
+    } else {
+      idx += 1;
+    }
+  }
+
+  if (!command || !scenario) {
+    console.error('Usage: runner --command "<command>" --scenario <scenario>');
+    console.error('Example: runner --command "tsx examples/clients/typescript/test1.ts" --scenario initialize');
+    process.exit(1);
+  }
+
+  try {
+    const { checks } = await runConformanceTest(command, scenario);
+
+    const total = checks.length;
+    const passedCount = checks.filter((check) => check.status === 'SUCCESS').length;
+    const failures = checks.filter((check) => check.status === 'FAILURE');
+
+    console.log(`\nTest Results:`);
+    console.log(`Passed: ${passedCount}/${total}`);
+    console.log(`Failed: ${failures.length}/${total}`);
+
+    if (failures.length > 0) {
+      console.log('\nFailed Checks:');
+      for (const check of failures) {
+        console.log(`  - ${check.name}: ${check.description}`);
+        if (check.errorMessage) {
+          console.log(`    Error: ${check.errorMessage}`);
+        }
+      }
+    }
+
+    process.exit(failures.length > 0 ? 1 : 0);
+  } catch (error) {
+    console.error('Test runner error:', error);
+    process.exit(1);
+  }
+}
+
+// NOTE(review): this runs whenever the module is loaded, including when it is
+// imported only for `runConformanceTest` — confirm that is intended.
+main();
diff --git a/src/scenarios/index.ts b/src/scenarios/index.ts
@@ -0,0 +1,15 @@
+import { Scenario } from '../types.js';
+
+/** Registry of scenarios, keyed by scenario name. */
+export const scenarios = new Map<string, Scenario>();
+
+/**
+ * Stores a scenario under the given name; an entry already registered under
+ * that name is replaced.
+ */
+export function registerScenario(name: string, scenario: Scenario): void {
+  scenarios.set(name, scenario);
+}
+
+/**
+ * Looks up a scenario by name.
+ *
+ * @returns The registered scenario, or `undefined` when the name is unknown.
+ */
+export function getScenario(name: string): Scenario | undefined {
+  const found = scenarios.get(name);
+  return found;
+}
+
+/**
+ * Lists the names of all registered scenarios, in registration order.
+ */
+export function listScenarios(): string[] {
+  return [...scenarios.keys()];
+}
diff --git a/src/scenarios/initialize/TODO.md b/src/scenarios/initialize/TODO.md
diff --git a/src/types.ts b/src/types.ts

Original file line number	Diff line number	Diff line change
`@@ -15,6 +15,8 @@`
`15`	`15`	`"typescript": "^5.3.0"`
`16`	`16`	`},`
`17`	`17`	`"dependencies": {`
`18`		`- "@modelcontextprotocol/sdk": "^1.20.1"`
	`18`	`+ "@modelcontextprotocol/sdk": "^1.20.1",`
	`19`	`+ "express": "^5.1.0",`
	`20`	`+ "zod": "^3.25.76"`
`19`	`21`	`}`
`20`	`22`	`}`