Skip to content

Commit 98bf5aa

Browse files
committed
wip first crack at scenario and runner
1 parent fb34825 commit 98bf5aa

File tree

9 files changed

+520
-20
lines changed

9 files changed

+520
-20
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
node_modules
2+
results/

package-lock.json

Lines changed: 3 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
"typescript": "^5.3.0"
1616
},
1717
"dependencies": {
18-
"@modelcontextprotocol/sdk": "^1.20.1"
18+
"@modelcontextprotocol/sdk": "^1.20.1",
19+
"express": "^5.1.0",
20+
"zod": "^3.25.76"
1921
}
2022
}

src/runner.ts

Lines changed: 3 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,4 @@
1-
import { createClientInitializationCheck, createServerInfoCheck } from './checks.ts';
1+
import { runConformanceTest } from './runner/index.js';
22

3-
const initializeRequest = {
4-
protocolVersion: '2025-06-18',
5-
clientInfo: {
6-
name: 'ExampleClient',
7-
version: '1.0.0'
8-
}
9-
};
10-
11-
const checks = [
12-
createClientInitializationCheck(initializeRequest),
13-
createServerInfoCheck({
14-
name: 'ExampleMCPServer',
15-
version: '1.0.0'
16-
})
17-
];
18-
19-
console.log(JSON.stringify(checks, null, 2));
3+
export { runConformanceTest };
4+
export type { ScenarioConfig } from './runner/index.js';

src/runner/DESIGN.md

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
# Conformance Test Runner Design
2+
3+
## Overview
4+
5+
The conformance test runner is a framework for testing MCP client implementations against the MCP specification. It executes client implementations in controlled scenarios and validates their behavior through a series of conformance checks.
6+
7+
## Architecture
8+
9+
### Components
10+
11+
1. **Runner** - Orchestrates test execution
12+
2. **Test Server** - The MCP server the client connects to (plus an auth server for auth scenarios)
13+
3. **Client Process** - The MCP client implementation under test
14+
4. **Scenario** - A specific test case with expected behaviors
15+
5. **Checks** - Validation functions that produce ConformanceCheck results
16+
17+
### Execution Flow
18+
19+
```
20+
1. Runner starts test server(s) on available port(s)
21+
2. Runner spawns client process with server URL as final argument
22+
3. Server captures MCP interactions
23+
4. Runner captures client stdout/stderr
24+
5. Scenario-specific checks are executed
25+
6. Results are written to results/<scenario>-<timestamp>/
26+
```
27+
28+
## CLI Interface
29+
30+
```bash
31+
npm run start -- --command "tsx examples/clients/typescript/test1.ts --verbose" --scenario initialize
32+
```
33+
34+
### Arguments
35+
36+
- `--command` - The command used to launch the client; it may include the client's own flags and arguments
37+
- `--scenario` - The scenario to test (e.g., "initialize", "list-tools", "call-tool")
38+
39+
The runner will append the server URL as the final argument to the command.
40+
41+
## Scenarios
42+
43+
A scenario represents a specific test case that validates one or more aspects of MCP client behavior. Each scenario:
44+
45+
- Configures the test server with expected behavior
46+
- May run multiple conformance checks
47+
- Validates both client behavior and server responses
48+
49+
Example scenarios:
50+
- `initialize` - Tests client initialization handshake
51+
- `list-tools` - Tests tool discovery
52+
- `call-tool` - Tests tool invocation
53+
- `auth-flow` - Tests OAuth/authorization flows (requires auth server)
54+
55+
## Output Structure
56+
57+
Results are written to: `results/<scenario>-<timestamp>/`
58+
59+
Files:
60+
- `checks.json` - Array of ConformanceCheck objects with validation results
61+
- `stdout.txt` - Complete stdout from the client process
62+
- `stderr.txt` - Complete stderr from the client process
63+
64+
### checks.json Format
65+
66+
```json
67+
[
68+
{
69+
"id": "mcp-client-initialization",
70+
"name": "MCPClientInitialization",
71+
"description": "Validates that MCP client properly initializes with server",
72+
"status": "SUCCESS",
73+
"timestamp": "2024-10-29T14:30:00.000Z",
74+
"specReferences": [
75+
{
76+
"id": "MCP-Lifecycle",
77+
"url": "https://modelcontextprotocol.io/specification/2025-06-18/basic/lifecycle"
78+
}
79+
],
80+
"details": { ... }
81+
}
82+
]
83+
```
84+
85+
## Future Enhancements
86+
87+
- **Test Suites** - Group multiple scenarios for convenience
88+
- **Configurable Timeouts** - Per-scenario timeout configuration
89+
- **Parallel Execution** - Run multiple scenarios concurrently
90+
- **Custom Servers** - Allow custom MCP server implementations for advanced scenarios
91+
- **Report Formats** - HTML, Markdown, or other human-readable formats

src/runner/index.ts

Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
import { spawn } from 'child_process';
2+
import { promises as fs } from 'fs';
3+
import path from 'path';
4+
import { ConformanceCheck } from '../types.js';
5+
import { getScenario } from '../scenarios/index.js';
6+
7+
/**
 * Outcome of one run of the client process under test.
 */
export interface ClientExecutionResult {
  /** Numeric exit status reported for the client process. */
  exitCode: number;
  /** Everything the client wrote to standard output. */
  stdout: string;
  /** Everything the client wrote to standard error. */
  stderr: string;
  /** True when the runner's timeout fired before the client finished. */
  timedOut: boolean;
}
13+
14+
/**
 * Makes sure a `results` directory exists under the current working
 * directory, creating it (and any missing parents) when necessary.
 *
 * @returns The path of the `results` directory.
 */
async function ensureResultsDir(): Promise<string> {
  const root = path.join(process.cwd(), 'results');
  // `recursive: true` makes this a no-op when the directory already exists.
  await fs.mkdir(root, { recursive: true });
  return root;
}
19+
20+
/**
 * Builds the relative path of the directory that holds one run's results:
 * `results/<scenario>-<timestamp>`.
 *
 * This only computes the path; it does not create anything on disk.
 *
 * @param scenario - Scenario name used as the directory prefix.
 * @returns Path of the form `results/<scenario>-<timestamp>`.
 */
function createResultDir(scenario: string): string {
  // ISO-8601 timestamps contain ':' and '.'; swap both for '-' in the directory name.
  const stamp = new Date().toISOString().replace(/[:.]/g, '-');
  const dirName = `${scenario}-${stamp}`;
  return path.join('results', dirName);
}
24+
25+
/**
 * Runs the client command through the system shell, with the server URL
 * appended as its final argument, and captures what it writes to
 * stdout and stderr.
 *
 * Failures to launch the process are reported in the result (`exitCode: -1`
 * plus a "Process error" line appended to `stderr`) rather than by rejecting.
 *
 * @param command - Shell command line that launches the client; may include its own flags.
 * @param serverUrl - URL appended after the command as the last argument.
 * @param timeout - Milliseconds to wait before the process is killed (default 30000).
 * @returns Exit code, captured output, and whether the timeout fired. The exit
 *   code is -1 when the process could not be started or was terminated by a
 *   signal (which includes being killed by the timeout).
 */
async function executeClient(
  command: string,
  serverUrl: string,
  timeout: number = 30000
): Promise<ClientExecutionResult> {
  let stdout = '';
  let stderr = '';
  let timedOut = false;

  // Without a command there is nothing to launch; report it like any other
  // launch failure instead of handing just the URL to the shell.
  if (command.trim() === '') {
    return {
      exitCode: -1,
      stdout,
      stderr: '\nProcess error: empty client command',
      timedOut
    };
  }

  return new Promise((resolve) => {
    let settled = false;

    // With `shell: true` Node joins the executable and its arguments with
    // spaces before handing them to the shell, so building the command line
    // directly is equivalent and avoids passing an args array together with
    // the shell option.
    const child = spawn(`${command} ${serverUrl}`, {
      shell: true,
      stdio: 'pipe'
    });

    const timeoutHandle = setTimeout(() => {
      timedOut = true;
      child.kill();
    }, timeout);

    // 'error' and 'close' can both fire for the same process; only the first
    // one determines the result.
    const finish = (exitCode: number, stderrSuffix: string = ''): void => {
      if (settled) {
        return;
      }
      settled = true;
      clearTimeout(timeoutHandle);
      resolve({
        exitCode,
        stdout,
        stderr: stderr + stderrSuffix,
        timedOut
      });
    };

    // Decode at the stream level so a multi-byte character split across two
    // chunks is not corrupted.
    child.stdout?.setEncoding('utf8');
    child.stdout?.on('data', (chunk: string) => {
      stdout += chunk;
    });

    child.stderr?.setEncoding('utf8');
    child.stderr?.on('data', (chunk: string) => {
      stderr += chunk;
    });

    child.on('close', (code) => {
      // `code` is null when the process was terminated by a signal (for
      // example by the timeout above); report that as -1, not as success.
      finish(code ?? -1);
    });

    child.on('error', (error) => {
      finish(-1, `\nProcess error: ${error.message}`);
    });
  });
}
82+
83+
/**
 * Runs one conformance scenario against a client command and stores the
 * outcome under `results/<scenario>-<timestamp>/`.
 *
 * The scenario is started, the client is executed against the URL the
 * scenario reports, the scenario's checks are collected, and `checks.json`,
 * `stdout.txt` and `stderr.txt` are written to the result directory. The
 * scenario is stopped afterwards whether or not those steps succeed.
 *
 * @param clientCommand - Command line that launches the client under test.
 * @param scenarioName - Name of a registered scenario.
 * @param timeout - Milliseconds the client may run before it is killed (default 30000).
 * @returns The collected checks, the client's captured output, and the result directory path.
 * @throws Error when no scenario is registered under `scenarioName`.
 */
export async function runConformanceTest(
  clientCommand: string,
  scenarioName: string,
  timeout: number = 30000
): Promise<{
  checks: ConformanceCheck[];
  clientOutput: ClientExecutionResult;
  resultDir: string;
}> {
  // Validate the scenario before touching the file system so an unknown name
  // does not leave an empty result directory behind.
  const scenario = getScenario(scenarioName);
  if (!scenario) {
    throw new Error(`Unknown scenario: ${scenarioName}`);
  }

  await ensureResultsDir();
  const resultDir = createResultDir(scenarioName);
  await fs.mkdir(resultDir, { recursive: true });

  console.log(`Starting scenario: ${scenarioName}`);
  const urls = await scenario.start();

  console.log(`Executing client: ${clientCommand} ${urls.serverUrl}`);

  try {
    const clientOutput = await executeClient(clientCommand, urls.serverUrl, timeout);

    const checks = scenario.getChecks();

    // The three output files are independent of each other, so write them together.
    await Promise.all([
      fs.writeFile(
        path.join(resultDir, 'checks.json'),
        JSON.stringify(checks, null, 2)
      ),
      fs.writeFile(path.join(resultDir, 'stdout.txt'), clientOutput.stdout),
      fs.writeFile(path.join(resultDir, 'stderr.txt'), clientOutput.stderr)
    ]);

    console.log(`Results saved to ${resultDir}`);

    return {
      checks,
      clientOutput,
      resultDir
    };
  } finally {
    // Always shut the scenario down once it has been started.
    await scenario.stop();
  }
}
137+
138+
/**
 * Command-line entry point.
 *
 * Reads `--command` and `--scenario` from the process arguments, runs the
 * conformance test, prints a pass/fail summary, and exits with status 1 when
 * arguments are missing, when any check has status FAILURE, or when the
 * runner itself throws; otherwise exits with status 0.
 */
async function main(): Promise<void> {
  const argv = process.argv.slice(2);
  let command: string | null = null;
  let scenario: string | null = null;

  // Walk the arguments; a recognised flag consumes the value that follows it.
  let index = 0;
  while (index < argv.length) {
    const flag = argv[index];
    const hasValue = index + 1 < argv.length;

    if (flag === '--command' && hasValue) {
      command = argv[index + 1];
      index += 2;
      continue;
    }
    if (flag === '--scenario' && hasValue) {
      scenario = argv[index + 1];
      index += 2;
      continue;
    }
    index += 1;
  }

  if (!command || !scenario) {
    console.error('Usage: runner --command "<command>" --scenario <scenario>');
    console.error('Example: runner --command "tsx examples/clients/typescript/test1.ts" --scenario initialize');
    process.exit(1);
  }

  try {
    const { checks } = await runConformanceTest(command, scenario);

    const total = checks.length;
    const passed = checks.filter((check) => check.status === 'SUCCESS').length;
    const failures = checks.filter((check) => check.status === 'FAILURE');
    const failed = failures.length;

    console.log(`\nTest Results:`);
    console.log(`Passed: ${passed}/${total}`);
    console.log(`Failed: ${failed}/${total}`);

    if (failed > 0) {
      console.log('\nFailed Checks:');
      for (const check of failures) {
        console.log(` - ${check.name}: ${check.description}`);
        if (check.errorMessage) {
          console.log(` Error: ${check.errorMessage}`);
        }
      }
    }

    // A non-zero status signals that at least one check failed.
    process.exit(failed > 0 ? 1 : 0);
  } catch (error) {
    console.error('Test runner error:', error);
    process.exit(1);
  }
}
187+
188+
// Start the CLI as soon as this module is evaluated.
// NOTE(review): this call is unconditional, so it also runs when the module is
// only imported (src/runner.ts re-exports from './runner/index.js'); with no
// CLI arguments main() prints the usage text and exits with status 1 — confirm
// whether an entry-point guard is wanted.
main();

src/scenarios/index.ts

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import { Scenario } from '../types.js';
2+
3+
/** Registry of scenarios, keyed by scenario name. */
export const scenarios = new Map<string, Scenario>();

/**
 * Stores `scenario` in the registry under `name`. An existing entry with the
 * same name is replaced.
 */
export function registerScenario(name: string, scenario: Scenario): void {
  scenarios.set(name, scenario);
}

/**
 * Looks up a scenario by name.
 *
 * @returns The registered scenario, or `undefined` when the name is unknown.
 */
export function getScenario(name: string): Scenario | undefined {
  const found = scenarios.get(name);
  return found;
}

/**
 * @returns The names of all registered scenarios, in registration order.
 */
export function listScenarios(): string[] {
  return [...scenarios.keys()];
}

0 commit comments

Comments
 (0)