Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -187,4 +187,7 @@ config.local.*
# API keys and secrets
.secrets
secrets.json
api-keys.json
api-keys.json

# MCP config with secrets
.amazonq
43 changes: 43 additions & 0 deletions tests/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# LLM Testing for Actorized MCP Servers

This directory provides a framework for automated testing of Actorized MCP servers deployed to the Apify platform.

> **⚠️ IMPORTANT NOTICE:**
>
> **Tests must NEVER be run in parallel. Only one test should run at a time for a single user. Running multiple tests concurrently may cause unpredictable results or failures. Always ensure tests are executed sequentially.**
>
> **LLM runs in YOLO mode and can do anything it wants - run commands, remove files, or perform any action. You must trust the LLM gods 🙏 that it does not brick your device. Proceed at your own risk!**

## How It Works

- MCP server configuration is added to the Amazon Q Developer CLI configuration.
- The CLI is executed with a prompt to run a simple test on all the exposed MCP server tools and then gives a final verdict based on the output.
- The CLI must output a final verdict in the format:
```
VERDICT: OK
or
VERDICT: FAILED
```
- If `VERDICT: OK` is found in the evaluation output, the test passes; otherwise, it fails.

## Usage

### Prerequisites

- [Amazon Q Developer CLI](https://github.com/aws/amazon-q-developer-cli) must be installed and configured. See the [official documentation](https://docs.aws.amazon.com/amazonq/latest/qdeveloper-ug/command-line.html).

### Running Tests

To run the tests, use the following command:

```bash
APIFY_TOKEN="your-apify-token" node run.js "<mcp-server-url>"
```

For example:
```bash
APIFY_TOKEN="your-apify-token" node run.js "https://your-mcp-server.apify.com/mcp"
APIFY_TOKEN="your-apify-token" node run.js "https://your-mcp-server.apify.com/sse"
```

The script will print the evaluation and final verdict.
176 changes: 176 additions & 0 deletions tests/run.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
/* eslint-disable no-console */
const { spawn } = require('node:child_process');

const fs = require('node:fs');
const path = require('node:path');

// Global timeout constants
const MCP_COMMAND_TIMEOUT = 120000;
const MCP_INIT_TIMEOUT = 60000;

/**
* Adds and imports an MCP server configuration.
* @param {string} url - The URL of the MCP server to evaluate.
* @param {string} apifyToken - The Apify token for authentication.
* @returns {Promise<void>}
*/
async function addAndImportMCPServer(url, apifyToken) {
const configDir = '.amazonq';
const configPath = path.join(configDir, 'mcp.json');
if (!fs.existsSync(configDir)) {
fs.mkdirSync(configDir, { recursive: true });
}
const config = {
mcpServers: {
'mcp-server-to-evaluate': {
command: 'npx',
args: [
'-y',
'mcp-remote',
url,
'--header',
`Authorization: Bearer ${apifyToken}`
],
timeout: MCP_COMMAND_TIMEOUT,
}
}
};
fs.writeFileSync(configPath, JSON.stringify(config, null, 4));
console.log(`Wrote MCP server config to ${configPath}`);

console.log('Importing MCP server config...');
const importCmd = spawn('qchat', ['mcp', 'import', '--file', configPath, 'global', '--force'], { stdio: ['ignore', 'pipe', 'pipe'] });
await new Promise((resolve, reject) => {
importCmd.stdout.on('data', (data) => process.stdout.write(data));
importCmd.stderr.on('data', (data) => process.stderr.write(data));
importCmd.on('close', (code) => {
if (code !== 0) {
reject(new Error(`qchat import exited with code ${code}`));
} else {
resolve();
}
});
});
}

/**
* Removes the MCP server configuration and cleans up.
* This function also removes the .amazonq directory.
* @returns {Promise<void>}
* @throws {Error} If there is an error during the removal process.
*/
async function removeMCPServerAndCleanUp() {
console.log('Removing MCP server config...');
await new Promise((resolve, reject) => {
const removeCmd = spawn('qchat', ['mcp', 'remove', '--name', 'mcp-server-to-evaluate'], { stdio: ['ignore', 'pipe', 'pipe'] });
removeCmd.stdout.on('data', (data) => process.stdout.write(data));
removeCmd.stderr.on('data', (data) => process.stderr.write(data));
removeCmd.on('close', (code) => {
if (code !== 0) {
console.error(`qchat remove exited with code ${code}`);
reject(new Error(`qchat remove exited with code ${code}`));
} else {
resolve();
}
});
});
// Remove the .amazonq directory after command completes
try {
fs.rmSync('.amazonq', { recursive: true, force: true });
console.log('Removed .amazonq directory.');
} catch (err) {
console.error('Failed to remove .amazonq directory:', err);
}
}

/**
* Sets the MCP server initialization timeout using the 'q' CLI.
* @returns {Promise<void>}
*/
async function setMCPInitTimeout() {
const setTimeoutCmd = spawn('q', ['settings', 'mcp.initTimeout', String(MCP_INIT_TIMEOUT)], { stdio: ['ignore', 'pipe', 'pipe'] });
await new Promise((resolve, reject) => {
setTimeoutCmd.on('close', (code) => {
if (code !== 0) {
reject(new Error(`q settings exited with code ${code}`));
} else {
resolve();
}
});
});
}

/**
* Runs the evaluation of the MCP server tools.
* This function writes the MCP server configuration, sets a longer timeout,
* and runs a chat command to evaluate the tools.
* It then checks the output for a verdict and cleans up the MCP server configuration.
* @param {string} url - The URL of the MCP server to evaluate.
* @param {string} apifyToken - The Apify token for authentication.
* @returns {Promise<boolean>} - Returns true if the verdict is OK, false otherwise.
*/
async function runEval(url, apifyToken) {
// Write .amazonq/mcp.json with the provided url and token
await addAndImportMCPServer(url, apifyToken);

// Set MCP server initialization timeout
await setMCPInitTimeout();

console.log('Evaluating scenario requirements...');
const evalPrompt = `You are an MCP server tools QA tester. Run a simple test on all the tools of the connected MCP server, then give a final verdict: "OK" or "FAILED" in the format "VERDICT: OK" or "VERDICT: FAILED" on the last output line. Do not test tools like these, since they are built-in tools, not from the MCP server:
- dummy
- execute_bash
- fs_read
- fs_write
- report_issue
- use_aws
Never execute any command outside of calling the MCP server tools. The tools from the MCP server do not follow any naming convention, so any tool outside of the list above is from the MCP server and needs to be tested. First, reason step by step to find the list of correct tools to test. Then run the test on each tool, one by one, and finally give the verdict.`;
return new Promise((resolve, reject) => {
let output = '';
const child = spawn('q', ['chat', '-a', '--no-interactive', evalPrompt], { stdio: ['ignore', 'pipe', 'pipe'] });
child.stdout.on('data', (data) => {
process.stdout.write(data);
output += data.toString();
});
child.stderr.on('data', (data) => {
process.stderr.write(data);
output += data.toString();
});
child.on('close', (code) => {
// Remove the MCP server and clean up
removeMCPServerAndCleanUp().catch((err) => {
console.error('Error during MCP server cleanup:', err);
}).then(() => {
if (code !== 0) {
reject(new Error(`q chat exited with code ${code}`));
return;
}
if (/VERDICT:\s*OK/.test(output)) {
console.log('VERDICT: OK');
resolve(true);
} else if (/VERDICT:\s*FAILED/.test(output)) {
console.log('VERDICT: FAILED');
resolve(false);
} else {
console.log('VERDICT: Not found in output, treating as FAILED');
resolve(false);
}
});
});
});
}


async function main() {
const mcpServerUrl = process.argv[2];
const apifyToken = process.env.APIFY_TOKEN;
if (!mcpServerUrl || !apifyToken) {
process.stderr.write('Usage: node run.js <mcp-server-url> (and set APIFY_TOKEN env var)\n');
process.exit(1);
}
console.log(`Using MCP server URL: ${mcpServerUrl}`);
const res = await runEval(mcpServerUrl, apifyToken);
process.exit(res ? 0 : 1);
}

main();
Loading