Skip to content
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
54 commits
Select commit Hold shift + click to select a range
959e421
first version
jirispilka Jan 3, 2025
9b06472
Change logger, simplify fetching of Actor input
jirispilka Jan 3, 2025
672d79e
Change env variable APIFY_API_TOKEN to APIFY_TOKEN
jirispilka Jan 3, 2025
1d27199
Push data into Apify dataset
jirispilka Jan 3, 2025
cbc172a
Fix standby mode
jirispilka Jan 3, 2025
7dd8fb2
Improve log messages
jirispilka Jan 3, 2025
b4d3bd9
- rename `actorNames` to `actors`
jirispilka Jan 3, 2025
e1984a0
Update README.md
jirispilka Jan 3, 2025
0c792d5
Ability to initiate SSE with a selected actors
jirispilka Jan 6, 2025
65f6e41
Update README.md, remove defaults.
jirispilka Jan 7, 2025
552cedd
Remove query parameters from SSE
jirispilka Jan 7, 2025
5cf7c00
Update README.md
jirispilka Jan 7, 2025
ff4ec05
Add eventsource
jirispilka Jan 8, 2025
f516fe2
Add example clients
jirispilka Jan 8, 2025
76bf0fb
Load 5 default Actors as a tool, to simplify onboarding
jirispilka Jan 8, 2025
1655d70
Update README.md
jirispilka Jan 8, 2025
259e1c4
Add chat stdio
jirispilka Jan 8, 2025
8a9cad3
Improve error handling
jirispilka Jan 9, 2025
405f70c
Update clientStdioChat.ts
jirispilka Jan 9, 2025
af4d696
Update README.md
jirispilka Jan 9, 2025
1c92990
Change env variable
jirispilka Jan 10, 2025
19bb130
Clean up ts-config
jirispilka Jan 10, 2025
22ac5ed
Update package.json and add github workflow from apify-eslint-config
jirispilka Jan 10, 2025
147f7a8
Fix lint issues in before-beta-release.js
jirispilka Jan 10, 2025
7e975e5
Remove dead code
jirispilka Jan 10, 2025
d2bc65a
docs: Update documentation (#2)
jirispilka Jan 10, 2025
c6128c3
Update section title
jirispilka Jan 10, 2025
c676bc2
Update docs
jirispilka Jan 10, 2025
7ced4a4
Update docs
jirispilka Jan 10, 2025
5cbf12c
Update docs
jirispilka Jan 10, 2025
8355f3a
fix return value
jirispilka Jan 12, 2025
1f5c841
Update README.md
jirispilka Jan 13, 2025
a857a74
Update README.md
jirispilka Jan 13, 2025
472be0c
Update README.md
jirispilka Jan 13, 2025
036c6b3
Update README.md
jirispilka Jan 13, 2025
e1a0c6a
Apply suggestions from code review
jirispilka Jan 13, 2025
34ecf25
Update README.md
jirispilka Jan 13, 2025
50e3d85
Replace APIFY-API-TOKEN by APIFY_API_TOKEN
jirispilka Jan 13, 2025
364f49d
Update package-lock.json
jirispilka Jan 13, 2025
1e7d6f8
Fix clientStdio.ts for win
jirispilka Jan 15, 2025
5233088
Fix clientStdio.ts for win
jirispilka Jan 15, 2025
5c9d89d
Update mcp typescript sdk to the newest version.
jirispilka Jan 15, 2025
57af156
Fix clientStdio.ts
jirispilka Jan 15, 2025
0aae5c6
Add roadmap to README.md
jirispilka Jan 15, 2025
c8b2da4
Start Standby server with Actors provided at input
jirispilka Jan 15, 2025
e12cf8e
Truncate tool output and limit tool response.
jirispilka Jan 15, 2025
2d474ce
Limit number of default Actors
jirispilka Jan 15, 2025
ca69a88
Rename APIFY_API_TOKEN to APIFY_TOKEN (env variable at Apify platform…
jirispilka Jan 15, 2025
b84651e
Minor changes, add clientSse.ts
jirispilka Jan 15, 2025
479e9be
Update README.md with task changes.
jirispilka Jan 15, 2025
f39dfc5
Add explanation to Actor definition
jirispilka Jan 15, 2025
d6e03fc
Fix lint issues
jirispilka Jan 15, 2025
3a5be34
Update .actor/input_schema.json
jirispilka Jan 16, 2025
fc40b59
Add log message to explain users how to add Actors. Simplify handling…
jirispilka Jan 16, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions .actor/input_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,17 @@
"type": "object",
"schemaVersion": 1,
"properties": {
"actors": {
"title": "Actors names to be exposed for an AI application (AI agent)",
"type": "array",
"description": "List the names of Actors to be exposed to an AI application (AI agent) for communication via the MCP protocol. \n\n Ensure the Actor definitions fit within the LLM context by limiting the number of used Actors.",
"editor": "stringList",
"prefill": [
"apify/instagram-scraper",
"apify/rag-web-browser",
"lukaskrivka/google-maps-with-contact-details"
]
},
"debugActor": {
"title": "Debug actor",
"type": "string",
Expand Down
2 changes: 1 addition & 1 deletion .env.example
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
APIFY_API_TOKEN=
APIFY_TOKEN=
# ANTHROPIC_API_KEY is only required when you want to run examples/clientStdioChat.js
ANTHROPIC_API_KEY=
76 changes: 42 additions & 34 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,26 @@ The server can be used in two ways:

The MCP Server Actor allows an AI assistant to use any [Apify Actor](https://apify.com/store) as a tool to perform a specific task.
For example, it can:
- [Facebook Posts Scraper](https://apify.com/apify/facebook-posts-scraper) extracts data from Facebook posts from multiple pages/profiles
- [Google Maps Email Extractor](https://apify.com/lukaskrivka/google-maps-with-contact-details) extracts Google Maps contact details
- [Google Search Results Scraper](https://apify.com/apify/google-search-scraper) scrapes Google Search Engine Results Pages (SERPs)
- [Instagram Scraper](https://apify.com/apify/instagram-scraper) scrapes Instagram posts, profiles, places, hashtags, photos, and comments
- [RAG Web Browser](https://apify.com/apify/web-scraper) performs web search, scrape the top N URLs from the results, and return content
- use [Facebook Posts Scraper](https://apify.com/apify/facebook-posts-scraper) to extract data from Facebook posts from multiple pages/profiles
- use [Google Maps Email Extractor](https://apify.com/lukaskrivka/google-maps-with-contact-details) to extract Google Maps contact details
- use [Google Search Results Scraper](https://apify.com/apify/google-search-scraper) to scrape Google Search Engine Results Pages (SERPs)
- use [Instagram Scraper](https://apify.com/apify/instagram-scraper) to scrape Instagram posts, profiles, places, hashtags, photos, and comments
- use [RAG Web Browser](https://apify.com/apify/rag-web-browser) to perform a web search, scrape the top N URLs from the results, and return content

To interact with the Apify MCP server, you can use MCP clients such as [Claude Desktop](https://claude.ai/download), [Superinterface.ai](https://superinterface.ai/), or [LibreChat](https://www.librechat.ai/).
Additionally, you can use simple example clients found in the [examples](https://github.com/apify/actor-mcp-server/tree/main/src/examples) directory.

When you have Actors integrated with the MCP server, you can ask:
- Search web and summarize recent trends about AI Agents
- Find top 10 best Italian restaurants in San Francisco
- Find and analyze Instagram profile of The Rock
- Provide a step-by-step guide on using the Model Context Protocol with source URLs.
- What Apify Actors I can use?
- "Search web and summarize recent trends about AI Agents"
- "Find top 10 best Italian restaurants in San Francisco"
- "Find and analyze Instagram profile of The Rock"
- "Provide a step-by-step guide on using the Model Context Protocol with source URLs."
- "What Apify Actors can I use?"

# 🔄 What is model context protocol?
In the future, we plan to load Actors dynamically and provide Apify's dataset and key-value store as resources.
See the [Roadmap](#-roadmap-january-2025) for more details.

# 🔄 What is the Model Context Protocol?

The Model Context Protocol (MCP) allows AI applications (and AI agents), such as Claude Desktop, to connect to external tools and data sources.
MCP is an open protocol that enables secure, controlled interactions between AI applications, AI Agents, and local or remote resources.
Expand All @@ -37,18 +40,12 @@ MCP is an open protocol that enables secure, controlled interactions between AI
## Tools

Any [Apify Actor](https://apify.com/store) can be used as a tool.
By default, the server is pre-configured with the Actors specified below, but it can be overridden by providing a list of Actor names in the `actors` query parameter.
By default, the server is pre-configured with the Actors specified below, but it can be overridden by providing Actor input.

```text
'apidojo/tweet-scraper',
'apify/facebook-posts-scraper',
'apify/google-search-scraper',
'apify/instagram-scraper',
'apify/rag-web-browser',
'clockworks/free-tiktok-scraper',
'compass/crawler-google-places',
'lukaskrivka/google-maps-with-contact-details',
'voyager/booking-scraper'
```
The MCP server loads the Actor input schema and creates MCP tools corresponding to the Actors.
See this example of input schema for the [RAG Web Browser](https://apify.com/apify/rag-web-browser/input-schema).
Expand Down Expand Up @@ -86,14 +83,17 @@ The Actor runs in [**Standby mode**](https://docs.apify.com/platform/actors/runn
Start server with default Actors. To use the Apify MCP Server with a set of default Actors,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a bit problematic, Actor Standby doesn't have a concept of "starting a server". It just starts an Actor run when it receives the first requests to its Standby URL, the run will run until the idle timeout, and then it's shut down. So with this scenario this could happen:

  1. user "starts" the Actor with specified Actors through the ?actors=... parameter
  2. user uses the MCP server on the /sse path, everything works
  3. user waits a bit, Actor run started in 1 gets shut down
  4. user tries to use the MCP server again, but now it's not "started", and the specific Actors are not loaded

Or, if the user uses the MCP server a lot, an additional run is started to handle the requests, and that is not prepared with the specific Actors.

It also doesn't give the user an option how to load different Actors after the run is started.

What would work is one of these two options:

  • specify Actors in every request to the /sse endpoint
  • specify Actors in the Standby Actor input, and when users would want to use different Actors in the MCP server, they'd create an Actor task which would override the Actor input, and use the task's standby URL instead of the Actor standby URL

Copy link
Collaborator Author

@jirispilka jirispilka Jan 13, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ohhh 💡, this didn't occur to me. Thanks Franto!

specify Actors in every request to the /sse endpoint

This is a bit problematic as it depends on the MCP client (e.g., Claude Desktop, LibreChat). These clients do not pass parameters to the SSE endpoint

specify Actors in the Standby Actor input, and when users would want to use different Actors in the MCP server, they'd create an Actor task which would override the Actor input, and use the task's standby URL instead of the Actor standby URL

This seems to be the easiest and quickest solution.

There is one more option.

  • The MCP server can dynamically load Actors. Basically, based on the use case, the MCP will include a tool to search for Actors and "load" them (i.e., add them as a tool). I've already experimented with this in the [feat/internal-tools]() branch. It "works", but I encountered some issues and haven’t had much time to fix them yet.

send an HTTP GET request with your [Apify API token](https://console.apify.com/settings/integrations) to the following URL.
```
https://mcp-server.apify.actor?token=<APIFY_API_TOKEN>
```
It is also possible to start MCP server with a different set of tools by providing a list of Actor names in the `actors` query parameter.
Provide a comma-separated list of Actors in the `actors` query parameter:
https://actors-mcp-server.apify.actor?token=<APIFY_TOKEN>
```
https://mcp-server.apify.actor?token=<APIFY_API_TOKEN>&actors=junglee/free-amazon-product-scraper,lukaskrivka/google-maps-with-contact-details
It is also possible to start the MCP server with a different set of Actors.
To do this, create a [task](https://docs.apify.com/platform/actors/running/tasks) and specify the list of Actors you want to use.

Then, run the task in Standby mode with the selected Actors using your Apify API token.
```shell
https://actors-mcp-server-task.apify.actor?token=<APIFY_TOKEN>
```
Find list of all available Actors in the [Apify Store](https://apify.com/store).

You can find a list of all available Actors in the [Apify Store](https://apify.com/store).

#### 💬 Interact with the MCP Server

Expand All @@ -107,9 +107,9 @@ In the client settings you need to provide server configuration:
"mcpServers": {
"apify": {
"type": "sse",
"url": "https://mcp-server.apify.actor/sse",
"url": "https://actors-mcp-server.apify.actor/sse",
"env": {
"APIFY-API-TOKEN": "your-apify-api-token"
"APIFY_TOKEN": "your-apify-token"
}
}
}
Expand All @@ -119,7 +119,7 @@ Alternatively, you can use simple python [client_see.py](https://github.com/apif

1. Initiate Server-Sent-Events (SSE) by sending a GET request to the following URL:
```
curl https://mcp-server.apify.actor/sse?token=<APIFY_API_TOKEN>
curl https://actors-mcp-server.apify.actor/sse?token=<APIFY_TOKEN>
```
The server will respond with a `sessionId`, which you can use to send messages to the server:
```shell
Expand All @@ -129,7 +129,7 @@ Alternatively, you can use simple python [client_see.py](https://github.com/apif

2. Send a message to the server by making a POST request with the `sessionId`:
```shell
curl -X POST "https://mcp-server.apify.actor?token=<APIFY_API_TOKEN>&session_id=a1b" -H "Content-Type: application/json" -d '{
curl -X POST "https://actors-mcp-server.apify.actor?token=<APIFY_TOKEN>&session_id=a1b" -H "Content-Type: application/json" -d '{
"jsonrpc": "2.0",
"id": 1,
"method": "tools/call",
Expand Down Expand Up @@ -161,7 +161,7 @@ Alternatively, you can use simple python [client_see.py](https://github.com/apif
- MacOS or Windows
- The latest version of Claude Desktop must be installed (or another MCP client)
- [Node.js](https://nodejs.org/en) (v18 or higher)
- [Apify API Token](https://docs.apify.com/platform/integrations/api#api-token) (`APIFY_API_TOKEN`)
- [Apify API Token](https://docs.apify.com/platform/integrations/api#api-token) (`APIFY_TOKEN`)

### Install

Expand Down Expand Up @@ -201,7 +201,7 @@ Configure Claude Desktop to recognize the MCP server.
"/path/to/actor-mcp-server/dist/index.js"
],
"env": {
"APIFY-API-TOKEN": "your-apify-api-token"
"APIFY_TOKEN": "your-apify-token"
}
}
}
Expand All @@ -217,7 +217,7 @@ Configure Claude Desktop to recognize the MCP server.
"lukaskrivka/google-maps-with-contact-details,apify/instagram-scraper"
],
"env": {
"APIFY-API-TOKEN": "your-apify-api-token"
"APIFY_TOKEN": "your-apify-token"
}
}
}
Expand All @@ -242,7 +242,7 @@ Configure Claude Desktop to recognize the MCP server.

Create environment file `.env` with the following content:
```text
APIFY_API_TOKEN=your-apify-api-token
APIFY_TOKEN=your-apify-token
# ANTHROPIC_API_KEY is only required when you want to run examples/clientStdioChat.js
ANTHROPIC_API_KEY=your-anthropic-api-token
```
Expand Down Expand Up @@ -274,7 +274,7 @@ standard input/output (stdio):

Create environment file `.env` with the following content:
```text
APIFY_API_TOKEN=your-apify-api-token
APIFY_TOKEN=your-apify-token
# ANTHROPIC_API_KEY is only required when you want to run examples/clientStdioChat.js
ANTHROPIC_API_KEY=your-anthropic-api-token
```
Expand Down Expand Up @@ -302,7 +302,15 @@ npm run build
You can launch the MCP Inspector via [`npm`](https://docs.npmjs.com/downloading-and-installing-node-js-and-npm) with this command:

```bash
npx @modelcontextprotocol/inspector node /path/to/actor-mcp-server/dist/index.js --env APIFY_API_TOKEN=your-apify-api-token
npx @modelcontextprotocol/inspector node /path/to/actor-mcp-server/dist/index.js --env APIFY_TOKEN=your-apify-token
```

Upon launching, the Inspector will display a URL that you can access in your browser to begin debugging.

# 🚀 Roadmap (January 2025)

- Document examples for [Superinterface.ai](https://superinterface.ai/) and [LibreChat](https://www.librechat.ai/).
- Provide tools to search for Actors and load them as needed.
- Add Apify's dataset and key-value store as resources.
- Add tools such as Actor logs and Actor runs for debugging.
- Prune Actor input schemas to reduce context size.
24 changes: 12 additions & 12 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
"model context protocol"
],
"dependencies": {
"@modelcontextprotocol/sdk": "^1.1.0",
"@modelcontextprotocol/sdk": "^1.1.1",
"ajv": "^8.17.1",
"apify": "^3.2.6",
"apify-client": "^2.11.1",
Expand All @@ -38,7 +38,7 @@
"@anthropic-ai/tokenizer": "^0.0.4",
"@apify/eslint-config": "^0.5.0-beta.2",
"@apify/tsconfig": "^0.1.0",
"@types/express": "^5.0.0",
"@types/express": "^4.0.0",
"@types/minimist": "^1.2.5",
"dotenv": "^16.4.7",
"eslint": "^9.17.0",
Expand Down
13 changes: 9 additions & 4 deletions src/actorDefinition.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ import type { ActorDefinitionWithDesc, Tool } from './types';
* @returns {Promise<ActorDefinitionWithDesc | null>} - The actor definition with description or null if not found.
*/
async function fetchActorDefinition(actorFullName: string): Promise<ActorDefinitionWithDesc | null> {
if (!process.env.APIFY_API_TOKEN) {
log.error('APIFY_API_TOKEN is required but not set. Please set it as an environment variable');
if (!process.env.APIFY_TOKEN) {
log.error('APIFY_TOKEN is required but not set. Please set it as an environment variable');
return null;
}
const client = new ApifyClient({ token: process.env.APIFY_API_TOKEN });
const client = new ApifyClient({ token: process.env.APIFY_TOKEN });
const actorClient = client.actor(actorFullName);

try {
Expand All @@ -27,7 +27,9 @@ async function fetchActorDefinition(actorFullName: string): Promise<ActorDefinit
return null;
}

// Extract default build label
// fnesveda: The default build is not necessarily tagged, you can specify any build number as default build.
// There will be a new API endpoint to fetch a default build.
// For now, we'll use the tagged build, it will work for 90% of Actors. Later, we can update this.
const tag = actor.defaultRunOptions?.build || '';
const buildId = actor.taggedBuilds?.[tag]?.buildId || '';

Expand Down Expand Up @@ -56,6 +58,8 @@ async function fetchActorDefinition(actorFullName: string): Promise<ActorDefinit
* This function retrieves the input schemas for the specified actors and compiles them into MCP tools.
* It uses the AJV library to validate the input schemas.
*
* Tool name can't contain /, so it is replaced with _
*
* @param {string[]} actors - An array of actor full names.
* @returns {Promise<Tool[]>} - A promise that resolves to an array of MCP tools.
*/
Expand All @@ -69,6 +73,7 @@ export async function getActorsAsTools(actors: string[]): Promise<Tool[]> {
try {
tools.push({
name: result.name.replace('/', '_'),
actorName: result.name,
description: result.description,
inputSchema: result.input || {},
ajvValidate: ajv.compile(result.input || {}),
Expand Down
10 changes: 4 additions & 6 deletions src/const.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,16 @@ export const SERVER_VERSION = '0.1.0';

export const defaults = {
actors: [
'apidojo/tweet-scraper',
'apify/facebook-posts-scraper',
'apify/google-search-scraper',
'apify/instagram-scraper',
'apify/rag-web-browser',
'clockworks/free-tiktok-scraper',
'compass/crawler-google-places',
'lukaskrivka/google-maps-with-contact-details',
'voyager/booking-scraper',
],
};

export const ACTOR_OUTPUT_MAX_CHARS_PER_ITEM = 2_000;
export const ACTOR_OUTPUT_TRUNCATED_MESSAGE = `Output was truncated because it will not fit into context.`
+ ` There is no reason to call this tool again!`;

export enum Routes {
ROOT = '/',
SSE = '/sse',
Expand Down
Loading
Loading