diff --git a/.actor/input_schema.json b/.actor/input_schema.json index 4616796c..90189ec4 100644 --- a/.actor/input_schema.json +++ b/.actor/input_schema.json @@ -14,6 +14,19 @@ "lukaskrivka/google-maps-with-contact-details" ] }, + "enableActorAutoLoading": { + "title": "Enable automatic loading of Actors based on context and use-case (experimental, check if it is supported by your client)", + "type": "boolean", + "description": "When enabled, the server can dynamically add Actors as tools based on user requests and context. \n\nNote: Not all MCP clients support this feature. To try it, you can use the [Tester MCP Client](https://apify.com/jiri.spilka/tester-mcp-client). This is an experimental feature and may require client-specific support.", + "default": false + }, + "maxActorMemoryBytes": { + "title": "Limit the maximum memory used by an Actor", + "type": "integer", + "description": "Limit the maximum memory used by an Actor in megabytes. This is an important setting for Free plan users to avoid exceeding the memory limit.", + "prefill": 4096, + "default": 4096 + }, "debugActor": { "title": "Debug Actor", "type": "string", @@ -28,7 +41,7 @@ "description": "Specify the input for the Actor that will be used for debugging in normal mode", "editor": "json", "prefill": { - "query": "hello world" + "query": "hello world" } } } diff --git a/README.md b/README.md index 99a21397..7cc609e8 100644 --- a/README.md +++ b/README.md @@ -6,8 +6,10 @@ Implementation of an MCP server for all [Apify Actors](https://apify.com/store). This server enables interaction with one or more Apify Actors that can be defined in the MCP Server configuration. The server can be used in two ways: -- 🇦 **Apify MCP Server Actor**: runs an HTTP server with MCP protocol via Server-Sent Events. -- ⾕ **Apify MCP Server Stdio**: provides support for the MCP protocol via standard input/output stdio. +- 🇦 **Apify MCP Server Actor**: runs an HTTP server with MCP and can be accessed via Server-Sent Events (SSE). 
+- ⾕ **Apify MCP Server Stdio**: runs the server locally with MCP via standard input/output (stdio). + +You can test the MCP server using [Tester MCP Client](https://apify.com/jiri.spilka/tester-mcp-client). # 🎯 What does Apify MCP server do? @@ -19,8 +21,15 @@ For example it can: - use [Instagram Scraper](https://apify.com/apify/instagram-scraper) to scrape Instagram posts, profiles, places, photos, and comments - use [RAG Web Browser](https://apify.com/apify/web-scraper) to search the web, scrape the top N URLs, and return their content +# MCP Clients + +To interact with the Apify MCP server, you can use MCP clients such as: +- [Claude Desktop](https://claude.ai/download) (stdio support only) +- [LibreChat](https://www.librechat.ai/) (stdio and SSE support, but without the Authorization header) +- [Apify Tester MCP Client](https://apify.com/jiri.spilka/tester-mcp-client) (SSE support with Authorization headers) +- other clients at [https://modelcontextprotocol.io/clients](https://modelcontextprotocol.io/clients) +- more clients at [https://glama.ai/mcp/clients](https://glama.ai/mcp/clients) -To interact with the Apify MCP server, you can use MCP clients such as [Claude Desktop](https://claude.ai/download), [LibreChat](https://www.librechat.ai/), or other [MCP clients](https://glama.ai/mcp/clients). Additionally, you can use simple example clients found in the [examples](https://github.com/apify/actor-mcp-server/tree/main/src/examples) directory. When you have Actors integrated with the MCP server, you can ask: @@ -54,6 +63,8 @@ To learn more, check out the blog post: [What are AI Agents?](https://blog.apify ## Tools +### Actors + Any [Apify Actor](https://apify.com/store) can be used as a tool. By default, the server is pre-configured with the Actors specified below, but it can be overridden by providing Actor input. 
@@ -79,6 +90,19 @@ You don't need to specify the input parameters or which Actor to call, everythin When a tool is called, the arguments are automatically passed to the Actor by the LLM. You can refer to the specific Actor's documentation for a list of available arguments. +### Helper tools + +The server provides a set of helper tools to discover available Actors and retrieve their details: +- `get-actor-details`: Retrieves documentation, input schema, and other details about a specific Actor. +- `discover-actors`: Searches for relevant Actors using keywords and returns their details. + +There are also tools to manage the available tools list. However, dynamically adding and removing tools requires the MCP client to have the capability to manage the tools list, which is typically not supported. + +You can try this functionality using the [Apify Tester MCP Client](https://apify.com/jiri.spilka/tester-mcp-client) Actor. To enable it, set the `enableActorAutoLoading` parameter. + +- `add-actor-to-tools`: Adds an Actor by name to the available tools list without executing it, requiring user consent to run later. +- `remove-actor-from-tools`: Removes an Actor by name from the available tools list when it's no longer needed. + ## Prompt & Resources The server does not provide any resources and prompts. @@ -110,10 +134,13 @@ https://actors-mcp-server-task.apify.actor?token= You can find a list of all available Actors in the [Apify Store](https://apify.com/store). -#### 💬 Interact with the MCP Server +#### 💬 Interact with the MCP Server over SSE Once the server is running, you can interact with Server-Sent Events (SSE) to send messages to the server and receive responses. -You can use MCP clients such as [Superinference.ai](https://superinterface.ai/) or [LibreChat](https://www.librechat.ai/). +The easiest way is to use [Tester MCP Client](https://apify.com/jiri.spilka/tester-mcp-client) on Apify. + +Other clients do not support SSE yet, but this will likely change. 
+Please verify if MCP clients such as [Superinterface.ai](https://superinterface.ai/) or [LibreChat](https://www.librechat.ai/) support SSE with custom headers. ([Claude Desktop](https://claude.ai/download) does not support SSE transport yet, see [Claude Desktop Configuration](#claude-desktop) section for more details). In the client settings you need to provide server configuration: @@ -273,6 +300,7 @@ ANTHROPIC_API_KEY=your-anthropic-api-token ``` In the `examples` directory, you can find two clients that interact with the server via standard input/output (stdio): + 1. [`clientStdio.ts`](https://github.com/apify/actor-mcp-server/tree/main/src/examples/clientStdio.ts) This client script starts the MCP server with two specified Actors. It then calls the `apify/rag-web-browser` tool with a query and prints the result. @@ -305,12 +333,12 @@ ANTHROPIC_API_KEY=your-anthropic-api-key ``` ## Local client (SSE) -To test the server with the SSE transport, you can use python script `examples/client_sse.py`: +To test the server with the SSE transport, you can use the TypeScript example script `examples/clientSse.ts`: Currently, the node.js client does not support to establish a connection to remote server witch custom headers. You need to change URL to your local server URL in the script. ```bash -python src/examples/client_sse.py +node dist/examples/clientSse.js ``` ## Debugging @@ -334,17 +362,15 @@ Upon launching, the Inspector will display a URL that you can access in your bro ## ⓘ Limitations and feedback -To limit the context size the properties in the `input schema` are pruned and description is truncated to 200 characters. +To limit the context size the properties in the `input schema` are pruned and description is truncated to 500 characters. Enum fields and titles are truncated to max 50 options. Memory for each Actor is limited to 4GB. Free users have an 8GB limit, 128MB needs to be allocated for running `Actors-MCP-Server`. 
-If you need other features or have any feedback, please [submit an issue](https://console.apify.com/actors/3ox4R101TgZz67sLr/issues) in Apify Console to let us know. +If you need other features or have any feedback, please [submit an issue](https://console.apify.com/actors/1lSvMAaRcadrM1Vgv/issues) in Apify Console to let us know. # 🚀 Roadmap (January 2025) -- Document examples for [LibreChat](https://www.librechat.ai/). -- Provide tools to search for Actors and load them as needed. - Add Apify's dataset and key-value store as resources. - Add tools such as Actor logs and Actor runs for debugging. diff --git a/docs/actors-mcp-server.png b/docs/actors-mcp-server.png index e58c84a3..098fa14f 100644 Binary files a/docs/actors-mcp-server.png and b/docs/actors-mcp-server.png differ diff --git a/package-lock.json b/package-lock.json index 17494218..49b45467 100644 --- a/package-lock.json +++ b/package-lock.json @@ -14,7 +14,9 @@ "apify": "^3.2.6", "apify-client": "^2.11.1", "express": "^4.21.2", - "minimist": "^1.2.8" + "minimist": "^1.2.8", + "zod": "^3.24.1", + "zod-to-json-schema": "^3.24.1" }, "bin": { "actors-mcp-server": "dist/index.js" @@ -6636,7 +6638,6 @@ "version": "3.24.1", "resolved": "https://registry.npmjs.org/zod/-/zod-3.24.1.tgz", "integrity": "sha512-muH7gBL9sI1nciMZV67X5fTKKBLtwpZ5VBp1vsOQzj1MhrBZ4wlVCm3gedKZWLp0Oyel8sIGfeiz54Su+OVT+A==", - "license": "MIT", "funding": { "url": "https://github.com/sponsors/colinhacks" } @@ -6645,7 +6646,6 @@ "version": "3.24.1", "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.24.1.tgz", "integrity": "sha512-3h08nf3Vw3Wl3PK+q3ow/lIil81IT2Oa7YpQyUUDsEWbXveMesdfK1xBd2RhCkynwZndAxixji/7SYJJowr62w==", - "license": "ISC", "peerDependencies": { "zod": "^3.24.1" } diff --git a/package.json b/package.json index 33fd8b2d..481eeecd 100644 --- a/package.json +++ b/package.json @@ -35,7 +35,9 @@ "apify": "^3.2.6", "apify-client": "^2.11.1", "express": "^4.21.2", - "minimist": "^1.2.8" + "minimist": 
"^1.2.8", + "zod": "^3.24.1", + "zod-to-json-schema": "^3.24.1" }, "devDependencies": { "@anthropic-ai/sdk": "^0.33.1", diff --git a/src/actorDefinition.ts b/src/actors.ts similarity index 67% rename from src/actorDefinition.ts rename to src/actors.ts index 40ab02c2..807edf56 100644 --- a/src/actorDefinition.ts +++ b/src/actors.ts @@ -1,9 +1,22 @@ import { Ajv } from 'ajv'; import { ApifyClient } from 'apify-client'; -import { MAX_DESCRIPTION_LENGTH, MAX_ENUM_LENGTH, MAX_MEMORY_MBYTES } from './const.js'; +import { ACTOR_ADDITIONAL_INSTRUCTIONS, defaults, MAX_DESCRIPTION_LENGTH } from './const.js'; import { log } from './logger.js'; -import type { ActorDefinitionWithDesc, SchemaProperties, Tool } from './types.js'; +import type { + ActorDefinitionPruned, + ActorDefinitionWithDesc, + SchemaProperties, + Tool, +} from './types.js'; + +export function actorNameToToolName(actorName: string): string { + return actorName.replace('/', '--'); +} + +export function toolNameToActorName(toolName: string): string { + return toolName.replace('--', '/'); +} /** * Get actor input schema by actor name. @@ -12,11 +25,7 @@ import type { ActorDefinitionWithDesc, SchemaProperties, Tool } from './types.js * @param {string} actorFullName - The full name of the actor. * @returns {Promise} - The actor definition with description or null if not found. */ -async function fetchActorDefinition(actorFullName: string): Promise { - if (!process.env.APIFY_TOKEN) { - log.error('APIFY_TOKEN is required but not set. 
Please set it as an environment variable'); - return null; - } +export async function getActorDefinition(actorFullName: string): Promise { const client = new ApifyClient({ token: process.env.APIFY_TOKEN }); const actorClient = client.actor(actorFullName); @@ -43,9 +52,9 @@ async function fetchActorDefinition(actorFullName: string): Promise MAX_DESCRIPTION_LENGTH) { property.description = `${property.description.slice(0, MAX_DESCRIPTION_LENGTH)}...`; } - if (property.enum) { - property.enum = property.enum.slice(0, MAX_ENUM_LENGTH); - } - if (property.enumTitles) { - property.enumTitles = property.enumTitles.slice(0, MAX_ENUM_LENGTH); - } } return properties; } @@ -77,11 +91,11 @@ function shortenProperties(properties: { [key: string]: SchemaProperties}): { [k * Filters schema properties to include only the necessary fields. * @param properties */ -function filterSchemaProperties(properties: { [key: string]: SchemaProperties }): { [key: string]: SchemaProperties } { +export function filterSchemaProperties(properties: { [key: string]: SchemaProperties }): { [key: string]: SchemaProperties } { const filteredProperties: { [key: string]: SchemaProperties } = {}; for (const [key, property] of Object.entries(properties)) { - const { title, description, enum: enumValues, enumTitles, type, default: defaultValue, prefill } = property; - filteredProperties[key] = { title, description, enum: enumValues, enumTitles, type, default: defaultValue, prefill }; + const { title, description, enum: enumValues, type, default: defaultValue, prefill } = property; + filteredProperties[key] = { title, description, enum: enumValues, type, default: defaultValue, prefill }; } return filteredProperties; } @@ -98,9 +112,8 @@ function filterSchemaProperties(properties: { [key: string]: SchemaProperties }) * @returns {Promise} - A promise that resolves to an array of MCP tools. 
*/ export async function getActorsAsTools(actors: string[]): Promise { - // Fetch input schemas in parallel const ajv = new Ajv({ coerceTypes: 'array', strict: false }); - const results = await Promise.all(actors.map(fetchActorDefinition)); + const results = await Promise.all(actors.map(getActorDefinition)); const tools = []; for (const result of results) { if (result) { @@ -109,17 +122,17 @@ export async function getActorsAsTools(actors: string[]): Promise { result.input.properties = shortenProperties(properties); } try { - const memoryMbytes = result.defaultRunOptions?.memoryMbytes || MAX_MEMORY_MBYTES; + const memoryMbytes = result.defaultRunOptions?.memoryMbytes || defaults.maxMemoryMbytes; tools.push({ - name: result.name.replace('/', '_'), - actorName: result.name, - description: result.description, + name: actorNameToToolName(result.actorFullName), + actorFullName: result.actorFullName, + description: `${result.description} Instructions: ${ACTOR_ADDITIONAL_INSTRUCTIONS}`, inputSchema: result.input || {}, ajvValidate: ajv.compile(result.input || {}), - memoryMbytes: memoryMbytes > MAX_MEMORY_MBYTES ? MAX_MEMORY_MBYTES : memoryMbytes, + memoryMbytes: memoryMbytes > defaults.maxMemoryMbytes ? defaults.maxMemoryMbytes : memoryMbytes, }); } catch (validationError) { - log.error(`Failed to compile AJV schema for actor: ${result.name}. Error: ${validationError}`); + log.error(`Failed to compile AJV schema for actor: ${result.actorFullName}. Error: ${validationError}`); } } } diff --git a/src/const.ts b/src/const.ts index e45fe5b5..be4f97c7 100644 --- a/src/const.ts +++ b/src/const.ts @@ -2,12 +2,7 @@ export const SERVER_NAME = 'apify-mcp-server'; export const SERVER_VERSION = '0.1.0'; export const HEADER_READINESS_PROBE = 'x-apify-container-server-readiness-probe'; - -export const MAX_ENUM_LENGTH = 50; -export const MAX_DESCRIPTION_LENGTH = 200; -// Limit memory to 4GB for Actors. 
Free users have 8 GB limit, but we need to reserve some memory for Actors-MCP-Server too -export const MAX_MEMORY_MBYTES = 4096; - +export const MAX_DESCRIPTION_LENGTH = 500; export const USER_AGENT_ORIGIN = 'Origin/mcp-server'; export const defaults = { @@ -16,11 +11,22 @@ export const defaults = { 'apify/rag-web-browser', 'lukaskrivka/google-maps-with-contact-details', ], + enableActorAutoLoading: false, + maxMemoryMbytes: 4096, }; -export const ACTOR_OUTPUT_MAX_CHARS_PER_ITEM = 2_000; +export const ACTOR_OUTPUT_MAX_CHARS_PER_ITEM = 5_000; export const ACTOR_OUTPUT_TRUNCATED_MESSAGE = `Output was truncated because it will not fit into context.` - + ` There is no reason to call this tool again!`; + + `There is no reason to call this tool again!`; +export const ACTOR_ADDITIONAL_INSTRUCTIONS = 'Never call/execute tool/Actor unless confirmed by the user. ' + + 'Always limit the number of results in the call arguments.'; + +export enum InternalTools { + DISCOVER_ACTORS = 'discover-actors', + ADD_ACTOR_TO_TOOLS = 'add-actor-to-tools', + REMOVE_ACTOR_FROM_TOOLS = 'remove-actor-from-tools', + GET_ACTOR_DETAILS = 'get-actor-details', +} export enum Routes { ROOT = '/', diff --git a/src/examples/clientSse.ts b/src/examples/clientSse.ts index 1e35a77f..53ab4ee0 100644 --- a/src/examples/clientSse.ts +++ b/src/examples/clientSse.ts @@ -3,6 +3,7 @@ * Connect to the MCP server using SSE transport and call a tool. * The Actors MCP Server will load default Actors. * + * It requires the `APIFY_TOKEN` in the `.env` file. 
*/ import path from 'path'; @@ -15,7 +16,6 @@ import dotenv from 'dotenv'; import { EventSource } from 'eventsource'; const REQUEST_TIMEOUT = 120_000; // 2 minutes -// Resolve dirname equivalent in ES module const filename = fileURLToPath(import.meta.url); const dirname = path.dirname(filename); diff --git a/src/index.ts b/src/index.ts index 68712faa..1b1c4292 100644 --- a/src/index.ts +++ b/src/index.ts @@ -23,6 +23,11 @@ log.setLevel(log.LEVELS.ERROR); const argv = minimist(process.argv.slice(2)); const argActors = argv.actors?.split(',').map((actor: string) => actor.trim()) || []; +if (!process.env.APIFY_TOKEN) { + log.error('APIFY_TOKEN is required but not set in the environment variables.'); + process.exit(1); +} + async function main() { const server = new ApifyMcpServer(); await (argActors.length !== 0 diff --git a/src/input.ts b/src/input.ts index e4b856db..a59bc1a2 100644 --- a/src/input.ts +++ b/src/input.ts @@ -12,5 +12,8 @@ export async function processInput(originalInput: Partial): Promise format.trim()) as string[]; } + if (!input.enableActorAutoLoading) { + input.enableActorAutoLoading = false; + } return input; } diff --git a/src/main.ts b/src/main.ts index b850a176..99fc2c8b 100644 --- a/src/main.ts +++ b/src/main.ts @@ -7,10 +7,11 @@ import type { ActorCallOptions } from 'apify-client'; import type { Request, Response } from 'express'; import express from 'express'; -import { HEADER_READINESS_PROBE, MAX_MEMORY_MBYTES, Routes } from './const.js'; +import { HEADER_READINESS_PROBE, Routes } from './const.js'; import { processInput } from './input.js'; import { log } from './logger.js'; import { ApifyMcpServer } from './server.js'; +import { getActorDiscoveryTools, getActorAutoLoadingTools } from './tools.js'; import type { Input } from './types.js'; await Actor.init(); @@ -19,6 +20,11 @@ const STANDBY_MODE = Actor.getEnv().metaOrigin === 'STANDBY'; const HOST = Actor.isAtHome() ? 
process.env.ACTOR_STANDBY_URL : 'http://localhost'; const PORT = Actor.isAtHome() ? process.env.ACTOR_STANDBY_PORT : 3001; +if (!process.env.APIFY_TOKEN) { + log.error('APIFY_TOKEN is required but not set in the environment variables.'); + process.exit(1); +} + const app = express(); const mcpServer = new ApifyMcpServer(); @@ -36,7 +42,7 @@ async function processParamsAndUpdateTools(url: string) { const params = parse(url.split('?')[1] || '') as ParsedUrlQuery; delete params.token; log.debug(`Received input parameters: ${JSON.stringify(params)}`); - const input = await processInput(params as Input); + const input = await processInput(params as unknown as Input); if (input.actors) { await mcpServer.addToolsFromActors(input.actors as string[]); } else { @@ -107,6 +113,10 @@ log.info(`Loaded input: ${JSON.stringify(input)} `); if (STANDBY_MODE) { log.info('Actor is running in the STANDBY mode.'); await mcpServer.addToolsFromDefaultActors(); + mcpServer.updateTools(getActorDiscoveryTools()); + if (input.enableActorAutoLoading) { + mcpServer.updateTools(getActorAutoLoadingTools()); + } app.listen(PORT, () => { log.info(`The Actor web server is listening for user requests at ${HOST}`); }); @@ -116,7 +126,7 @@ if (STANDBY_MODE) { if (input && !input.debugActor && !input.debugActorInput) { await Actor.fail('If you need to debug a specific actor, please provide the debugActor and debugActorInput fields in the input'); } - const options = { memory: MAX_MEMORY_MBYTES } as ActorCallOptions; + const options = { memory: input.maxActorMemoryBytes } as ActorCallOptions; await mcpServer.callActorGetDataset(input.debugActor!, input.debugActorInput!, options); await Actor.exit(); } diff --git a/src/server.ts b/src/server.ts index 3e308ef2..fb829185 100644 --- a/src/server.ts +++ b/src/server.ts @@ -11,17 +11,32 @@ import type { ActorCallOptions } from 'apify-client'; import { ApifyClient } from 'apify-client'; import type { AxiosRequestConfig } from 'axios'; -import { 
getActorsAsTools } from './actorDefinition.js'; +import { + actorNameToToolName, + filterSchemaProperties, + getActorDefinition, + getActorsAsTools, + shortenProperties, + toolNameToActorName, +} from './actors.js'; import { ACTOR_OUTPUT_MAX_CHARS_PER_ITEM, ACTOR_OUTPUT_TRUNCATED_MESSAGE, defaults, + InternalTools, SERVER_NAME, SERVER_VERSION, USER_AGENT_ORIGIN, } from './const.js'; import { log } from './logger.js'; -import type { Tool } from './types'; +import { + RemoveActorToolArgsSchema, + AddActorToToolsArgsSchema, + DiscoverActorsArgsSchema, + searchActorsByKeywords, + GetActorDefinition, +} from './tools.js'; +import type { SchemaProperties, Tool } from './types.js'; /** * Create Apify MCP server @@ -76,19 +91,17 @@ export class ApifyMcpServer { input: unknown, callOptions: ActorCallOptions | undefined = undefined, ): Promise { - if (!process.env.APIFY_TOKEN) { - throw new Error('APIFY_TOKEN is required but not set. Please set it as an environment variable'); - } + const name = toolNameToActorName(actorName); try { - log.info(`Calling actor ${actorName} with input: ${JSON.stringify(input)}`); + log.info(`Calling actor ${name} with input: ${JSON.stringify(input)}`); const options: ApifyClientOptions = { requestInterceptors: [this.addUserAgent] }; const client = new ApifyClient({ ...options, token: process.env.APIFY_TOKEN }); - const actorClient = client.actor(actorName); + const actorClient = client.actor(name); const results = await actorClient.call(input, callOptions); const dataset = await client.dataset(results.defaultDatasetId).listItems(); - log.info(`Actor ${actorName} finished with ${dataset.items.length} items`); + log.info(`Actor ${name} finished with ${dataset.items.length} items`); if (process.env.APIFY_IS_AT_HOME) { await Actor.pushData(dataset.items); @@ -96,7 +109,7 @@ export class ApifyMcpServer { } return dataset.items; } catch (error) { - log.error(`Error calling actor: ${error}. 
Actor: ${actorName}, input: ${JSON.stringify(input)}`); + log.error(`Error calling actor: ${error}. Actor: ${name}, input: ${JSON.stringify(input)}`); throw new Error(`Error calling actor: ${error}`); } } @@ -145,24 +158,59 @@ export class ApifyMcpServer { const { name, arguments: args } = request.params; // Anthropic can't handle '/' in tool names. The replace is only necessary when calling the tool from stdio clients. - const tool = this.tools.get(name) || this.tools.get(name.replace('/', '_')); + const tool = this.tools.get(name) || this.tools.get(actorNameToToolName(name)); if (!tool) { throw new Error(`Unknown tool: ${name}`); } + if (!args) { + throw new Error(`Missing arguments for tool: ${name}`); + } log.info(`Validate arguments for tool: ${tool.name} with arguments: ${JSON.stringify(args)}`); if (!tool.ajvValidate(args)) { throw new Error(`Invalid arguments for tool ${tool.name}: args: ${JSON.stringify(args)} error: ${JSON.stringify(tool?.ajvValidate.errors)}`); } try { - const items = await this.callActorGetDataset(tool.actorName, args, { memory: tool.memoryMbytes } as ActorCallOptions); - const content = items.map((item) => { - const text = JSON.stringify(item).slice(0, ACTOR_OUTPUT_MAX_CHARS_PER_ITEM); - return text.length === ACTOR_OUTPUT_MAX_CHARS_PER_ITEM - ? { type: 'text', text: `${text} ... 
${ACTOR_OUTPUT_TRUNCATED_MESSAGE}` } - : { type: 'text', text }; - }); - return { content }; + switch (name) { + case InternalTools.ADD_ACTOR_TO_TOOLS: { + const parsed = AddActorToToolsArgsSchema.parse(args); + await this.addToolsFromActors([parsed.actorFullName]); + return { content: [{ type: 'text', text: `Actor ${args.name} was added to tools` }] }; + } + case InternalTools.REMOVE_ACTOR_FROM_TOOLS: { + const parsed = RemoveActorToolArgsSchema.parse(args); + this.tools.delete(parsed.toolName); + return { content: [{ type: 'text', text: `Actor ${args.name} was removed from tools` }] }; + } + case InternalTools.DISCOVER_ACTORS: { + const parsed = DiscoverActorsArgsSchema.parse(args); + const actors = await searchActorsByKeywords( + parsed.search, + parsed.limit, + parsed.offset, + ); + return { content: actors?.map((item) => ({ type: 'text', text: JSON.stringify(item) })) }; + } + case InternalTools.GET_ACTOR_DETAILS: { + const parsed = GetActorDefinition.parse(args); + const v = await getActorDefinition(parsed.actorFullName); + if (v && v.input && 'properties' in v.input && v.input) { + const properties = filterSchemaProperties(v.input.properties as { [key: string]: SchemaProperties }); + v.input.properties = shortenProperties(properties); + } + return { content: [{ type: 'text', text: JSON.stringify(v) }] }; + } + default: { + const items = await this.callActorGetDataset(tool.actorFullName, args, { memory: tool.memoryMbytes } as ActorCallOptions); + const content = items.map((item) => { + const text = JSON.stringify(item).slice(0, ACTOR_OUTPUT_MAX_CHARS_PER_ITEM); + return text.length === ACTOR_OUTPUT_MAX_CHARS_PER_ITEM + ? { type: 'text', text: `${text} ... 
${ACTOR_OUTPUT_TRUNCATED_MESSAGE}` } + : { type: 'text', text }; + }); + return { content }; + } + } } catch (error) { log.error(`Error calling tool: ${error}`); throw new Error(`Error calling tool: ${error}`); diff --git a/src/tools.ts b/src/tools.ts new file mode 100644 index 00000000..8be69196 --- /dev/null +++ b/src/tools.ts @@ -0,0 +1,138 @@ +import { Ajv } from 'ajv'; +import type { ActorStoreList } from 'apify-client'; +import { ApifyClient } from 'apify-client'; +import { z } from 'zod'; +import { zodToJsonSchema } from 'zod-to-json-schema'; + +import { actorNameToToolName, toolNameToActorName } from './actors.js'; +import { InternalTools } from './const.js'; +import type { ActorStorePruned, PricingInfo, Tool } from './types.js'; + +export const DiscoverActorsArgsSchema = z.object({ + limit: z.number() + .int() + .min(1) + .max(100) + .default(10) + .describe('The maximum number of Actors to return. Default value is 10.'), + offset: z.number() + .int() + .min(0) + .default(0) + .describe('The number of elements that should be skipped at the start. Default value is 0.'), + search: z.string() + .default('') + .describe('String of key words to search by. ' + + 'Searches the title, name, description, username, and readme of an Actor.' + + 'Only key word search is supported, no advanced search.', + ), + category: z.string() + .default('') + .describe('Filters the results by the specified category.'), +}); + +export const RemoveActorToolArgsSchema = z.object({ + toolName: z.string() + .describe('Full name of the Actor to remove. Actor full name is always composed from `username--name`' + + 'Never use name or username only') + .transform((val) => actorNameToToolName(val)), +}); + +export const AddActorToToolsArgsSchema = z.object({ + actorFullName: z.string() + .describe('Full name of the Actor to add as tool. 
Tool name is always composed from `username--name`' + + 'Never use name or username only') + .transform((val) => actorNameToToolName(val)), +}); + +export const GetActorDefinition = z.object({ + actorFullName: z.string() + .describe('Full name of the Actor to retrieve documentation. Actor full name is always composed from `username--name`.' + + 'Never use name or username only') + .transform((val) => toolNameToActorName(val)), +}); + +export function getActorAutoLoadingTools(): Tool[] { + const ajv = new Ajv({ coerceTypes: 'array', strict: false }); + return [ + { + name: InternalTools.ADD_ACTOR_TO_TOOLS, + actorFullName: InternalTools.ADD_ACTOR_TO_TOOLS, + description: 'Add an Actor tool by name to available tools. Do not execute the actor, only add it and list it in available tools. ' + + 'Never run the tool without user consent! ' + + 'For example, add a tool with username--name when user wants to scrape/extract data', + inputSchema: zodToJsonSchema(AddActorToToolsArgsSchema), + ajvValidate: ajv.compile(zodToJsonSchema(AddActorToToolsArgsSchema)), + }, + { + name: InternalTools.REMOVE_ACTOR_FROM_TOOLS, + actorFullName: InternalTools.ADD_ACTOR_TO_TOOLS, + description: 'Remove an actor tool by name from available toos. ' + + 'For example, when user says, I do not need a tool username--name anymore', + inputSchema: zodToJsonSchema(RemoveActorToolArgsSchema), + ajvValidate: ajv.compile(zodToJsonSchema(RemoveActorToolArgsSchema)), + }, + ]; +} + +export function getActorDiscoveryTools(): Tool[] { + const ajv = new Ajv({ coerceTypes: 'array', strict: false }); + return [ + { + name: InternalTools.DISCOVER_ACTORS, + actorFullName: InternalTools.DISCOVER_ACTORS, + description: `Discover available Actors using full text search using keywords.` + + `Users try to discover Actors using free form query in this case search query needs to be converted to full text search. ` + + `Prefer Actors from Apify as they are generally more reliable and have better support. 
` + + `Returns a list of Actors with name, description, run statistics, pricing, starts, and URL. ` + + `You perhaps need to use this tool several times to find the right Actor. ` + + `Limit number of results returned but ensure that relevant results are returned. `, + inputSchema: zodToJsonSchema(DiscoverActorsArgsSchema), + ajvValidate: ajv.compile(zodToJsonSchema(DiscoverActorsArgsSchema)), + }, + { + name: InternalTools.GET_ACTOR_DETAILS, + actorFullName: InternalTools.GET_ACTOR_DETAILS, + description: 'Get documentation, readme, input schema and other details about Actor. ' + + 'For example, when user says, I need to know more about web crawler Actor.' + + 'Get details for Actors with username--name.', + inputSchema: zodToJsonSchema(GetActorDefinition), + ajvValidate: ajv.compile(zodToJsonSchema(GetActorDefinition)), + }, + ]; +} + +function pruneActorStoreInfo(response: ActorStoreList): ActorStorePruned { + const stats = response.stats || {}; + const pricingInfo = (response.currentPricingInfo || {}) as PricingInfo; + return { + name: response.name?.toString() || '', + username: response.username?.toString() || '', + actorFullName: `${response.username}/${response.name}`, + title: response.title?.toString() || '', + description: response.description?.toString() || '', + stats: { + totalRuns: stats.totalRuns, + totalUsers30Days: stats.totalUsers30Days, + publicActorRunStats30Days: 'publicActorRunStats30Days' in stats + ? stats.publicActorRunStats30Days : {}, + }, + currentPricingInfo: { + pricingModel: pricingInfo.pricingModel?.toString() || '', + pricePerUnitUsd: pricingInfo?.pricePerUnitUsd ?? 0, + trialMinutes: pricingInfo?.trialMinutes ?? 0, + }, + url: response.url?.toString() || '', + totalStars: 'totalStars' in response ? 
(response.totalStars as number) : null, + }; +} + +export async function searchActorsByKeywords( + search: string, + limit: number | undefined = undefined, + offset: number | undefined = undefined, +): Promise { + const client = new ApifyClient({ token: process.env.APIFY_TOKEN }); + const results = await client.store().list({ search, limit, offset }); + return results.items.map((x) => pruneActorStoreInfo(x)); +} diff --git a/src/types.ts b/src/types.ts index 1c73cdc8..fc8e6515 100644 --- a/src/types.ts +++ b/src/types.ts @@ -3,30 +3,67 @@ import type { ActorDefaultRunOptions, ActorDefinition } from 'apify-client'; export type Input = { actors: string[] | string; + enableActorAutoLoading?: boolean; + maxActorMemoryBytes?: number; debugActor?: string; debugActorInput?: unknown; }; +export interface ActorDefinitionPruned { + actorFullName: string; + buildTag?: string; + readme?: string | null; + input?: object | null; + description: string; + defaultRunOptions: ActorDefaultRunOptions; +} + export interface ActorDefinitionWithDesc extends ActorDefinition { + actorFullName: string; description: string; defaultRunOptions: ActorDefaultRunOptions } export interface Tool { name: string; - actorName: string; + actorFullName: string; description: string; inputSchema: object; ajvValidate: ValidateFunction; - memoryMbytes: number; + memoryMbytes?: number; } export interface SchemaProperties { title: string; description: string; enum: string[]; // Array of string options for the enum - enumTitles: string[]; // Array of string titles for the enum + enumTitles?: string[]; // Array of string titles for the enum type: string; // Data type (e.g., "string") default: string; prefill: string; } + +// ActorStoreList for actor-search tool +export interface ActorStats { + totalRuns: number; + totalUsers30Days: number; + publicActorRunStats30Days: unknown; +} + +export interface PricingInfo { + pricingModel?: string; + pricePerUnitUsd?: number; + trialMinutes?: number +} + +export interface 
ActorStorePruned { + name: string; + username: string; + actorFullName?: string; + title?: string; + description?: string; + stats: ActorStats; + currentPricingInfo: PricingInfo; + url: string; + totalStars?: number | null; +}