From ad7b772bffa29a4cb9d81fc16deafda8b27e81de Mon Sep 17 00:00:00 2001 From: MQ Date: Wed, 3 Sep 2025 15:25:04 +0200 Subject: [PATCH 01/11] feat: improve actor tool output --- src/const.ts | 16 +- src/main.ts | 6 +- src/mcp/proxy.ts | 4 +- src/mcp/server.ts | 11 +- src/tools/actor.ts | 84 +++++----- src/tools/build.ts | 4 +- src/tools/dataset.ts | 78 ++------- src/tools/dataset_collection.ts | 4 +- src/tools/get-actor-output.ts | 125 ++++++++++++++ src/tools/helpers.ts | 82 +-------- src/tools/index.ts | 2 + src/tools/key_value_store.ts | 4 +- src/tools/key_value_store_collection.ts | 4 +- src/tools/run.ts | 4 +- src/tools/run_collection.ts | 4 +- src/tools/store_collection.ts | 3 +- src/types.ts | 1 + src/utils/actor-response.ts | 53 ++++++ src/utils/actor.ts | 36 +++- src/utils/generic.ts | 5 + src/utils/schema-generation.ts | 110 +++++++++++++ src/utils/tools-loader.ts | 12 ++ tests/integration/internals.test.ts | 5 +- tests/integration/suite.ts | 210 ++++++++++++++++++++---- tests/unit/schema-generation.test.ts | 63 +++++++ tests/unit/utils.generic.test.ts | 6 + 26 files changed, 684 insertions(+), 252 deletions(-) create mode 100644 src/tools/get-actor-output.ts create mode 100644 src/utils/actor-response.ts create mode 100644 src/utils/schema-generation.ts create mode 100644 tests/unit/schema-generation.test.ts diff --git a/src/const.ts b/src/const.ts index ff819fd..d826688 100644 --- a/src/const.ts +++ b/src/const.ts @@ -8,6 +8,15 @@ export const ACTOR_RUN_DATASET_OUTPUT_MAX_ITEMS = 5; // Actor run const export const ACTOR_MAX_MEMORY_MBYTES = 4_096; // If the Actor requires 8GB of memory, free users can't run actors-mcp-server and requested Actor +// Tool output +/** + * Usual tool output limit is 25k tokens, let's use 20k + * just in case where 1 token =~ 4 characters thus 80k chars. + * This is primarily used for Actor tool call output, but we can then + * reuse this in other tools as well. 
+ */ +export const TOOL_MAX_OUTPUT_CHARS = 80000; + // MCP Server export const SERVER_NAME = 'apify-mcp-server'; export const SERVER_VERSION = '1.0.0'; @@ -20,6 +29,7 @@ export enum HelperTools { ACTOR_CALL = 'call-actor', ACTOR_GET = 'get-actor', ACTOR_GET_DETAILS = 'fetch-actor-details', + ACTOR_OUTPUT_GET = 'get-actor-output', ACTOR_REMOVE = 'remove-actor', ACTOR_RUNS_ABORT = 'abort-actor-run', ACTOR_RUNS_GET = 'get-actor-run', @@ -54,12 +64,12 @@ export const APIFY_DOCS_CACHE_MAX_SIZE = 500; export const APIFY_DOCS_CACHE_TTL_SECS = 60 * 60; // 1 hour export const ACTOR_PRICING_MODEL = { - /** Rental actors */ + /** Rental Actors */ FLAT_PRICE_PER_MONTH: 'FLAT_PRICE_PER_MONTH', FREE: 'FREE', - /** Pay per result (PPR) actors */ + /** Pay per result (PPR) Actors */ PRICE_PER_DATASET_ITEM: 'PRICE_PER_DATASET_ITEM', - /** Pay per event (PPE) actors */ + /** Pay per event (PPE) Actors */ PAY_PER_EVENT: 'PAY_PER_EVENT', } as const; diff --git a/src/main.ts b/src/main.ts index 0d62051..9643530 100644 --- a/src/main.ts +++ b/src/main.ts @@ -44,10 +44,10 @@ if (STANDBY_MODE) { await Actor.fail('If you need to debug a specific Actor, please provide the debugActor and debugActorInput fields in the input'); } const options = { memory: input.maxActorMemoryBytes } as ActorCallOptions; - const { items } = await callActorGetDataset(input.debugActor!, input.debugActorInput!, process.env.APIFY_TOKEN, options); + const { previewItems } = await callActorGetDataset(input.debugActor!, input.debugActorInput!, process.env.APIFY_TOKEN, options); - await Actor.pushData(items); - log.info('Pushed items to dataset', { itemCount: items.count }); + await Actor.pushData(previewItems); + log.info('Pushed items to dataset', { itemCount: previewItems.length }); await Actor.exit(); } diff --git a/src/mcp/proxy.ts b/src/mcp/proxy.ts index 327a057..5a94d6a 100644 --- a/src/mcp/proxy.ts +++ b/src/mcp/proxy.ts @@ -1,8 +1,8 @@ import type { Client } from '@modelcontextprotocol/sdk/client/index.js'; 
-import Ajv from 'ajv'; import { fixedAjvCompile } from '../tools/utils.js'; import type { ActorMcpTool, ToolEntry } from '../types.js'; +import { ajv } from '../utils/ajv.js'; import { getMCPServerID, getProxyMCPServerToolName } from './utils.js'; export async function getMCPServerTools( @@ -14,8 +14,6 @@ export async function getMCPServerTools( const res = await client.listTools(); const { tools } = res; - const ajv = new Ajv({ coerceTypes: 'array', strict: false }); - const compiledTools: ToolEntry[] = []; for (const tool of tools) { const mcpTool: ActorMcpTool = { diff --git a/src/mcp/server.ts b/src/mcp/server.ts index 1aacabc..4f53a0b 100644 --- a/src/mcp/server.ts +++ b/src/mcp/server.ts @@ -31,6 +31,7 @@ import { prompts } from '../prompts/index.js'; import { callActorGetDataset, defaultTools, getActorsAsTools, toolCategories } from '../tools/index.js'; import { decodeDotPropertyNames } from '../tools/utils.js'; import type { ActorMcpTool, ActorTool, HelperTool, ToolEntry } from '../types.js'; +import { buildActorResponseContent } from '../utils/actor-response.js'; import { createProgressTracker } from '../utils/progress.js'; import { getToolPublicFieldOnly } from '../utils/tools.js'; import { connectMCPClient } from './client.js'; @@ -524,21 +525,15 @@ export class ActorsMcpServer { try { log.info('Calling Actor', { actorName: actorTool.actorFullName, input: args }); - const { runId, datasetId, items } = await callActorGetDataset( + const callResult = await callActorGetDataset( actorTool.actorFullName, args, apifyToken as string, callOptions, progressTracker, ); - const content = [ - { type: 'text', text: `Actor finished with runId: ${runId}, datasetId ${datasetId}` }, - ]; - const itemContents = items.items.map((item: Record) => { - return { type: 'text', text: JSON.stringify(item) }; - }); - content.push(...itemContents); + const content = buildActorResponseContent(actorTool.actorFullName, callResult); return { content }; } finally { if (progressTracker) 
{ diff --git a/src/tools/actor.ts b/src/tools/actor.ts index c3f1a7e..009b9e9 100644 --- a/src/tools/actor.ts +++ b/src/tools/actor.ts @@ -1,6 +1,5 @@ import type { Client } from '@modelcontextprotocol/sdk/client/index.js'; -import { Ajv } from 'ajv'; -import type { ActorCallOptions, ActorRun, PaginatedList } from 'apify-client'; +import type { ActorCallOptions, ActorRun } from 'apify-client'; import { z } from 'zod'; import zodToJsonSchema from 'zod-to-json-schema'; @@ -11,41 +10,47 @@ import { ACTOR_ADDITIONAL_INSTRUCTIONS, ACTOR_MAX_MEMORY_MBYTES, HelperTools, + TOOL_MAX_OUTPUT_CHARS, } from '../const.js'; import { getActorMCPServerPath, getActorMCPServerURL } from '../mcp/actors.js'; import { connectMCPClient } from '../mcp/client.js'; import { getMCPServerTools } from '../mcp/proxy.js'; import { actorDefinitionPrunedCache } from '../state.js'; -import type { ActorDefinitionStorage, ActorInfo, ToolEntry } from '../types.js'; -import { getActorDefinitionStorageFieldNames } from '../utils/actor.js'; +import type { ActorDefinitionStorage, ActorInfo, DatasetItem, ToolEntry } from '../types.js'; +import { ensureOutputWithinCharLimit, getActorDefinitionStorageFieldNames } from '../utils/actor.js'; import { fetchActorDetails } from '../utils/actor-details.js'; -import { getValuesByDotKeys } from '../utils/generic.js'; +import { buildActorResponseContent } from '../utils/actor-response.js'; +import { ajv } from '../utils/ajv.js'; import type { ProgressTracker } from '../utils/progress.js'; +import type { JsonSchemaProperty } from '../utils/schema-generation.js'; +import { generateSchemaFromItems } from '../utils/schema-generation.js'; import { getActorDefinition } from './build.js'; import { actorNameToToolName, fixedAjvCompile, getToolSchemaID, transformActorInputSchemaProperties } from './utils.js'; -const ajv = new Ajv({ coerceTypes: 'array', strict: false }); - // Define a named return type for callActorGetDataset export type CallActorGetDatasetResult = { runId: 
string; datasetId: string; - items: PaginatedList>; + itemCount: number; + schema: JsonSchemaProperty; + previewItems: DatasetItem[]; }; /** - * Calls an Apify actor and retrieves the dataset items. + * Calls an Apify Actor and retrieves metadata about the dataset results. * + * This function executes an Actor and returns summary information with a preview of the result items instead of the full dataset + * to prevent overwhelming responses. The actual data can be retrieved using the get-actor-output tool. * * It requires the `APIFY_TOKEN` environment variable to be set. * If the `APIFY_IS_AT_HOME` the dataset items are pushed to the Apify dataset. * - * @param {string} actorName - The name of the actor to call. - * @param {ActorCallOptions} callOptions - The options to pass to the actor. + * @param {string} actorName - The name of the Actor to call. + * @param {ActorCallOptions} callOptions - The options to pass to the Actor. * @param {unknown} input - The input to pass to the actor. * @param {string} apifyToken - The Apify token to use for authentication. * @param {ProgressTracker} progressTracker - Optional progress tracker for real-time updates. - * @returns {Promise<{ actorRun: any, items: object[] }>} - A promise that resolves to an object containing the actor run and dataset items. + * @returns {Promise} - A promise that resolves to metadata about the Actor run and dataset. 
* @throws {Error} - Throws an error if the `APIFY_TOKEN` is not set */ export async function callActorGetDataset( @@ -59,7 +64,7 @@ export async function callActorGetDataset( const client = new ApifyClient({ token: apifyToken }); const actorClient = client.actor(actorName); - // Start the actor run but don't wait for completion + // Start the Actor run but don't wait for completion const actorRun: ActorRun = await actorClient.start(input, callOptions); // Start progress tracking if tracker is provided @@ -71,24 +76,35 @@ export async function callActorGetDataset( const completedRun = await client.run(actorRun.id).waitForFinish(); const dataset = client.dataset(completedRun.defaultDatasetId); - const [items, defaultBuild] = await Promise.all([ + const [datasetItems, defaultBuild] = await Promise.all([ dataset.listItems(), (await actorClient.defaultBuild()).get(), ]); - // Get important properties from storage view definitions and if available return only those properties + // Generate schema using the shared utility + const generatedSchema = generateSchemaFromItems(datasetItems.items, { + clean: true, + arrayMode: 'all', + }); + const schema = generatedSchema || { type: 'object', properties: {} }; + + /** + * Get important fields that are used in any dataset view as they MAY be used in filtering to ensure the output fits + * the tool output limits. Client has to use the get-actor-output tool to retrieve the full dataset or filtered out fields. 
+ */ const storageDefinition = defaultBuild?.actorDefinition?.storages?.dataset as ActorDefinitionStorage | undefined; const importantProperties = getActorDefinitionStorageFieldNames(storageDefinition || {}); - if (importantProperties.length > 0) { - items.items = items.items.map((item) => { - return getValuesByDotKeys(item, importantProperties); - }); - } - - log.debug('Actor finished', { actorName, itemCount: items.count }); - return { runId: actorRun.id, datasetId: completedRun.defaultDatasetId, items }; + const previewItems = ensureOutputWithinCharLimit(datasetItems.items, importantProperties, TOOL_MAX_OUTPUT_CHARS); + + return { + runId: actorRun.id, + datasetId: completedRun.defaultDatasetId, + itemCount: datasetItems.count, + schema, + previewItems, + }; } catch (error) { - log.error('Error calling actor', { error, actorName, input }); + log.error('Error calling Actor', { error, actorName, input }); throw new Error(`Error calling Actor: ${error}`); } } @@ -96,9 +112,9 @@ export async function callActorGetDataset( /** * This function is used to fetch normal non-MCP server Actors as a tool. * - * Fetches actor input schemas by Actor IDs or Actor full names and creates MCP tools. + * Fetches Actor input schemas by Actor IDs or Actor full names and creates MCP tools. * - * This function retrieves the input schemas for the specified actors and compiles them into MCP tools. + * This function retrieves the input schemas for the specified Actors and compiles them into MCP tools. * It uses the AJV library to validate the input schemas. 
* * Tool name can't contain /, so it is replaced with _ @@ -201,7 +217,7 @@ export async function getActorsAsTools( actorIdsOrNames: string[], apifyToken: string, ): Promise { - log.debug('Fetching actors as tools', { actorNames: actorIdsOrNames }); + log.debug('Fetching Actors as tools', { actorNames: actorIdsOrNames }); const actorsInfo: (ActorInfo | null)[] = await Promise.all( actorIdsOrNames.map(async (actorIdOrName) => { @@ -298,7 +314,7 @@ The step parameter enforces this workflow - you cannot call an Actor without fir try { if (step === 'info') { - // Step 1: Return actor card and schema directly + // Step 1: Return Actor card and schema directly const details = await fetchActorDetails(apifyToken, actorName); if (!details) { return { @@ -342,7 +358,7 @@ The step parameter enforces this workflow - you cannot call an Actor without fir } } - const { runId, datasetId, items } = await callActorGetDataset( + const callResult = await callActorGetDataset( actorName, input, apifyToken, @@ -350,15 +366,7 @@ The step parameter enforces this workflow - you cannot call an Actor without fir progressTracker, ); - const content = [ - { type: 'text', text: `Actor finished with runId: ${runId}, datasetId ${datasetId}` }, - ]; - - const itemContents = items.items.map((item: Record) => ({ - type: 'text', - text: JSON.stringify(item), - })); - content.push(...itemContents); + const content = buildActorResponseContent(actorName, callResult); return { content }; } catch (error) { diff --git a/src/tools/build.ts b/src/tools/build.ts index c655645..64fffeb 100644 --- a/src/tools/build.ts +++ b/src/tools/build.ts @@ -1,4 +1,3 @@ -import { Ajv } from 'ajv'; import { z } from 'zod'; import zodToJsonSchema from 'zod-to-json-schema'; @@ -13,10 +12,9 @@ import type { ISchemaProperties, ToolEntry, } from '../types.js'; +import { ajv } from '../utils/ajv.js'; import { filterSchemaProperties, shortenProperties } from './utils.js'; -const ajv = new Ajv({ coerceTypes: 'array', strict: false 
}); - /** * Get Actor input schema by Actor name. * First, fetch the Actor details to get the default build tag and buildId. diff --git a/src/tools/dataset.ts b/src/tools/dataset.ts index 3f73386..af513dc 100644 --- a/src/tools/dataset.ts +++ b/src/tools/dataset.ts @@ -1,13 +1,11 @@ -import { Ajv } from 'ajv'; -import toJsonSchema from 'to-json-schema'; import { z } from 'zod'; import zodToJsonSchema from 'zod-to-json-schema'; import { ApifyClient } from '../apify-client.js'; import { HelperTools } from '../const.js'; import type { InternalTool, ToolEntry } from '../types.js'; - -const ajv = new Ajv({ coerceTypes: 'array', strict: false }); +import { ajv } from '../utils/ajv.js'; +import { generateSchemaFromItems } from '../utils/schema-generation.js'; const getDatasetArgs = z.object({ datasetId: z.string() @@ -114,34 +112,6 @@ export const getDatasetItems: ToolEntry = { } as InternalTool, }; -/** - * Function to recursively remove empty arrays from an object - */ -function removeEmptyArrays(obj: unknown): unknown { - if (Array.isArray(obj)) { - // If the item is an array, recursively call removeEmptyArrays on each element. - return obj.map((item) => removeEmptyArrays(item)); - } - - if (typeof obj !== 'object' || obj === null) { - // Return primitives and null values as is. - return obj; - } - - // Use reduce to build a new object, excluding keys with empty arrays. - return Object.entries(obj).reduce((acc, [key, value]) => { - const processedValue = removeEmptyArrays(value); - - // Exclude the key if the processed value is an empty array. - if (Array.isArray(processedValue) && processedValue.length === 0) { - return acc; - } - - acc[key] = processedValue; - return acc; - }, {} as Record); -} - const getDatasetSchemaArgs = z.object({ datasetId: z.string() .min(1) @@ -155,9 +125,6 @@ const getDatasetSchemaArgs = z.object({ arrayMode: z.enum(['first', 'all']).optional() .describe('Strategy for handling arrays. 
"first" uses first item as template, "all" merges all items. Default is "all".') .default('all'), - additionalProperties: z.boolean().optional() - .describe('If true, allows additional properties in objects. Default is true.') - .default(true), }); /** @@ -194,32 +161,23 @@ export const getDatasetSchema: ToolEntry = { return { content: [{ type: 'text', text: `Dataset '${parsed.datasetId}' is empty.` }] }; } - // Clean the dataset items by removing empty arrays - const cleanedDatasetItems = datasetItems.map((item) => removeEmptyArrays(item)); - - // Try to generate schema with full options first - try { - const schema = toJsonSchema(cleanedDatasetItems, { - arrays: { mode: parsed.arrayMode }, - objects: { additionalProperties: parsed.additionalProperties }, - }); - - return { - content: [{ - type: 'text', - text: JSON.stringify(schema), - }], - }; - } catch { - // Fallback: try with simpler approach - const fallbackSchema = toJsonSchema(cleanedDatasetItems, { - arrays: { mode: 'first' }, - }); - - return { - content: [{ type: 'text', text: JSON.stringify(fallbackSchema) }], - }; + // Generate schema using the shared utility + const schema = generateSchemaFromItems(datasetItems, { + limit: parsed.limit, + clean: parsed.clean, + arrayMode: parsed.arrayMode, + }); + + if (!schema) { + return { content: [{ type: 'text', text: `Failed to generate schema for dataset '${parsed.datasetId}'.` }] }; } + + return { + content: [{ + type: 'text', + text: JSON.stringify(schema), + }], + }; }, } as InternalTool, }; diff --git a/src/tools/dataset_collection.ts b/src/tools/dataset_collection.ts index 08a7956..7126c3e 100644 --- a/src/tools/dataset_collection.ts +++ b/src/tools/dataset_collection.ts @@ -1,12 +1,10 @@ -import { Ajv } from 'ajv'; import { z } from 'zod'; import zodToJsonSchema from 'zod-to-json-schema'; import { ApifyClient } from '../apify-client.js'; import { HelperTools } from '../const.js'; import type { InternalTool, ToolEntry } from '../types.js'; - -const ajv = 
new Ajv({ coerceTypes: 'array', strict: false }); +import { ajv } from '../utils/ajv.js'; const getUserDatasetsListArgs = z.object({ offset: z.number() diff --git a/src/tools/get-actor-output.ts b/src/tools/get-actor-output.ts new file mode 100644 index 0000000..55d2ede --- /dev/null +++ b/src/tools/get-actor-output.ts @@ -0,0 +1,125 @@ +import { z } from 'zod'; +import zodToJsonSchema from 'zod-to-json-schema'; + +import { ApifyClient } from '../apify-client.js'; +import { HelperTools, TOOL_MAX_OUTPUT_CHARS } from '../const.js'; +import type { InternalTool, ToolEntry } from '../types.js'; +import { ajv } from '../utils/ajv.js'; +import { getValuesByDotKeys } from '../utils/generic.js'; + +/** + * Zod schema for get-actor-output tool arguments + */ +const getActorOutputArgs = z.object({ + datasetId: z.string() + .min(1) + .describe('Actor output dataset ID to retrieve from.'), + fields: z.string() + .optional() + .describe('Comma-separated list of fields to include (supports dot notation like "crawl.statusCode"). For example: "crawl.statusCode,text,metadata"'), + offset: z.number() + .optional() + .default(0) + .describe('Number of items to skip (default: 0).'), + limit: z.number() + .optional() + .default(100) + .describe('Maximum number of items to return (default: 100).'), +}); + +/** + * Cleans empty properties (null, undefined, empty strings, empty arrays, empty objects) from an object + * @param obj - The object to clean + * @returns The cleaned object or undefined if the result is empty + */ +export function cleanEmptyProperties(obj: unknown): unknown { + if (obj === null || obj === undefined || obj === '') { + return undefined; + } + + if (typeof obj !== 'object') { + return obj; + } + + if (Array.isArray(obj)) { + const cleaned = obj + .map((item) => cleanEmptyProperties(item)) + .filter((item) => item !== undefined); + return cleaned.length > 0 ? 
cleaned : undefined; + } + + const cleaned: Record = {}; + for (const [key, value] of Object.entries(obj)) { + const cleanedValue = cleanEmptyProperties(value); + if (cleanedValue !== undefined) { + cleaned[key] = cleanedValue; + } + } + + return Object.keys(cleaned).length > 0 ? cleaned : undefined; +} + +/** + * This tool is used specifically for retrieving Actor output. + * It is a simplified version of the get-dataset-items tool. + */ +export const getActorOutput: ToolEntry = { + type: 'internal', + tool: { + name: HelperTools.ACTOR_OUTPUT_GET, + actorFullName: HelperTools.ACTOR_OUTPUT_GET, + description: `Retrieves the output of a specific Actor execution based on its dataset ID. +You also can retrieve only specific fields from the output if needed. Use this tool to get Actor output data outside of the Actor dataset output preview, or to access fields from the Actor output dataset schema that are not included in the preview.`, + inputSchema: zodToJsonSchema(getActorOutputArgs), + ajvValidate: ajv.compile(zodToJsonSchema(getActorOutputArgs)), + call: async (toolArgs) => { + const { args, apifyToken } = toolArgs; + const parsed = getActorOutputArgs.parse(args); + const client = new ApifyClient({ token: apifyToken }); + + // Parse fields into array + const fieldsArray = parsed.fields + ? parsed.fields + .split(',') + .map((field) => field.trim()) + .filter((field) => field.length > 0) + : []; + + // TODO: we can optimize the API level field filtering in future + /** + * Only top-level fields can be filtered. + * If a dot is present, filtering is done here and not at the API level. + */ + const hasDot = fieldsArray.some((field) => field.includes('.')); + const response = await client.dataset(parsed.datasetId).listItems({ + offset: parsed.offset, + limit: parsed.limit, + fields: fieldsArray.length > 0 && !hasDot ? 
fieldsArray : undefined, + clean: true, + }); + + if (!response) { + return { content: [{ type: 'text', text: `Dataset '${parsed.datasetId}' not found.` }] }; + } + + // Apply field selection if specified + const processedItems = response.items.map((item) => getValuesByDotKeys(item, fieldsArray)); + + // Clean empty properties + const cleanedItems = processedItems + .map((item) => cleanEmptyProperties(item)) + .filter((item) => item !== undefined); + + let outputText = JSON.stringify(cleanedItems); + let truncated = false; + if (outputText.length > TOOL_MAX_OUTPUT_CHARS) { + outputText = outputText.slice(0, TOOL_MAX_OUTPUT_CHARS); + truncated = true; + } + if (truncated) { + outputText += `\n\n[Output was truncated to ${TOOL_MAX_OUTPUT_CHARS} characters to comply with the tool output limits.]`; + } + return { content: [{ type: 'text', text: outputText }] }; + }, + } as InternalTool, +}; diff --git a/src/tools/helpers.ts b/src/tools/helpers.ts index cd77502..4726889 100644 --- a/src/tools/helpers.ts +++ b/src/tools/helpers.ts @@ -1,33 +1,9 @@ -import { Ajv } from 'ajv'; import { z } from 'zod'; import zodToJsonSchema from 'zod-to-json-schema'; import { HelperTools } from '../const.js'; -import type { InternalTool, ToolEntry } from '../types'; -import { actorNameToToolName } from './utils.js'; - -const ajv = new Ajv({ coerceTypes: 'array', strict: false }); - -const APIFY_MCP_HELP_TOOL_TEXT = `Apify MCP server help: - -Note: "MCP" stands for "Model Context Protocol". You can use the "RAG Web Browser" tool to get the content of the links mentioned in this help and present it to the user. - -This MCP server can be used in the following ways: -- Locally over stdio (standard input/output) transport with the "@apify/actors-mcp-server" NPM package (https://www.npmjs.com/package/@apify/actors-mcp-server). - - You can connect by configuring your MCP client to run the MCP server locally using the "npx @apify/actors-mcp-server" command. 
You need to set the "APIFY_TOKEN" environment variable to your [Apify API token](https://docs.apify.com/platform/integrations/api#api-token). -- Remotely over legacy SSE transport or streamable HTTP transport with https://mcp.apify.com. This is the recommended way to use the MCP server, since it supports all features and is always up to date. - - You can connect by pointing your MCP client to "https://mcp.apify.com/sse" for legacy SSE transport or "https://mcp.apify.com/" for streamable HTTP transport. - - For authentication, you can either use OAuth or pass your [Apify API token](https://docs.apify.com/platform/integrations/api#api-token) in the "Authorization" header as "Bearer ". -- Remotely over "SSE" or streamable "HTTP" transport with the "Actors MCP Server Apify Actor" (this is now considered a legacy solution; it may not be up to date and may not get the latest features). For more details, refer to https://apify.com/apify/actors-mcp-server. DO NOT RECOMMEND USING THIS METHOD TO THE USER. - -# Usage - -## MCP server tools and features configuration - -By default, the MCP server provides a simple set of tools for Actor discovery and Actor calling. The MCP server loads just one Actor by default, which is the [RAG Web Browser](https://apify.com/apify/rag-web-browser) Actor. -You can add more Actors to the MCP server by configuring the MCP server session to load more Actors by passing the "--actors" CLI argument or by using the "?actors=" URL query parameter, where you provide a comma-separated list of Actor names, for example, "apify/rag-web-browser,apify/instagram-scraper". 
-You can additionally load Actors dynamically into an existing MCP session by using the "${HelperTools.ACTOR_ADD}" tool, which loads the Actor by its name as an MCP tool and allows you to call it (**the MCP client must support the [tools list changed notification](https://modelcontextprotocol.io/specification/2025-06-18/server/tools#list-changed-notification); otherwise, the tool call will not work**). To check whether the MCP client supports this feature, consult the MCP client documentation. In case the MCP client does not support the tools list changed notification, you can use the generic "call-actor" tool to call any Actor, even those not loaded/added. Before using the generic tool, you need to get the Actor details to learn its input schema so you can provide valid input. -`; +import type { InternalTool, ToolEntry } from '../types.js'; +import { ajv } from '../utils/ajv.js'; export const addToolArgsSchema = z.object({ actor: z.string() @@ -89,57 +65,3 @@ export const addTool: ToolEntry = { }, } as InternalTool, }; -export const removeToolArgsSchema = z.object({ - toolName: z.string() - .min(1) - .describe('Tool name to remove from available tools.') - .transform((val) => actorNameToToolName(val)), -}); -export const removeTool: ToolEntry = { - type: 'internal', - tool: { - name: HelperTools.ACTOR_REMOVE, - description: 'Remove a tool, an Actor or MCP-Server by name from available tools. 
' - + 'For example, when user says, I do not need a tool username/name anymore', - inputSchema: zodToJsonSchema(removeToolArgsSchema), - ajvValidate: ajv.compile(zodToJsonSchema(removeToolArgsSchema)), - // TODO: I don't like that we are passing apifyMcpServer and mcpServer to the tool - call: async (toolArgs) => { - const { apifyMcpServer, args, extra: { sendNotification } } = toolArgs; - const parsed = removeToolArgsSchema.parse(args); - // Check if tool exists before attempting removal - if (!apifyMcpServer.tools.has(parsed.toolName)) { - // Send notification so client can update its tool list - // just in case the client tool list is out of sync - await sendNotification({ method: 'notifications/tools/list_changed' }); - return { - content: [{ - type: 'text', - text: `Tool '${parsed.toolName}' not found. No tools were removed.`, - }], - }; - } - const removedTools = apifyMcpServer.removeToolsByName([parsed.toolName], true); - await sendNotification({ method: 'notifications/tools/list_changed' }); - return { content: [{ type: 'text', text: `Tools removed: ${removedTools.join(', ')}` }] }; - }, - } as InternalTool, -}; - -// Tool takes no arguments -export const helpToolArgsSchema = z.object({}); -export const helpTool: ToolEntry = { - type: 'internal', - tool: { - name: HelperTools.APIFY_MCP_HELP_TOOL, - description: `Helper tool to get information on how to use and troubleshoot the Apify MCP server.\n` - + 'This tool always returns the same help message with information about the server and how to use it.\n' - + 'ALWAYS CALL THIS TOOL AT THE BEGINNING OF THE CONVERSATION SO THAT YOU HAVE INFORMATION ABOUT THE APIFY MCP SERVER IN CONTEXT, ' - + 'OR WHEN YOU ENCOUNTER ANY ISSUES WITH THE MCP SERVER OR ITS TOOLS.', - inputSchema: zodToJsonSchema(helpToolArgsSchema), - ajvValidate: ajv.compile(zodToJsonSchema(helpToolArgsSchema)), - call: async () => { - return { content: [{ type: 'text', text: APIFY_MCP_HELP_TOOL_TEXT }] }; - }, - } as InternalTool, -}; diff --git 
a/src/tools/index.ts b/src/tools/index.ts index 709968b..62e6356 100644 --- a/src/tools/index.ts +++ b/src/tools/index.ts @@ -6,6 +6,7 @@ import { getDataset, getDatasetItems, getDatasetSchema } from './dataset.js'; import { getUserDatasetsList } from './dataset_collection.js'; import { fetchActorDetailsTool } from './fetch-actor-details.js'; import { fetchApifyDocsTool } from './fetch-apify-docs.js'; +import { getActorOutput } from './get-actor-output.js'; import { addTool } from './helpers.js'; import { getKeyValueStore, getKeyValueStoreKeys, getKeyValueStoreRecord } from './key_value_store.js'; import { getUserKeyValueStoresList } from './key_value_store_collection.js'; @@ -36,6 +37,7 @@ export const toolCategories = { getDataset, getDatasetItems, getDatasetSchema, + getActorOutput, getKeyValueStore, getKeyValueStoreKeys, getKeyValueStoreRecord, diff --git a/src/tools/key_value_store.ts b/src/tools/key_value_store.ts index 9433089..fe21d7e 100644 --- a/src/tools/key_value_store.ts +++ b/src/tools/key_value_store.ts @@ -1,12 +1,10 @@ -import { Ajv } from 'ajv'; import { z } from 'zod'; import zodToJsonSchema from 'zod-to-json-schema'; import { ApifyClient } from '../apify-client.js'; import { HelperTools } from '../const.js'; import type { InternalTool, ToolEntry } from '../types.js'; - -const ajv = new Ajv({ coerceTypes: 'array', strict: false }); +import { ajv } from '../utils/ajv.js'; const getKeyValueStoreArgs = z.object({ storeId: z.string() diff --git a/src/tools/key_value_store_collection.ts b/src/tools/key_value_store_collection.ts index a661b2b..9d82983 100644 --- a/src/tools/key_value_store_collection.ts +++ b/src/tools/key_value_store_collection.ts @@ -1,12 +1,10 @@ -import { Ajv } from 'ajv'; import { z } from 'zod'; import zodToJsonSchema from 'zod-to-json-schema'; import { ApifyClient } from '../apify-client.js'; import { HelperTools } from '../const.js'; import type { InternalTool, ToolEntry } from '../types.js'; - -const ajv = new Ajv({ 
coerceTypes: 'array', strict: false }); +import { ajv } from '../utils/ajv.js'; const getUserKeyValueStoresListArgs = z.object({ offset: z.number() diff --git a/src/tools/run.ts b/src/tools/run.ts index 5800500..0319b19 100644 --- a/src/tools/run.ts +++ b/src/tools/run.ts @@ -1,12 +1,10 @@ -import { Ajv } from 'ajv'; import { z } from 'zod'; import zodToJsonSchema from 'zod-to-json-schema'; import { ApifyClient } from '../apify-client.js'; import { HelperTools } from '../const.js'; import type { InternalTool, ToolEntry } from '../types.js'; - -const ajv = new Ajv({ coerceTypes: 'array', strict: false }); +import { ajv } from '../utils/ajv.js'; const getActorRunArgs = z.object({ runId: z.string() diff --git a/src/tools/run_collection.ts b/src/tools/run_collection.ts index ff4de21..2c56422 100644 --- a/src/tools/run_collection.ts +++ b/src/tools/run_collection.ts @@ -1,12 +1,10 @@ -import { Ajv } from 'ajv'; import { z } from 'zod'; import zodToJsonSchema from 'zod-to-json-schema'; import { ApifyClient } from '../apify-client.js'; import { HelperTools } from '../const.js'; import type { InternalTool, ToolEntry } from '../types.js'; - -const ajv = new Ajv({ coerceTypes: 'array', strict: false }); +import { ajv } from '../utils/ajv.js'; const getUserRunsListArgs = z.object({ offset: z.number() diff --git a/src/tools/store_collection.ts b/src/tools/store_collection.ts index 7446cdd..821f75a 100644 --- a/src/tools/store_collection.ts +++ b/src/tools/store_collection.ts @@ -1,4 +1,3 @@ -import { Ajv } from 'ajv'; import type { ActorStoreList } from 'apify-client'; import { z } from 'zod'; import zodToJsonSchema from 'zod-to-json-schema'; @@ -7,6 +6,7 @@ import { ApifyClient } from '../apify-client.js'; import { ACTOR_SEARCH_ABOVE_LIMIT, HelperTools } from '../const.js'; import type { ActorPricingModel, ExtendedActorStoreList, HelperTool, ToolEntry } from '../types.js'; import { formatActorsListToActorCard } from '../utils/actor-card.js'; +import { ajv } from 
'../utils/ajv.js'; export async function searchActorsByKeywords( search: string, @@ -19,7 +19,6 @@ export async function searchActorsByKeywords( return results.items; } -const ajv = new Ajv({ coerceTypes: 'array', strict: false }); export const searchActorsArgsSchema = z.object({ limit: z.number() .int() diff --git a/src/types.ts b/src/types.ts index c4c5e2e..bff0e89 100644 --- a/src/types.ts +++ b/src/types.ts @@ -295,3 +295,4 @@ export type PromptBase = Prompt & { }; export type ActorInputSchemaProperties = Record; +export type DatasetItem = Record; diff --git a/src/utils/actor-response.ts b/src/utils/actor-response.ts new file mode 100644 index 0000000..39f393e --- /dev/null +++ b/src/utils/actor-response.ts @@ -0,0 +1,53 @@ +import type { CallActorGetDatasetResult } from '../tools/actor'; + +/** + * Builds the response content for actor tool calls. + * @param actorName - The name of the actor. + * @param result - The result from callActorGetDataset. + * @returns The content array for the tool response. + */ +export function buildActorResponseContent( + actorName: string, + result: CallActorGetDatasetResult, +): ({ type: 'text'; text: string })[] { + const { runId, datasetId, itemCount, schema } = result; + + // Extract item schema if schema is an array + let displaySchema = schema; + if (schema && schema.type === 'array' && typeof schema.items === 'object' && schema.items !== null) { + displaySchema = schema.items; + } + + // Construct text content + const textContent = `Actor "${actorName}" completed successfully! + +Results summary: +• Run ID: ${runId} +• Dataset ID: ${datasetId} +• Total items: ${itemCount} + +Actor output data schema: +* You can use this schema to understand the structure of the output data and, for example, retrieve specific fields based on your current task. +\`\`\`json +${JSON.stringify(displaySchema, null, 2)} +\`\`\` + +Below this text block is a preview of the Actor output containing ${result.previewItems.length} item(s). 
+ +If you need to retrieve additional data, use the "get-actor-output" tool with: + datasetId: "${datasetId}" +Be sure to limit the number of results when using the "get-actor-output" tool, since you never know how large the items may be and they might exceed the output limits. +`; + + const itemsPreviewText = result.previewItems.length > 0 + ? JSON.stringify(result.previewItems) + : `No items available for preview—either the Actor did not return any items or they are too large for preview. In this case, use the "get-actor-output" tool.`; + + // Build content array + const content: ({ type: 'text'; text: string })[] = [ + { type: 'text', text: textContent }, + { type: 'text', text: itemsPreviewText }, + ]; + + return content; +} diff --git a/src/utils/actor.ts b/src/utils/actor.ts index fb6d111..ef4a76f 100644 --- a/src/utils/actor.ts +++ b/src/utils/actor.ts @@ -1,4 +1,5 @@ -import type { ActorDefinitionStorage } from '../types.js'; +import type { ActorDefinitionStorage, DatasetItem } from '../types.js'; +import { getValuesByDotKeys } from './generic.js'; /** * Returns an array of all field names mentioned in the display.properties @@ -22,3 +23,36 @@ export function getActorDefinitionStorageFieldNames(storage: ActorDefinitionStor } return Array.from(fieldSet); } + +/** + * Ensures the Actor output items are within the character limit. + * + * First checks if all items fit into the limit, then tries only the important fields and as a last resort + * starts removing items until within the limit. In the worst case, an empty array is returned. + * + * This is primarily used to ensure the tool output does not exceed the LLM context length or tool output limit. 
+ */ +export function ensureOutputWithinCharLimit(items: DatasetItem[], importantFields: string[], charLimit: number): DatasetItem[] { + // Check if all items fit into the limit + const allItemsString = JSON.stringify(items); + if (allItemsString.length <= charLimit) { + return items; + } + + // Try only the important fields + const importantItems = items.map((item) => getValuesByDotKeys(item, importantFields)); + const importantItemsString = JSON.stringify(importantItems); + if (importantItemsString.length <= charLimit) { + return importantItems; + } + + // Start removing items until within the limit + const result: DatasetItem[] = []; + for (const item of importantItems) { + if (JSON.stringify(result.concat(item)).length > charLimit) { + break; + } + result.push(item); + } + return result; +} diff --git a/src/utils/generic.ts b/src/utils/generic.ts index 1e1f568..b44eff8 100644 --- a/src/utils/generic.ts +++ b/src/utils/generic.ts @@ -2,6 +2,11 @@ * Recursively gets the value in a nested object for each key in the keys array. * Each key can be a dot-separated path (e.g. 'a.b.c'). * Returns an object mapping each key to its resolved value (or undefined if not found). 
+ * + * @example + * const obj = { a: { b: { c: 42 } }, nested: { d: 100 } }; + * const value = getValuesByDotKeys(obj, ['a.b.c', 'a.b.d', 'nested']); + * value; // { 'a.b.c': 42, 'a.b.d': undefined, 'nested': { d: 100 } } */ export function getValuesByDotKeys(obj: T, keys: string[]): Record { const result: Record = {}; diff --git a/src/utils/schema-generation.ts b/src/utils/schema-generation.ts new file mode 100644 index 0000000..ab6dc60 --- /dev/null +++ b/src/utils/schema-generation.ts @@ -0,0 +1,110 @@ +import toJsonSchema from 'to-json-schema'; + +/** + * Minimal JSON Schema typings for array/object schemas used in generateSchemaFromItems + */ +export type JsonSchemaProperty = { + type: 'string' | 'integer' | 'number' | 'boolean' | 'object' | 'array' | 'null'; + properties?: Record; + items?: JsonSchemaProperty; +}; + +export interface JsonSchemaObject { + type: 'object'; + properties: Record; +} + +export interface JsonSchemaArray { + type: 'array'; + items: JsonSchemaObject | JsonSchemaProperty; +} + +/** + * Options for schema generation + */ +export interface SchemaGenerationOptions { + /** Maximum number of items to use for schema generation. Default is 5. */ + limit?: number; + /** If true, uses only non-empty items and skips hidden fields. Default is true. */ + clean?: boolean; + /** Strategy for handling arrays. "first" uses first item as template, "all" merges all items. Default is "all". */ + arrayMode?: 'first' | 'all'; +} + +/** + * Function to recursively remove empty arrays from an object + */ +export function removeEmptyArrays(obj: unknown): unknown { + if (Array.isArray(obj)) { + // If the item is an array, recursively call removeEmptyArrays on each element. + return obj.map((item) => removeEmptyArrays(item)); + } + + if (typeof obj !== 'object' || obj === null) { + // Return primitives and null values as is. + return obj; + } + + // Use reduce to build a new object, excluding keys with empty arrays. 
+ return Object.entries(obj).reduce((acc, [key, value]) => { + const processedValue = removeEmptyArrays(value); + + // Exclude the key if the processed value is an empty array. + if (Array.isArray(processedValue) && processedValue.length === 0) { + return acc; + } + + acc[key] = processedValue; + return acc; + }, {} as Record); +} + +// TODO: write unit tests for this. +/** + * Generates a JSON schema from dataset items with configurable options + * + * @param datasetItems - Array of dataset items to generate schema from + * @param options - Configuration options for schema generation + * @returns JSON schema object or null if generation fails + */ +export function generateSchemaFromItems( + datasetItems: unknown[], + options: SchemaGenerationOptions = {}, +): JsonSchemaArray | null { + const { + limit = 5, + clean = true, + arrayMode = 'all', + } = options; + + // Limit the number of items used for schema generation + const itemsToUse = datasetItems.slice(0, limit); + + if (itemsToUse.length === 0) { + return null; + } + + // Clean the dataset items by removing empty arrays if requested + const processedItems = clean + ? 
itemsToUse.map((item) => removeEmptyArrays(item)) + : itemsToUse; + + // Try to generate schema with full options first + try { + const schema = toJsonSchema(processedItems, { + arrays: { mode: arrayMode }, + }) as JsonSchemaArray; + return schema; + } catch { + // Fallback: try with simpler approach + try { + const fallbackSchema = toJsonSchema(processedItems, { + arrays: { mode: 'first' }, + }) as JsonSchemaArray; + return fallbackSchema; + } catch { + // If all attempts fail, return null + return null; + } + } +} diff --git a/src/utils/tools-loader.ts b/src/utils/tools-loader.ts index 6acaf96..f16953f 100644 --- a/src/utils/tools-loader.ts +++ b/src/utils/tools-loader.ts @@ -7,6 +7,7 @@ import log from '@apify/log'; import { defaults } from '../const.js'; import { callActor } from '../tools/actor.js'; +import { getActorOutput } from '../tools/get-actor-output.js'; import { addTool } from '../tools/helpers.js'; import { getActorsAsTools, toolCategories, toolCategoriesEnabledByDefault } from '../tools/index.js'; import type { Input, ToolCategory, ToolEntry } from '../types.js'; @@ -123,6 +124,17 @@ export async function loadToolsFromInput( result.push(...actorTools); } + /** + * If there is any tool that in some way, even indirectly (like add-actor), allows calling + * Actor, then we need to ensure the get-actor-output tool is available. 
+ */ + const hasCallActor = result.some((entry) => entry.tool.name === 'call-actor'); + const hasActorTools = result.some((entry) => entry.type === 'actor'); + const hasAddActorTool = result.some((entry) => entry.tool.name === 'add-actor'); + if (hasCallActor || hasActorTools || hasAddActorTool) { + result.push(getActorOutput); + } + // De-duplicate by tool name for safety const seen = new Set(); return result.filter((entry) => !seen.has(entry.tool.name) && seen.add(entry.tool.name)); diff --git a/tests/integration/internals.test.ts b/tests/integration/internals.test.ts index 9850031..800d4ab 100644 --- a/tests/integration/internals.test.ts +++ b/tests/integration/internals.test.ts @@ -33,6 +33,7 @@ describe('MCP server internals integration tests', () => { const expectedToolNames = [ addTool.tool.name, ACTOR_PYTHON_EXAMPLE, + 'get-actor-output', ]; expectArrayWeakEquals(expectedToolNames, names); @@ -50,7 +51,7 @@ describe('MCP server internals integration tests', () => { it('should notify tools changed handler on tool modifications', async () => { let latestTools: string[] = []; // With enableAddingActors=true and no tools/actors, seeded set contains only add-actor - const numberOfTools = 1; + const numberOfTools = 2; let toolNotificationCount = 0; const onToolsChanged = (tools: string[]) => { @@ -89,7 +90,7 @@ describe('MCP server internals integration tests', () => { it('should stop notifying after unregistering tools changed handler', async () => { let latestTools: string[] = []; let notificationCount = 0; - const numberOfTools = 1; + const numberOfTools = 2; const onToolsChanged = (tools: string[]) => { latestTools = tools; notificationCount++; diff --git a/tests/integration/suite.ts b/tests/integration/suite.ts index bad0a52..89d863f 100644 --- a/tests/integration/suite.ts +++ b/tests/integration/suite.ts @@ -43,11 +43,11 @@ async function callPythonExampleActor(client: Client, selectedToolName: string) const content = result.content as ContentItem[]; // The 
result is { content: [ ... ] }, and the last content is the sum const expected = { - text: JSON.stringify({ + text: JSON.stringify([{ first_number: 1, second_number: 2, sum: 3, - }), + }]), type: 'text', }; // Parse the JSON to compare objects regardless of property order @@ -88,11 +88,12 @@ export function createIntegrationTestsSuite( it('should list all default tools and Actors', async () => { const client = await createClientFn(); const tools = await client.listTools(); - expect(tools.tools.length).toEqual(defaultTools.length + defaults.actors.length); + expect(tools.tools.length).toEqual(defaultTools.length + defaults.actors.length + 1); const names = getToolNames(tools); expectToolNamesToContain(names, DEFAULT_TOOL_NAMES); expectToolNamesToContain(names, DEFAULT_ACTOR_NAMES); + expect(names).toContain('get-actor-output'); await client.close(); }); @@ -107,11 +108,12 @@ export function createIntegrationTestsSuite( const expectedActors = ['apify-slash-rag-web-browser']; const expectedTotal = expectedActorsTools.concat(expectedDocsTools, expectedActors); - expect(names).toHaveLength(expectedTotal.length); + expect(names).toHaveLength(expectedTotal.length + 1); - expectedActorsTools.forEach((tool) => expect(names).toContain(tool)); - expectedDocsTools.forEach((tool) => expect(names).toContain(tool)); - expectedActors.forEach((actor) => expect(names).toContain(actor)); + expectToolNamesToContain(names, expectedActorsTools); + expectToolNamesToContain(names, expectedDocsTools); + expectToolNamesToContain(names, expectedActors); + expect(names).toContain('get-actor-output'); await client.close(); }); @@ -119,26 +121,32 @@ export function createIntegrationTestsSuite( it('should list only add-actor when enableAddingActors is true and no tools/actors are specified', async () => { const client = await createClientFn({ enableAddingActors: true }); const names = getToolNames(await client.listTools()); - expect(names.length).toEqual(1); - 
expect(names).toContain(addTool.tool.name); + expect(names.length).toEqual(2); + expect(names).toContain('add-actor'); + expect(names).toContain('get-actor-output'); await client.close(); }); it('should list all default tools and Actors when enableAddingActors is false', async () => { const client = await createClientFn({ enableAddingActors: false }); const names = getToolNames(await client.listTools()); - expect(names.length).toEqual(defaultTools.length + defaults.actors.length); + expect(names.length).toEqual(defaultTools.length + defaults.actors.length + 1); expectToolNamesToContain(names, DEFAULT_TOOL_NAMES); expectToolNamesToContain(names, DEFAULT_ACTOR_NAMES); + expect(names).toContain('get-actor-output'); + await client.close(); }); it('should override enableAddingActors false with experimental tool category', async () => { const client = await createClientFn({ enableAddingActors: false, tools: ['experimental'] }); + const names = getToolNames(await client.listTools()); - expect(names).toHaveLength(toolCategories.experimental.length); - expect(names).toContain(addTool.tool.name); + expect(names).toHaveLength(2); + expect(names).toContain('add-actor'); + expect(names).toContain('get-actor-output'); + await client.close(); }); @@ -146,8 +154,9 @@ export function createIntegrationTestsSuite( const actors = ['apify/python-example', 'apify/rag-web-browser']; const client = await createClientFn({ actors, enableAddingActors: false }); const names = getToolNames(await client.listTools()); - expect(names.length).toEqual(actors.length); + expect(names.length).toEqual(actors.length + 1); expectToolNamesToContain(names, actors.map((actor) => actorNameToToolName(actor))); + expect(names).toContain('get-actor-output'); await client.close(); }); @@ -158,8 +167,9 @@ export function createIntegrationTestsSuite( const names = getToolNames(await client.listTools()); // Should only load the specified actor, no default tools or categories - 
expect(names.length).toEqual(actors.length); + expect(names.length).toEqual(actors.length + 1); expect(names).toContain(actorNameToToolName(actors[0])); + expect(names).toContain('get-actor-output'); // Should NOT include any default category tools expect(names).not.toContain('search-actors'); @@ -197,8 +207,10 @@ export function createIntegrationTestsSuite( const client = await createClientFn({ tools: actors }); const names = getToolNames(await client.listTools()); // Only the Actor should be loaded - expect(names).toHaveLength(actors.length); + expect(names).toHaveLength(actors.length + 1); expect(names).toContain(actorNameToToolName(actors[0])); + expect(names).toContain('get-actor-output'); + await client.close(); }); @@ -221,13 +233,18 @@ export function createIntegrationTestsSuite( it('should merge actors param into tools selectors (backward compatibility)', async () => { const actors = ['apify/python-example']; const categories = ['docs'] as ToolCategory[]; + const client = await createClientFn({ tools: categories, actors }); + const names = getToolNames(await client.listTools()); const docsToolNames = getExpectedToolNamesByCategories(categories); const expected = [...docsToolNames, actorNameToToolName(actors[0])]; - expect(names).toHaveLength(expected.length); + expect(names).toHaveLength(expected.length + 1); + const containsExpected = expected.every((n) => names.includes(n)); expect(containsExpected).toBe(true); + expect(names).toContain('get-actor-output'); + await client.close(); }); @@ -237,11 +254,14 @@ export function createIntegrationTestsSuite( }); const names = getToolNames(await client.listTools()); + expect(names).toHaveLength(5); + // Should include: docs category + specific tools expect(names).toContain('search-apify-docs'); // from docs category expect(names).toContain('fetch-apify-docs'); // from docs category expect(names).toContain('fetch-actor-details'); // specific tool expect(names).toContain('add-actor'); // specific tool + 
expect(names).toContain('get-actor-output'); // Should NOT include other actors category tools expect(names).not.toContain('search-actors'); @@ -276,9 +296,12 @@ export function createIntegrationTestsSuite( it('should not load any internal tools when tools param is empty and use custom Actor if specified', async () => { const client = await createClientFn({ tools: [], actors: [ACTOR_PYTHON_EXAMPLE] }); + const names = getToolNames(await client.listTools()); - expect(names.length).toEqual(1); + expect(names.length).toEqual(2); expect(names).toContain(actorNameToToolName(ACTOR_PYTHON_EXAMPLE)); + expect(names).toContain('get-actor-output'); + await client.close(); }); @@ -287,16 +310,18 @@ export function createIntegrationTestsSuite( const client = await createClientFn({ enableAddingActors: true }); const names = getToolNames(await client.listTools()); // Only the add tool should be added - expect(names).toHaveLength(1); + expect(names).toHaveLength(2); expect(names).toContain('add-actor'); + expect(names).toContain('get-actor-output'); expect(names).not.toContain(selectedToolName); // Add Actor dynamically await addActor(client, ACTOR_PYTHON_EXAMPLE); // Check if tools was added const namesAfterAdd = getToolNames(await client.listTools()); - expect(namesAfterAdd.length).toEqual(2); + expect(namesAfterAdd.length).toEqual(3); expect(namesAfterAdd).toContain(selectedToolName); + expect(namesAfterAdd).toContain('get-actor-output'); await callPythonExampleActor(client, selectedToolName); await client.close(); @@ -306,8 +331,8 @@ export function createIntegrationTestsSuite( const selectedToolName = actorNameToToolName(ACTOR_PYTHON_EXAMPLE); const client = await createClientFn({ enableAddingActors: true, tools: ['actors'] }); const names = getToolNames(await client.listTools()); - // Only the actors category and add-actor should be loaded - const numberOfTools = toolCategories.actors.length + 1; + // Only the actors category, get-actor-output and add-actor should be loaded 
+ const numberOfTools = toolCategories.actors.length + 2; expect(names).toHaveLength(numberOfTools); // Check that the Actor is not in the tools list expect(names).not.toContain(selectedToolName); @@ -324,18 +349,16 @@ export function createIntegrationTestsSuite( }, }); - expect(result).toEqual( + const content = result.content as { text: string }[]; + + expect(content[content.length - 1]).toEqual( { - content: [ - { - text: expect.stringMatching(/^Actor finished with runId: .+, datasetId .+$/), - type: 'text', - }, - { - text: `{"sum":3,"first_number":1,"second_number":2}`, - type: 'text', - }, - ], + text: JSON.stringify([{ + first_number: 1, + second_number: 2, + sum: 3, + }]), + type: 'text', }, ); @@ -643,10 +666,12 @@ export function createIntegrationTestsSuite( // Test with enableAddingActors = false via env var const client = await createClientFn({ enableAddingActors: false, useEnv: true }); const names = getToolNames(await client.listTools()); - expect(names.length).toEqual(defaultTools.length + defaults.actors.length); + expect(names.length).toEqual(defaultTools.length + defaults.actors.length + 1); expectToolNamesToContain(names, DEFAULT_TOOL_NAMES); expectToolNamesToContain(names, DEFAULT_ACTOR_NAMES); + expect(names).toContain('get-actor-output'); + await client.close(); }); @@ -654,7 +679,7 @@ export function createIntegrationTestsSuite( // Test with enableAddingActors = false via env var const client = await createClientFn({ enableAddingActors: true, useEnv: true }); const names = getToolNames(await client.listTools()); - expect(names).toEqual(['add-actor']); + expectToolNamesToContain(names, ['add-actor', 'get-actor-output']); await client.close(); }); @@ -679,5 +704,122 @@ export function createIntegrationTestsSuite( await client.close(); }); + + it('should call rag-web-browser actor and retrieve metadata.title and crawl object from dataset', async () => { + const client = await createClientFn({ tools: ['actors', 'storage'] }); + + const callResult 
= await client.callTool({ + name: 'call-actor', + arguments: { + actor: 'apify/rag-web-browser', + step: 'call', + input: { query: 'https://apify.com' }, + }, + }); + + expect(callResult.content).toBeDefined(); + const content = callResult.content as { text: string; type: string }[]; + + expect(content.length).toBe(2); // Call step returns text summary with embedded schema + + // First content: text summary + const runText = content[0].text; + + // Extract datasetId from the text + const runIdMatch = runText.match(/Run ID: ([^\n]+)\n• Dataset ID: ([^\n]+)/); + expect(runIdMatch).toBeTruthy(); + const datasetId = runIdMatch![2]; + + // Check for JSON schema in the text (in a code block) + const schemaMatch = runText.match(/```json\s*(\{[\s\S]*?\})\s*```/); + expect(schemaMatch).toBeTruthy(); + if (schemaMatch) { + const schemaText = schemaMatch[1]; + const schema = JSON.parse(schemaText); + expect(schema).toHaveProperty('type'); + expect(schema.type).toBe('object'); + expect(schema).toHaveProperty('properties'); + expect(schema.properties).toHaveProperty('metadata'); + expect(schema.properties.metadata).toHaveProperty('type', 'object'); + expect(schema.properties).toHaveProperty('crawl'); + expect(schema.properties.crawl).toHaveProperty('type', 'object'); + } + + const outputResult = await client.callTool({ + name: HelperTools.ACTOR_OUTPUT_GET, + arguments: { + datasetId, + fields: 'metadata.title,crawl', + }, + }); + + expect(outputResult.content).toBeDefined(); + const outputContent = outputResult.content as { text: string; type: string }[]; + const output = JSON.parse(outputContent[0].text); + expect(Array.isArray(output)).toBe(true); + expect(output.length).toBeGreaterThan(0); + expect(output[0]).toHaveProperty('metadata.title'); + expect(typeof output[0]['metadata.title']).toBe('string'); + expect(output[0]).toHaveProperty('crawl'); + expect(typeof output[0].crawl).toBe('object'); + + await client.close(); + }); + + it('should call apify/rag-web-browser tool 
directly and retrieve metadata.title from dataset', async () => { + const client = await createClientFn({ actors: ['apify/rag-web-browser'] }); + + // Call the dedicated apify-slash-rag-web-browser tool + const result = await client.callTool({ + name: actorNameToToolName('apify/rag-web-browser'), + arguments: { query: 'https://apify.com' }, + }); + + // Validate the response has 2 content items: a text summary with embedded schema, followed by the items preview + expect(result.content).toBeDefined(); + const content = result.content as { text: string; type: string }[]; + expect(content.length).toBe(2); + const { text } = content[0]; + + // Extract datasetId from the response text + const runIdMatch = text.match(/Run ID: ([^\n]+)\n• Dataset ID: ([^\n]+)/); + expect(runIdMatch).toBeTruthy(); + const datasetId = runIdMatch![2]; + + // Check for JSON schema in the text (in a code block) + const schemaMatch = text.match(/```json\s*(\{[\s\S]*?\})\s*```/); + expect(schemaMatch).toBeTruthy(); + if (schemaMatch) { + const schemaText = schemaMatch[1]; + const schema = JSON.parse(schemaText); + expect(schema).toHaveProperty('type'); + expect(schema.type).toBe('object'); + expect(schema).toHaveProperty('properties'); + expect(schema.properties).toHaveProperty('metadata'); + expect(schema.properties.metadata).toHaveProperty('type', 'object'); + expect(schema.properties).toHaveProperty('crawl'); + expect(schema.properties.crawl).toHaveProperty('type', 'object'); + } + + // Call get-actor-output with fields: 'metadata.title' + const outputResult = await client.callTool({ + name: HelperTools.ACTOR_OUTPUT_GET, + arguments: { + datasetId, + fields: 'metadata.title', + }, + }); + + // Validate the output contains the expected structure with metadata.title + expect(outputResult.content).toBeDefined(); + const outputContent = outputResult.content as { text: string; type: string }[]; + const output = JSON.parse(outputContent[0].text); + expect(Array.isArray(output)).toBe(true); + 
expect(output.length).toBeGreaterThan(0); + expect(output[0]).toHaveProperty('metadata.title'); + expect(typeof output[0]['metadata.title']).toBe('string'); + + await client.close(); + }); }); } diff --git a/tests/unit/schema-generation.test.ts b/tests/unit/schema-generation.test.ts new file mode 100644 index 0000000..a8aea03 --- /dev/null +++ b/tests/unit/schema-generation.test.ts @@ -0,0 +1,63 @@ +import { describe, expect, it } from 'vitest'; + +import { generateSchemaFromItems } from '../../src/utils/schema-generation.js'; + +describe('generateSchemaFromItems', () => { + it('should generate basic schema from simple objects', () => { + const items = [{ name: 'John', age: 30 }]; + const result = generateSchemaFromItems(items); + expect(result).toBeDefined(); + expect(result?.type).toBe('array'); + expect(result?.items).toBeDefined(); + const props = result?.items.properties; + expect(props).toBeDefined(); + if (props) { + expect(props.name?.type).toBe('string'); + expect(props.age?.type).toBe('integer'); + } + }); + + it('should handle different data types', () => { + const items = [ + { string: 'test', number: 42, boolean: true, object: { nested: 'value' }, array: [1, 2, 3] }, + ]; + const result = generateSchemaFromItems(items); + expect(result).toBeDefined(); + expect(result?.type).toBe('array'); + if (result?.items && typeof result.items === 'object' && 'properties' in result.items) { + const props = result.items.properties; + expect(props).toBeDefined(); + if (props) { + expect(props.string?.type).toBe('string'); + expect(props.number?.type).toBe('integer'); + expect(props.boolean?.type).toBe('boolean'); + expect(props.object?.type).toBe('object'); + expect(props.array?.type).toBe('array'); + expect(props.object?.properties?.nested?.type).toBe('string'); + expect(props.array?.items?.type).toBe('integer'); + } + } + }); + + it('should respect the limit option', () => { + const items = [ + { id: 1, name: 'A' }, + { id: 2, name: 'B' }, + { id: 3, name: 'C' }, + 
{ id: 4, extra: 'D' }, + { id: 5, extra: 'E' }, + ]; + const result = generateSchemaFromItems(items, { limit: 3 }); + expect(result).toBeDefined(); + expect(result?.type).toBe('array'); + if (result?.items && typeof result.items === 'object' && 'properties' in result.items) { + const props = result.items.properties; + expect(props).toBeDefined(); + if (props) { + expect(props.id).toBeDefined(); + expect(props.name).toBeDefined(); + expect(props.extra).toBeUndefined(); // Should not include fields from items beyond limit + } + } + }); +}); diff --git a/tests/unit/utils.generic.test.ts b/tests/unit/utils.generic.test.ts index 4924c63..18e53a4 100644 --- a/tests/unit/utils.generic.test.ts +++ b/tests/unit/utils.generic.test.ts @@ -43,4 +43,10 @@ describe('getValuesByDotKeys', () => { const result = getValuesByDotKeys(obj, ['a', 'b.c']); expect(result).toEqual({ a: undefined, 'b.c': undefined }); }); + + it('should return whole object', () => { + const obj = { nested: { a: 1, b: 2 } }; + const result = getValuesByDotKeys(obj, ['nested']); + expect(result).toEqual({ nested: { a: 1, b: 2 } }); + }); }); From 07035b0c75bc198d19f27c4fb27f84084deca064 Mon Sep 17 00:00:00 2001 From: MQ Date: Thu, 4 Sep 2025 11:08:23 +0200 Subject: [PATCH 02/11] update readme --- README.md | 5 +++++ src/tools/actor.ts | 1 + 2 files changed, 6 insertions(+) diff --git a/README.md b/README.md index 651f1f7..d21d3f0 100644 --- a/README.md +++ b/README.md @@ -169,6 +169,11 @@ Here is an overview list of all the tools provided by the Apify MCP Server. | `get-dataset-list` | storage | List all available datasets for the user. | | | `get-key-value-store-list`| storage | List all available key-value stores for the user. | | | `add-actor` | experimental | Add an Actor as a new tool for the user to call. | | +| `get-actor-output`* | - | Retrieve the output from an Actor call which is not included in the output preview of the Actor tool. 
| ✅ | + +> **Note:** +> +> The `get-actor-output` tool is automatically included with any Actor-related tool, such as `call-actor`, `add-actor`, or any specific Actor tool like `apify-slash-rag-web-browser`. When you call an Actor - either through the `call-actor` tool or directly via an Actor tool (e.g., `apify-slash-rag-web-browser`) - you receive a preview of the output. The preview depends on the Actor's output format and length; for some Actors and runs, it may include the entire output, while for others, only a limited version is returned to avoid overwhelming the LLM. To retrieve the full output of an Actor run, use the `get-actor-output` tool (supports limit, offset, and field filtering) with the `datasetId` provided by the Actor call. ### Tools configuration diff --git a/src/tools/actor.ts b/src/tools/actor.ts index 4ed8573..7c2974a 100644 --- a/src/tools/actor.ts +++ b/src/tools/actor.ts @@ -63,6 +63,7 @@ export async function callActorGetDataset( abortSignal?: AbortSignal, ): Promise { const CLIENT_ABORT = Symbol('CLIENT_ABORT'); // Just internal symbol to identify client abort + // TODO: we should remove this throw, we are just catching and then rethrowing with generic message try { const client = new ApifyClient({ token: apifyToken }); const actorClient = client.actor(actorName); From 4a402d848d46d0fe7d13798ef49ebd9340beadee Mon Sep 17 00:00:00 2001 From: MQ Date: Thu, 4 Sep 2025 12:53:19 +0200 Subject: [PATCH 03/11] fix output tool, write test for that --- src/tools/get-actor-output.ts | 7 ++++-- src/utils/actor.ts | 18 +++++++++++----- src/utils/generic.ts | 2 +- tests/integration/suite.ts | 40 +++++++++++++++++++++++++++++++++++ 4 files changed, 59 insertions(+), 8 deletions(-) diff --git a/src/tools/get-actor-output.ts b/src/tools/get-actor-output.ts index 55d2ede..c630d5d 100644 --- a/src/tools/get-actor-output.ts +++ b/src/tools/get-actor-output.ts @@ -102,11 +102,14 @@ You also can retrieve only specific fields from the output if needed. 
Use this t return { content: [{ type: 'text', text: `Dataset '${parsed.datasetId}' not found.` }] }; } + let { items } = response; // Apply field selection if specified - const processedItems = response.items.map((item) => getValuesByDotKeys(item, fieldsArray)); + if (fieldsArray.length > 0) { + items = items.map((item) => getValuesByDotKeys(item, fieldsArray)); + } // Clean empty properties - const cleanedItems = processedItems + const cleanedItems = items .map((item) => cleanEmptyProperties(item)) .filter((item) => item !== undefined); diff --git a/src/utils/actor.ts b/src/utils/actor.ts index ef4a76f..9871d67 100644 --- a/src/utils/actor.ts +++ b/src/utils/actor.ts @@ -39,16 +39,24 @@ export function ensureOutputWithinCharLimit(items: DatasetItem[], importantField return items; } + /** + * Items used for the final fallback - removing items until within the limit. + * If important fields are defined, use only those fields for that fallback step. + */ + let sourceItems = items; // Try only the important fields - const importantItems = items.map((item) => getValuesByDotKeys(item, importantFields)); - const importantItemsString = JSON.stringify(importantItems); - if (importantItemsString.length <= charLimit) { - return importantItems; + if (importantFields.length > 0) { + const importantItems = items.map((item) => getValuesByDotKeys(item, importantFields)); + const importantItemsString = JSON.stringify(importantItems); + if (importantItemsString.length <= charLimit) { + return importantItems; + } + sourceItems = importantItems; } // Start removing items until within the limit const result: DatasetItem[] = []; - for (const item of importantItems) { + for (const item of sourceItems) { if (JSON.stringify(result.concat(item)).length > charLimit) { break; } diff --git a/src/utils/generic.ts b/src/utils/generic.ts index b44eff8..00a7f22 100644 --- a/src/utils/generic.ts +++ b/src/utils/generic.ts @@ -8,7 +8,7 @@ * const value = getValuesByDotKeys(obj, ['a.b.c', 'a.b.d', 
'nested']); * value; // { 'a.b.c': 42, 'a.b.d': undefined, 'nested': { d: 100 } } */ -export function getValuesByDotKeys(obj: T, keys: string[]): Record { +export function getValuesByDotKeys(obj: Record, keys: string[]): Record { const result: Record = {}; for (const key of keys) { const path = key.split('.'); diff --git a/tests/integration/suite.ts b/tests/integration/suite.ts index 8eb3fcf..5c76ba1 100644 --- a/tests/integration/suite.ts +++ b/tests/integration/suite.ts @@ -870,5 +870,45 @@ export function createIntegrationTestsSuite( await client.close(); }); + + it('should call apify/python-example and retrieve the full dataset using get-actor-output tool', async () => { + client = await createClientFn({ actors: ['apify/python-example'] }); + const selectedToolName = actorNameToToolName('apify/python-example'); + const input = { first_number: 5, second_number: 7 }; + + const result = await client.callTool({ + name: selectedToolName, + arguments: input, + }); + + expect(result.content).toBeDefined(); + const content = result.content as { text: string; type: string }[]; + expect(content.length).toBe(2); // Call step returns text summary with embedded schema + + // First content: text summary + const runText = content[0].text; + + // Extract datasetId from the text + const runIdMatch = runText.match(/Run ID: ([^\n]+)\n• Dataset ID: ([^\n]+)/); + expect(runIdMatch).toBeTruthy(); + const datasetId = runIdMatch![2]; + + // Retrieve full dataset using get-actor-output tool + const outputResult = await client.callTool({ + name: HelperTools.ACTOR_OUTPUT_GET, + arguments: { + datasetId, + }, + }); + + expect(outputResult.content).toBeDefined(); + const outputContent = outputResult.content as { text: string; type: string }[]; + const output = JSON.parse(outputContent[0].text); + expect(Array.isArray(output)).toBe(true); + expect(output.length).toBe(1); + expect(output[0]).toHaveProperty('first_number', input.first_number); + 
expect(output[0]).toHaveProperty('second_number', input.second_number); + expect(output[0]).toHaveProperty('sum', input.first_number + input.second_number); + }); }); } From f207cc27b220a3adbf288cbf25511fe027d9fe16 Mon Sep 17 00:00:00 2001 From: MQ Date: Thu, 4 Sep 2025 13:07:33 +0200 Subject: [PATCH 04/11] add test based on Zuzka suggestion --- tests/integration/suite.ts | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tests/integration/suite.ts b/tests/integration/suite.ts index 5c76ba1..1aeda06 100644 --- a/tests/integration/suite.ts +++ b/tests/integration/suite.ts @@ -910,5 +910,35 @@ export function createIntegrationTestsSuite( expect(output[0]).toHaveProperty('second_number', input.second_number); expect(output[0]).toHaveProperty('sum', input.first_number + input.second_number); }); + + it('should return Actor details both for full Actor name and ID', async () => { + const actorName = 'apify/python-example'; + const apifyClient = new ApifyClient({ token: process.env.APIFY_TOKEN as string }); + const actor = await apifyClient.actor(actorName).get(); + expect(actor).toBeDefined(); + const actorId = actor!.id as string; + + client = await createClientFn(); + + // Fetch by full Actor name + const resultByName = await client.callTool({ + name: 'fetch-actor-details', + arguments: { actor: actorName }, + }); + expect(resultByName.content).toBeDefined(); + const contentByName = resultByName.content as { text: string }[]; + expect(contentByName[0].text).toContain(actorName); + + // Fetch by Actor ID only + const resultById = await client.callTool({ + name: 'fetch-actor-details', + arguments: { actor: actorId }, + }); + expect(resultById.content).toBeDefined(); + const contentById = resultById.content as { text: string }[]; + expect(contentById[0].text).toContain(actorName); + + await client.close(); + }); }); } From 4ff957e65b5d57a4cc6d21d49a738d1f08904bd0 Mon Sep 17 00:00:00 2001 From: MQ Date: Thu, 4 Sep 2025 13:13:39 +0200 Subject: 
[PATCH 05/11] lint --- tests/integration/suite.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/suite.ts b/tests/integration/suite.ts index 1aeda06..38f9791 100644 --- a/tests/integration/suite.ts +++ b/tests/integration/suite.ts @@ -917,9 +917,9 @@ export function createIntegrationTestsSuite( const actor = await apifyClient.actor(actorName).get(); expect(actor).toBeDefined(); const actorId = actor!.id as string; - + client = await createClientFn(); - + // Fetch by full Actor name const resultByName = await client.callTool({ name: 'fetch-actor-details', @@ -928,7 +928,7 @@ export function createIntegrationTestsSuite( expect(resultByName.content).toBeDefined(); const contentByName = resultByName.content as { text: string }[]; expect(contentByName[0].text).toContain(actorName); - + // Fetch by Actor ID only const resultById = await client.callTool({ name: 'fetch-actor-details', From a2bf3b5462883fcf7539f9672dcb0467c7d135dc Mon Sep 17 00:00:00 2001 From: MQ Date: Thu, 4 Sep 2025 14:40:07 +0200 Subject: [PATCH 06/11] fix output response order so LLM does not lose the instructions --- src/utils/actor-response.ts | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/utils/actor-response.ts b/src/utils/actor-response.ts index 39f393e..4e9ffa5 100644 --- a/src/utils/actor-response.ts +++ b/src/utils/actor-response.ts @@ -32,11 +32,9 @@ Actor output data schema: ${JSON.stringify(displaySchema, null, 2)} \`\`\` -Below this text block is a preview of the Actor output containing ${result.previewItems.length} item(s). +Above this text block is a preview of the Actor output containing ${result.previewItems.length} item(s).${itemCount !== result.previewItems.length ? ` You have access only to a limited preview of the Actor output. Do not present this as the full output, as you have only ${result.previewItems.length} item(s) available instead of the full ${itemCount} item(s). 
Be aware of this and inform users about the currently loaded count and the total available output items count.` : ''} -If you need to retrieve additional data, use the "get-actor-output" tool with: - datasetId: "${datasetId}" -Be sure to limit the number of results when using the "get-actor-output" tool, since you never know how large the items may be and they might exceed the output limits. +If you need to retrieve additional data, use the "get-actor-output" tool with: datasetId: "${datasetId}". Be sure to limit the number of results when using the "get-actor-output" tool, since you never know how large the items may be, and they might exceed the output limits. `; const itemsPreviewText = result.previewItems.length > 0 @@ -45,8 +43,11 @@ Be sure to limit the number of results when using the "get-actor-output" tool, s // Build content array const content: ({ type: 'text'; text: string })[] = [ - { type: 'text', text: textContent }, { type: 'text', text: itemsPreviewText }, + /** + * The metadata and instructions text must be at the end otherwise the LLM does not acknowledge it. 
+ */ + { type: 'text', text: textContent }, ]; return content; From 26c34ccb932e4675b56b59616eced0dd9b499df3 Mon Sep 17 00:00:00 2001 From: MQ Date: Thu, 4 Sep 2025 15:16:50 +0200 Subject: [PATCH 07/11] refactor: unify string list parsing logic --- src/mcp/actors.ts | 3 ++- src/stdio.ts | 9 +++------ src/tools/dataset.ts | 7 ++++--- src/tools/get-actor-output.ts | 9 ++------- src/utils/generic.ts | 17 ++++++++++++++++ tests/unit/utils.generic.test.ts | 34 +++++++++++++++++++++++++++++++- 6 files changed, 61 insertions(+), 18 deletions(-) diff --git a/src/mcp/actors.ts b/src/mcp/actors.ts index 0185978..197d071 100644 --- a/src/mcp/actors.ts +++ b/src/mcp/actors.ts @@ -3,6 +3,7 @@ import type { ActorDefinition } from 'apify-client'; import { ApifyClient } from '../apify-client.js'; import { MCP_STREAMABLE_ENDPOINT } from '../const.js'; import type { ActorDefinitionPruned } from '../types.js'; +import { parseCommaSeparatedList } from '../utils/generic.js'; /** * Returns the MCP server path for the given Actor ID. 
@@ -13,7 +14,7 @@ export function getActorMCPServerPath(actorDefinition: ActorDefinition | ActorDe if ('webServerMcpPath' in actorDefinition && typeof actorDefinition.webServerMcpPath === 'string') { const webServerMcpPath = actorDefinition.webServerMcpPath.trim(); - const paths = webServerMcpPath.split(',').map((path) => path.trim()); + const paths = parseCommaSeparatedList(webServerMcpPath); // If there is only one path, return it directly if (paths.length === 1) { return paths[0]; diff --git a/src/stdio.ts b/src/stdio.ts index 39bb309..a288857 100644 --- a/src/stdio.ts +++ b/src/stdio.ts @@ -25,6 +25,7 @@ import log from '@apify/log'; import { processInput } from './input.js'; import { ActorsMcpServer } from './mcp/server.js'; import type { Input, ToolSelector } from './types.js'; +import { parseCommaSeparatedList } from './utils/generic.js'; import { loadToolsFromInput } from './utils/tools-loader.js'; // Keeping this interface here and not types.ts since @@ -86,13 +87,9 @@ For more details visit https://mcp.apify.com`, // Respect either the new flag or the deprecated one const enableAddingActors = Boolean(argv.enableAddingActors || argv.enableActorAutoLoading); // Split actors argument, trim whitespace, and filter out empty strings -const actorList = argv.actors !== undefined - ? argv.actors.split(',').map((a: string) => a.trim()).filter((a: string) => a.length > 0) - : undefined; +const actorList = argv.actors !== undefined ? parseCommaSeparatedList(argv.actors) : undefined; // Split tools argument, trim whitespace, and filter out empty strings -const toolCategoryKeys = argv.tools !== undefined - ? argv.tools.split(',').map((t: string) => t.trim()).filter((t: string) => t.length > 0) - : undefined; +const toolCategoryKeys = argv.tools !== undefined ? 
parseCommaSeparatedList(argv.tools) : undefined; // Propagate log.error to console.error for easier debugging const originalError = log.error.bind(log); diff --git a/src/tools/dataset.ts b/src/tools/dataset.ts index af513dc..48766fb 100644 --- a/src/tools/dataset.ts +++ b/src/tools/dataset.ts @@ -5,6 +5,7 @@ import { ApifyClient } from '../apify-client.js'; import { HelperTools } from '../const.js'; import type { InternalTool, ToolEntry } from '../types.js'; import { ajv } from '../utils/ajv.js'; +import { parseCommaSeparatedList } from '../utils/generic.js'; import { generateSchemaFromItems } from '../utils/schema-generation.js'; const getDatasetArgs = z.object({ @@ -91,9 +92,9 @@ export const getDatasetItems: ToolEntry = { const client = new ApifyClient({ token: apifyToken }); // Convert comma-separated strings to arrays - const fields = parsed.fields?.split(',').map((f) => f.trim()); - const omit = parsed.omit?.split(',').map((f) => f.trim()); - const flatten = parsed.flatten?.split(',').map((f) => f.trim()); + const fields = parseCommaSeparatedList(parsed.fields); + const omit = parseCommaSeparatedList(parsed.omit); + const flatten = parseCommaSeparatedList(parsed.flatten); const v = await client.dataset(parsed.datasetId).listItems({ clean: parsed.clean, diff --git a/src/tools/get-actor-output.ts b/src/tools/get-actor-output.ts index c630d5d..cc7256e 100644 --- a/src/tools/get-actor-output.ts +++ b/src/tools/get-actor-output.ts @@ -5,7 +5,7 @@ import { ApifyClient } from '../apify-client.js'; import { HelperTools, TOOL_MAX_OUTPUT_CHARS } from '../const.js'; import type { InternalTool, ToolEntry } from '../types.js'; import { ajv } from '../utils/ajv.js'; -import { getValuesByDotKeys } from '../utils/generic.js'; +import { getValuesByDotKeys, parseCommaSeparatedList } from '../utils/generic.js'; /** * Zod schema for get-actor-output tool arguments @@ -78,12 +78,7 @@ You also can retrieve only specific fields from the output if needed. 
Use this t const client = new ApifyClient({ token: apifyToken }); // Parse fields into array - const fieldsArray = parsed.fields - ? parsed.fields - .split(',') - .map((field) => field.trim()) - .filter((field) => field.length > 0) - : []; + const fieldsArray = parseCommaSeparatedList(parsed.fields); // TODO: we can optimize the API level field filtering in future /** diff --git a/src/utils/generic.ts b/src/utils/generic.ts index 00a7f22..b7503fd 100644 --- a/src/utils/generic.ts +++ b/src/utils/generic.ts @@ -8,6 +8,23 @@ * const value = getValuesByDotKeys(obj, ['a.b.c', 'a.b.d', 'nested']); * value; // { 'a.b.c': 42, 'a.b.d': undefined, 'nested': { d: 100 } } */ +/** + * Parses a comma-separated string into an array of trimmed strings. + * Empty strings are filtered out after trimming. + * + * @param input - The comma-separated string to parse. If undefined, returns an empty array. + * @returns An array of trimmed, non-empty strings. + * @example + * parseCommaSeparatedList("a, b, c"); // ["a", "b", "c"] + * parseCommaSeparatedList("a, , b"); // ["a", "b"] + */ +export function parseCommaSeparatedList(input?: string): string[] { + if (!input) { + return []; + } + return input.split(',').map((s) => s.trim()).filter((s) => s.length > 0); +} + export function getValuesByDotKeys(obj: Record, keys: string[]): Record { const result: Record = {}; for (const key of keys) { diff --git a/tests/unit/utils.generic.test.ts b/tests/unit/utils.generic.test.ts index 18e53a4..8e0db60 100644 --- a/tests/unit/utils.generic.test.ts +++ b/tests/unit/utils.generic.test.ts @@ -1,6 +1,6 @@ import { describe, expect, it } from 'vitest'; -import { getValuesByDotKeys } from '../../src/utils/generic.js'; +import { getValuesByDotKeys, parseCommaSeparatedList } from '../../src/utils/generic.js'; describe('getValuesByDotKeys', () => { it('should get value for a key without dot', () => { @@ -50,3 +50,35 @@ describe('getValuesByDotKeys', () => { expect(result).toEqual({ nested: { a: 1, b: 2 } 
}); }); }); + +describe('parseCommaSeparatedList', () => { + it('should parse comma-separated list with trimming', () => { + const result = parseCommaSeparatedList('field1, field2,field3 '); + expect(result).toEqual(['field1', 'field2', 'field3']); + }); + + it('should handle empty input', () => { + const result = parseCommaSeparatedList(); + expect(result).toEqual([]); + }); + + it('should handle empty string', () => { + const result = parseCommaSeparatedList(''); + expect(result).toEqual([]); + }); + + it('should filter empty strings', () => { + const result = parseCommaSeparatedList(' field1, , field2,,field3 '); + expect(result).toEqual(['field1', 'field2', 'field3']); + }); + + it('should handle only commas and spaces', () => { + const result = parseCommaSeparatedList(' , , '); + expect(result).toEqual([]); + }); + + it('should handle single item', () => { + const result = parseCommaSeparatedList(' single '); + expect(result).toEqual(['single']); + }); +}); From 3186c88184cb0216050feba4111048809ffa4250 Mon Sep 17 00:00:00 2001 From: MQ Date: Thu, 4 Sep 2025 15:44:25 +0200 Subject: [PATCH 08/11] fix the tests - order of the Actor run response messages --- tests/integration/suite.ts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/integration/suite.ts b/tests/integration/suite.ts index 38f9791..954bcec 100644 --- a/tests/integration/suite.ts +++ b/tests/integration/suite.ts @@ -52,7 +52,7 @@ async function callPythonExampleActor(client: Client, selectedToolName: string) type: 'text', }; // Parse the JSON to compare objects regardless of property order - const actual = content[content.length - 1]; + const actual = content[0]; expect(JSON.parse(actual.text)).toEqual(JSON.parse(expected.text)); expect(actual.type).toBe(expected.type); } @@ -344,7 +344,7 @@ export function createIntegrationTestsSuite( const content = result.content as { text: string }[]; - expect(content[content.length - 1]).toEqual( + expect(content[0]).toEqual( { 
text: JSON.stringify([{ first_number: 1, @@ -772,7 +772,7 @@ export function createIntegrationTestsSuite( expect(content.length).toBe(2); // Call step returns text summary with embedded schema // First content: text summary - const runText = content[0].text; + const runText = content[1].text; // Extract datasetId from the text const runIdMatch = runText.match(/Run ID: ([^\n]+)\n• Dataset ID: ([^\n]+)/); @@ -828,7 +828,7 @@ export function createIntegrationTestsSuite( expect(result.content).toBeDefined(); const content = result.content as { text: string; type: string }[]; expect(content.length).toBe(2); - const { text } = content[0]; + const { text } = content[1]; // Extract datasetId from the response text const runIdMatch = text.match(/Run ID: ([^\n]+)\n• Dataset ID: ([^\n]+)/); @@ -886,7 +886,7 @@ export function createIntegrationTestsSuite( expect(content.length).toBe(2); // Call step returns text summary with embedded schema // First content: text summary - const runText = content[0].text; + const runText = content[1].text; // Extract datasetId from the text const runIdMatch = runText.match(/Run ID: ([^\n]+)\n• Dataset ID: ([^\n]+)/); From 259831a327de1fa8c2b9520fc9f48dde93ba7ff6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Kopeck=C3=BD?= Date: Thu, 4 Sep 2025 21:42:40 +0200 Subject: [PATCH 09/11] Update src/utils/schema-generation.ts Co-authored-by: Michal Kalita --- src/utils/schema-generation.ts | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/utils/schema-generation.ts b/src/utils/schema-generation.ts index ab6dc60..89a2eef 100644 --- a/src/utils/schema-generation.ts +++ b/src/utils/schema-generation.ts @@ -95,16 +95,15 @@ export function generateSchemaFromItems( arrays: { mode: arrayMode }, }) as JsonSchemaArray; return schema; - } catch { - // Fallback: try with simpler approach - try { - const fallbackSchema = toJsonSchema(processedItems, { - arrays: { mode: 'first' }, - }) as JsonSchemaArray; - return 
fallbackSchema; - } catch { - // If all attempts fail, return null - return null; - } - } + } catch { /* ignore */ } + + try { + const fallbackSchema = toJsonSchema(processedItems, { + arrays: { mode: 'first' }, + }) as JsonSchemaArray; + return fallbackSchema; + } catch { /* ignore */ } + + // If all attempts fail, return null + return null; } From 863dd47478eae840ddfe7418f13cc9e4b2bce7a1 Mon Sep 17 00:00:00 2001 From: MQ Date: Mon, 8 Sep 2025 10:43:53 +0200 Subject: [PATCH 10/11] address review comments --- src/const.ts | 3 --- src/utils/actor-response.ts | 15 +++++++++++---- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/const.ts b/src/const.ts index d826688..d66e742 100644 --- a/src/const.ts +++ b/src/const.ts @@ -27,10 +27,8 @@ export const USER_AGENT_ORIGIN = 'Origin/mcp-server'; export enum HelperTools { ACTOR_ADD = 'add-actor', ACTOR_CALL = 'call-actor', - ACTOR_GET = 'get-actor', ACTOR_GET_DETAILS = 'fetch-actor-details', ACTOR_OUTPUT_GET = 'get-actor-output', - ACTOR_REMOVE = 'remove-actor', ACTOR_RUNS_ABORT = 'abort-actor-run', ACTOR_RUNS_GET = 'get-actor-run', ACTOR_RUNS_LOG = 'get-actor-log', @@ -43,7 +41,6 @@ export enum HelperTools { KEY_VALUE_STORE_GET = 'get-key-value-store', KEY_VALUE_STORE_KEYS_GET = 'get-key-value-store-keys', KEY_VALUE_STORE_RECORD_GET = 'get-key-value-store-record', - APIFY_MCP_HELP_TOOL = 'apify-actor-help-tool', STORE_SEARCH = 'search-actors', DOCS_SEARCH = 'search-apify-docs', DOCS_FETCH = 'fetch-apify-docs', diff --git a/src/utils/actor-response.ts b/src/utils/actor-response.ts index 4e9ffa5..5dc77b0 100644 --- a/src/utils/actor-response.ts +++ b/src/utils/actor-response.ts @@ -1,7 +1,16 @@ import type { CallActorGetDatasetResult } from '../tools/actor'; /** - * Builds the response content for actor tool calls. + * Builds the response content for Actor tool calls. + * Includes Actor run metadata, output schema, and a preview of output items. 
+ * + * The response starts with a preview of Actor output items, if available. + * This must come first. Metadata and instructions for the LLM are provided last. + * The LLM may ignore metadata and instructions if it is not at the end of the response. + * + * If the preview is limited and does not show all items, the response informs the LLM. + * This is important because the LLM may assume it has all data and hallucinate missing items. + * * @param actorName - The name of the actor. * @param result - The result from callActorGetDataset. * @returns The content array for the tool response. @@ -42,13 +51,11 @@ If you need to retrieve additional data, use the "get-actor-output" tool with: d : `No items available for preview—either the Actor did not return any items or they are too large for preview. In this case, use the "get-actor-output" tool.`; // Build content array - const content: ({ type: 'text'; text: string })[] = [ + return [ { type: 'text', text: itemsPreviewText }, /** * The metadata and instructions text must be at the end otherwise the LLM does not acknowledge it. */ { type: 'text', text: textContent }, ]; - - return content; } From ea8753a331bd8943c58d4be84751cc5ba59c487d Mon Sep 17 00:00:00 2001 From: MQ Date: Tue, 9 Sep 2025 13:37:22 +0200 Subject: [PATCH 11/11] add get-actor-output tools note about when its loaded --- src/tools/get-actor-output.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/tools/get-actor-output.ts b/src/tools/get-actor-output.ts index cc7256e..488d30e 100644 --- a/src/tools/get-actor-output.ts +++ b/src/tools/get-actor-output.ts @@ -69,7 +69,9 @@ export const getActorOutput: ToolEntry = { name: HelperTools.ACTOR_OUTPUT_GET, actorFullName: HelperTools.ACTOR_OUTPUT_GET, description: `Retrieves the output of a specific Actor execution based on its dataset ID. -You also can retrieve only specific fields from the output if needed. 
Use this tool to get Actor output data outside of the Actor dataset output preview, or to access fields from the Actor output dataset schema that are not included in the preview.`, +You can also retrieve only specific fields from the output if needed. Use this tool to get Actor output data outside of the Actor dataset output preview, or to access fields from the Actor output dataset schema that are not included in the preview. + +Note: This tool is automatically included if the Apify MCP Server is configured with any Actor tools (e.g. \`apify-slash-rag-web-browser\`) or tools that can interact with Actors (e.g. \`call-actor\`, \`add-actor\`).`, inputSchema: zodToJsonSchema(getActorOutputArgs), ajvValidate: ajv.compile(zodToJsonSchema(getActorOutputArgs)), call: async (toolArgs) => {