Skip to content

Commit ad7b772

Browse files
committed
feat: improve actor tool output
1 parent ff565f7 commit ad7b772

26 files changed

+684
-252
lines changed

src/const.ts

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,15 @@ export const ACTOR_RUN_DATASET_OUTPUT_MAX_ITEMS = 5;
88
// Actor run const
99
export const ACTOR_MAX_MEMORY_MBYTES = 4_096; // If the Actor requires 8GB of memory, free users can't run actors-mcp-server and requested Actor
1010

11+
// Tool output
12+
/**
13+
* The usual tool output limit is 25k tokens; we use 20k to leave a safety margin.
14+
* Assuming 1 token =~ 4 characters, this gives a limit of 80k chars.
15+
* This is primarily used for Actor tool call output, but we can then
16+
* reuse this in other tools as well.
17+
*/
18+
export const TOOL_MAX_OUTPUT_CHARS = 80000;
19+
1120
// MCP Server
1221
export const SERVER_NAME = 'apify-mcp-server';
1322
export const SERVER_VERSION = '1.0.0';
@@ -20,6 +29,7 @@ export enum HelperTools {
2029
ACTOR_CALL = 'call-actor',
2130
ACTOR_GET = 'get-actor',
2231
ACTOR_GET_DETAILS = 'fetch-actor-details',
32+
ACTOR_OUTPUT_GET = 'get-actor-output',
2333
ACTOR_REMOVE = 'remove-actor',
2434
ACTOR_RUNS_ABORT = 'abort-actor-run',
2535
ACTOR_RUNS_GET = 'get-actor-run',
@@ -54,12 +64,12 @@ export const APIFY_DOCS_CACHE_MAX_SIZE = 500;
5464
export const APIFY_DOCS_CACHE_TTL_SECS = 60 * 60; // 1 hour
5565

5666
export const ACTOR_PRICING_MODEL = {
57-
/** Rental actors */
67+
/** Rental Actors */
5868
FLAT_PRICE_PER_MONTH: 'FLAT_PRICE_PER_MONTH',
5969
FREE: 'FREE',
60-
/** Pay per result (PPR) actors */
70+
/** Pay per result (PPR) Actors */
6171
PRICE_PER_DATASET_ITEM: 'PRICE_PER_DATASET_ITEM',
62-
/** Pay per event (PPE) actors */
72+
/** Pay per event (PPE) Actors */
6373
PAY_PER_EVENT: 'PAY_PER_EVENT',
6474
} as const;
6575

src/main.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,10 @@ if (STANDBY_MODE) {
4444
await Actor.fail('If you need to debug a specific Actor, please provide the debugActor and debugActorInput fields in the input');
4545
}
4646
const options = { memory: input.maxActorMemoryBytes } as ActorCallOptions;
47-
const { items } = await callActorGetDataset(input.debugActor!, input.debugActorInput!, process.env.APIFY_TOKEN, options);
47+
const { previewItems } = await callActorGetDataset(input.debugActor!, input.debugActorInput!, process.env.APIFY_TOKEN, options);
4848

49-
await Actor.pushData(items);
50-
log.info('Pushed items to dataset', { itemCount: items.count });
49+
await Actor.pushData(previewItems);
50+
log.info('Pushed items to dataset', { itemCount: previewItems.length });
5151
await Actor.exit();
5252
}
5353

src/mcp/proxy.ts

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
import type { Client } from '@modelcontextprotocol/sdk/client/index.js';
2-
import Ajv from 'ajv';
32

43
import { fixedAjvCompile } from '../tools/utils.js';
54
import type { ActorMcpTool, ToolEntry } from '../types.js';
5+
import { ajv } from '../utils/ajv.js';
66
import { getMCPServerID, getProxyMCPServerToolName } from './utils.js';
77

88
export async function getMCPServerTools(
@@ -14,8 +14,6 @@ export async function getMCPServerTools(
1414
const res = await client.listTools();
1515
const { tools } = res;
1616

17-
const ajv = new Ajv({ coerceTypes: 'array', strict: false });
18-
1917
const compiledTools: ToolEntry[] = [];
2018
for (const tool of tools) {
2119
const mcpTool: ActorMcpTool = {

src/mcp/server.ts

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ import { prompts } from '../prompts/index.js';
3131
import { callActorGetDataset, defaultTools, getActorsAsTools, toolCategories } from '../tools/index.js';
3232
import { decodeDotPropertyNames } from '../tools/utils.js';
3333
import type { ActorMcpTool, ActorTool, HelperTool, ToolEntry } from '../types.js';
34+
import { buildActorResponseContent } from '../utils/actor-response.js';
3435
import { createProgressTracker } from '../utils/progress.js';
3536
import { getToolPublicFieldOnly } from '../utils/tools.js';
3637
import { connectMCPClient } from './client.js';
@@ -524,21 +525,15 @@ export class ActorsMcpServer {
524525

525526
try {
526527
log.info('Calling Actor', { actorName: actorTool.actorFullName, input: args });
527-
const { runId, datasetId, items } = await callActorGetDataset(
528+
const callResult = await callActorGetDataset(
528529
actorTool.actorFullName,
529530
args,
530531
apifyToken as string,
531532
callOptions,
532533
progressTracker,
533534
);
534-
const content = [
535-
{ type: 'text', text: `Actor finished with runId: ${runId}, datasetId ${datasetId}` },
536-
];
537535

538-
const itemContents = items.items.map((item: Record<string, unknown>) => {
539-
return { type: 'text', text: JSON.stringify(item) };
540-
});
541-
content.push(...itemContents);
536+
const content = buildActorResponseContent(actorTool.actorFullName, callResult);
542537
return { content };
543538
} finally {
544539
if (progressTracker) {

src/tools/actor.ts

Lines changed: 46 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import type { Client } from '@modelcontextprotocol/sdk/client/index.js';
2-
import { Ajv } from 'ajv';
3-
import type { ActorCallOptions, ActorRun, PaginatedList } from 'apify-client';
2+
import type { ActorCallOptions, ActorRun } from 'apify-client';
43
import { z } from 'zod';
54
import zodToJsonSchema from 'zod-to-json-schema';
65

@@ -11,41 +10,47 @@ import {
1110
ACTOR_ADDITIONAL_INSTRUCTIONS,
1211
ACTOR_MAX_MEMORY_MBYTES,
1312
HelperTools,
13+
TOOL_MAX_OUTPUT_CHARS,
1414
} from '../const.js';
1515
import { getActorMCPServerPath, getActorMCPServerURL } from '../mcp/actors.js';
1616
import { connectMCPClient } from '../mcp/client.js';
1717
import { getMCPServerTools } from '../mcp/proxy.js';
1818
import { actorDefinitionPrunedCache } from '../state.js';
19-
import type { ActorDefinitionStorage, ActorInfo, ToolEntry } from '../types.js';
20-
import { getActorDefinitionStorageFieldNames } from '../utils/actor.js';
19+
import type { ActorDefinitionStorage, ActorInfo, DatasetItem, ToolEntry } from '../types.js';
20+
import { ensureOutputWithinCharLimit, getActorDefinitionStorageFieldNames } from '../utils/actor.js';
2121
import { fetchActorDetails } from '../utils/actor-details.js';
22-
import { getValuesByDotKeys } from '../utils/generic.js';
22+
import { buildActorResponseContent } from '../utils/actor-response.js';
23+
import { ajv } from '../utils/ajv.js';
2324
import type { ProgressTracker } from '../utils/progress.js';
25+
import type { JsonSchemaProperty } from '../utils/schema-generation.js';
26+
import { generateSchemaFromItems } from '../utils/schema-generation.js';
2427
import { getActorDefinition } from './build.js';
2528
import { actorNameToToolName, fixedAjvCompile, getToolSchemaID, transformActorInputSchemaProperties } from './utils.js';
2629

27-
const ajv = new Ajv({ coerceTypes: 'array', strict: false });
28-
2930
// Define a named return type for callActorGetDataset
3031
export type CallActorGetDatasetResult = {
3132
runId: string;
3233
datasetId: string;
33-
items: PaginatedList<Record<string, unknown>>;
34+
itemCount: number;
35+
schema: JsonSchemaProperty;
36+
previewItems: DatasetItem[];
3437
};
3538

3639
/**
37-
* Calls an Apify actor and retrieves the dataset items.
40+
* Calls an Apify Actor and retrieves metadata about the dataset results.
3841
*
42+
* This function executes an Actor and returns summary information with a preview of the result items instead of the full dataset
43+
* to prevent overwhelming responses. The actual data can be retrieved using the get-actor-output tool.
3944
*
4045
* It requires the `APIFY_TOKEN` environment variable to be set.
4146
* If `APIFY_IS_AT_HOME` is set, the dataset items are pushed to the Apify dataset.
4247
*
43-
* @param {string} actorName - The name of the actor to call.
44-
* @param {ActorCallOptions} callOptions - The options to pass to the actor.
48+
* @param {string} actorName - The name of the Actor to call.
49+
* @param {ActorCallOptions} callOptions - The options to pass to the Actor.
4550
* @param {unknown} input - The input to pass to the Actor.
4651
* @param {string} apifyToken - The Apify token to use for authentication.
4752
* @param {ProgressTracker} progressTracker - Optional progress tracker for real-time updates.
48-
* @returns {Promise<{ actorRun: any, items: object[] }>} - A promise that resolves to an object containing the actor run and dataset items.
53+
* @returns {Promise<CallActorGetDatasetResult>} - A promise that resolves to metadata about the Actor run and dataset.
4954
* @throws {Error} - Throws an error if the `APIFY_TOKEN` is not set
5055
*/
5156
export async function callActorGetDataset(
@@ -59,7 +64,7 @@ export async function callActorGetDataset(
5964
const client = new ApifyClient({ token: apifyToken });
6065
const actorClient = client.actor(actorName);
6166

62-
// Start the actor run but don't wait for completion
67+
// Start the Actor run but don't wait for completion
6368
const actorRun: ActorRun = await actorClient.start(input, callOptions);
6469

6570
// Start progress tracking if tracker is provided
@@ -71,34 +76,45 @@ export async function callActorGetDataset(
7176
const completedRun = await client.run(actorRun.id).waitForFinish();
7277

7378
const dataset = client.dataset(completedRun.defaultDatasetId);
74-
const [items, defaultBuild] = await Promise.all([
79+
const [datasetItems, defaultBuild] = await Promise.all([
7580
dataset.listItems(),
7681
(await actorClient.defaultBuild()).get(),
7782
]);
7883

79-
// Get important properties from storage view definitions and if available return only those properties
84+
// Generate schema using the shared utility
85+
const generatedSchema = generateSchemaFromItems(datasetItems.items, {
86+
clean: true,
87+
arrayMode: 'all',
88+
});
89+
const schema = generatedSchema || { type: 'object', properties: {} };
90+
91+
/**
92+
* Get important fields that are used in any dataset view, as they MAY be used in filtering to ensure the output fits
93+
* the tool output limits. Client has to use the get-actor-output tool to retrieve the full dataset or filtered out fields.
94+
*/
8095
const storageDefinition = defaultBuild?.actorDefinition?.storages?.dataset as ActorDefinitionStorage | undefined;
8196
const importantProperties = getActorDefinitionStorageFieldNames(storageDefinition || {});
82-
if (importantProperties.length > 0) {
83-
items.items = items.items.map((item) => {
84-
return getValuesByDotKeys(item, importantProperties);
85-
});
86-
}
87-
88-
log.debug('Actor finished', { actorName, itemCount: items.count });
89-
return { runId: actorRun.id, datasetId: completedRun.defaultDatasetId, items };
97+
const previewItems = ensureOutputWithinCharLimit(datasetItems.items, importantProperties, TOOL_MAX_OUTPUT_CHARS);
98+
99+
return {
100+
runId: actorRun.id,
101+
datasetId: completedRun.defaultDatasetId,
102+
itemCount: datasetItems.count,
103+
schema,
104+
previewItems,
105+
};
90106
} catch (error) {
91-
log.error('Error calling actor', { error, actorName, input });
107+
log.error('Error calling Actor', { error, actorName, input });
92108
throw new Error(`Error calling Actor: ${error}`);
93109
}
94110
}
95111

96112
/**
97113
* This function is used to fetch normal non-MCP server Actors as a tool.
98114
*
99-
* Fetches actor input schemas by Actor IDs or Actor full names and creates MCP tools.
115+
* Fetches Actor input schemas by Actor IDs or Actor full names and creates MCP tools.
100116
*
101-
* This function retrieves the input schemas for the specified actors and compiles them into MCP tools.
117+
* This function retrieves the input schemas for the specified Actors and compiles them into MCP tools.
102118
* It uses the AJV library to validate the input schemas.
103119
*
104120
* Tool name can't contain /, so it is replaced with _
@@ -201,7 +217,7 @@ export async function getActorsAsTools(
201217
actorIdsOrNames: string[],
202218
apifyToken: string,
203219
): Promise<ToolEntry[]> {
204-
log.debug('Fetching actors as tools', { actorNames: actorIdsOrNames });
220+
log.debug('Fetching Actors as tools', { actorNames: actorIdsOrNames });
205221

206222
const actorsInfo: (ActorInfo | null)[] = await Promise.all(
207223
actorIdsOrNames.map(async (actorIdOrName) => {
@@ -298,7 +314,7 @@ The step parameter enforces this workflow - you cannot call an Actor without fir
298314

299315
try {
300316
if (step === 'info') {
301-
// Step 1: Return actor card and schema directly
317+
// Step 1: Return Actor card and schema directly
302318
const details = await fetchActorDetails(apifyToken, actorName);
303319
if (!details) {
304320
return {
@@ -342,23 +358,15 @@ The step parameter enforces this workflow - you cannot call an Actor without fir
342358
}
343359
}
344360

345-
const { runId, datasetId, items } = await callActorGetDataset(
361+
const callResult = await callActorGetDataset(
346362
actorName,
347363
input,
348364
apifyToken,
349365
callOptions,
350366
progressTracker,
351367
);
352368

353-
const content = [
354-
{ type: 'text', text: `Actor finished with runId: ${runId}, datasetId ${datasetId}` },
355-
];
356-
357-
const itemContents = items.items.map((item: Record<string, unknown>) => ({
358-
type: 'text',
359-
text: JSON.stringify(item),
360-
}));
361-
content.push(...itemContents);
369+
const content = buildActorResponseContent(actorName, callResult);
362370

363371
return { content };
364372
} catch (error) {

src/tools/build.ts

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import { Ajv } from 'ajv';
21
import { z } from 'zod';
32
import zodToJsonSchema from 'zod-to-json-schema';
43

@@ -13,10 +12,9 @@ import type {
1312
ISchemaProperties,
1413
ToolEntry,
1514
} from '../types.js';
15+
import { ajv } from '../utils/ajv.js';
1616
import { filterSchemaProperties, shortenProperties } from './utils.js';
1717

18-
const ajv = new Ajv({ coerceTypes: 'array', strict: false });
19-
2018
/**
2119
* Get Actor input schema by Actor name.
2220
* First, fetch the Actor details to get the default build tag and buildId.

0 commit comments

Comments
 (0)