Skip to content

Commit dd7a924

Browse files
MQ37 and MichalKalita authored
feat: Limit tools to discovery, dynamic Actor management, and help; simplify Actor input schema; return all dataset items at once with only relevant fields in outputs (#158)
* Disable internal tools except actor details, help tool, search actors. Improve help tool, search actors and get actor details output. * Return only relevant output dataset properties if defined in storage views definition, otherwise dump all properties. Fix input schema for Actors without any input schema. * WIP filter out non-integer input props that are not required * minor tools description and param improvements * Update tests/integration/suite.ts Co-authored-by: Michal Kalita <[email protected]> * split long one liner --------- Co-authored-by: Michal Kalita <[email protected]>
1 parent b2c31b6 commit dd7a924

File tree

17 files changed

+668
-190
lines changed

17 files changed

+668
-190
lines changed

src/const.ts

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -46,13 +46,7 @@ export const ACTOR_OUTPUT_MAX_CHARS_PER_ITEM = 5_000;
4646
export const ACTOR_OUTPUT_TRUNCATED_MESSAGE = `Output was truncated because it will not fit into context.`
4747
+ `There is no reason to call this tool again! You can use ${HelperTools.DATASET_GET_ITEMS} tool to get more items from the dataset.`;
4848

49-
export const ACTOR_ADDITIONAL_INSTRUCTIONS = `Never call/execute tool/Actor unless confirmed by the user.
50-
Workflow: When an Actor runs, it processes data and stores results in Apify storage,
51-
Datasets (for structured/tabular data) and Key-Value Store (for various data types like JSON, images, HTML).
52-
Each Actor run produces a dataset ID and key-value store ID for accessing the results.
53-
By default, the number of items returned from an Actor run is limited to ${ACTOR_RUN_DATASET_OUTPUT_MAX_ITEMS}.
54-
You can always use ${HelperTools.DATASET_GET_ITEMS} tool to get more items from the dataset.
55-
Actor run input is always stored in the key-value store, recordKey: INPUT.`;
49+
export const ACTOR_ADDITIONAL_INSTRUCTIONS = 'Never call/execute tool/Actor unless confirmed by the user.';
5650

5751
export const ACTOR_CACHE_MAX_SIZE = 500;
5852
export const ACTOR_CACHE_TTL_SECS = 30 * 60; // 30 minutes

src/main.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,10 +56,10 @@ if (STANDBY_MODE) {
5656
await Actor.fail('If you need to debug a specific Actor, please provide the debugActor and debugActorInput fields in the input');
5757
}
5858
const options = { memory: input.maxActorMemoryBytes } as ActorCallOptions;
59-
const { datasetInfo, items } = await callActorGetDataset(input.debugActor!, input.debugActorInput!, process.env.APIFY_TOKEN, options);
59+
const { items } = await callActorGetDataset(input.debugActor!, input.debugActorInput!, process.env.APIFY_TOKEN, options);
6060

6161
await Actor.pushData(items);
62-
log.info(`Pushed ${datasetInfo?.itemCount} items to the dataset`);
62+
log.info(`Pushed ${items.count} items to the dataset`);
6363
await Actor.exit();
6464
}
6565

src/mcp/server.ts

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,6 @@ import { type ActorCallOptions, ApifyApiError } from 'apify-client';
1919
import log from '@apify/log';
2020

2121
import {
22-
ACTOR_OUTPUT_MAX_CHARS_PER_ITEM,
23-
ACTOR_OUTPUT_TRUNCATED_MESSAGE,
2422
defaults,
2523
SERVER_NAME,
2624
SERVER_VERSION,
@@ -468,25 +466,20 @@ export class ActorsMcpServer {
468466
const actorTool = tool.tool as ActorTool;
469467

470468
const callOptions: ActorCallOptions = { memory: actorTool.memoryMbytes };
471-
const { actorRun, datasetInfo, items } = await callActorGetDataset(
469+
const { items } = await callActorGetDataset(
472470
actorTool.actorFullName,
473471
args,
474472
apifyToken as string,
475473
callOptions,
476474
);
477-
const content = [
478-
{ type: 'text', text: `Actor finished with run information: ${JSON.stringify(actorRun)}` },
479-
{ type: 'text', text: `Dataset information: ${JSON.stringify(datasetInfo)}` },
480-
];
481-
482-
const itemContents = items.items.map((item: Record<string, unknown>) => {
483-
const text = JSON.stringify(item).slice(0, ACTOR_OUTPUT_MAX_CHARS_PER_ITEM);
484-
return text.length === ACTOR_OUTPUT_MAX_CHARS_PER_ITEM
485-
? { type: 'text', text: `${text} ... ${ACTOR_OUTPUT_TRUNCATED_MESSAGE}` }
486-
: { type: 'text', text };
487-
});
488-
content.push(...itemContents);
489-
return { content };
475+
return {
476+
content: items.items.map((item: Record<string, unknown>) => {
477+
return {
478+
type: 'text',
479+
text: JSON.stringify(item),
480+
};
481+
}),
482+
};
490483
}
491484
} catch (error) {
492485
if (error instanceof ApifyApiError) {

src/tools/actor.ts

Lines changed: 39 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import type { Client } from '@modelcontextprotocol/sdk/client/index.js';
22
import { Ajv } from 'ajv';
3-
import type { ActorCallOptions, ActorRun, Dataset, PaginatedList } from 'apify-client';
3+
import type { ActorCallOptions, ActorRun, PaginatedList } from 'apify-client';
44
import { z } from 'zod';
55
import zodToJsonSchema from 'zod-to-json-schema';
66

@@ -10,14 +10,15 @@ import { ApifyClient } from '../apify-client.js';
1010
import {
1111
ACTOR_ADDITIONAL_INSTRUCTIONS,
1212
ACTOR_MAX_MEMORY_MBYTES,
13-
ACTOR_RUN_DATASET_OUTPUT_MAX_ITEMS,
1413
HelperTools,
1514
} from '../const.js';
1615
import { getActorMCPServerPath, getActorMCPServerURL } from '../mcp/actors.js';
1716
import { connectMCPClient } from '../mcp/client.js';
1817
import { getMCPServerTools } from '../mcp/proxy.js';
1918
import { actorDefinitionPrunedCache } from '../state.js';
20-
import type { ActorInfo, InternalTool, ToolEntry } from '../types.js';
19+
import type { ActorDefinitionStorage, ActorInfo, InternalTool, ToolEntry } from '../types.js';
20+
import { getActorDefinitionStorageFieldNames } from '../utils/actor.js';
21+
import { getValuesByDotKeys } from '../utils/generic.js';
2122
import { getActorDefinition } from './build.js';
2223
import {
2324
actorNameToToolName,
@@ -34,8 +35,6 @@ const ajv = new Ajv({ coerceTypes: 'array', strict: false });
3435

3536
// Define a named return type for callActorGetDataset
3637
export type CallActorGetDatasetResult = {
37-
actorRun: ActorRun;
38-
datasetInfo: Dataset | undefined;
3938
items: PaginatedList<Record<string, unknown>>;
4039
};
4140

@@ -50,7 +49,6 @@ export type CallActorGetDatasetResult = {
5049
* @param {ActorCallOptions} callOptions - The options to pass to the actor.
5150
* @param {unknown} input - The input to pass to the actor.
5251
* @param {string} apifyToken - The Apify token to use for authentication.
53-
* @param {number} limit - The maximum number of items to retrieve from the dataset.
5452
* @returns {Promise<{ actorRun: any, items: object[] }>} - A promise that resolves to an object containing the actor run and dataset items.
5553
* @throws {Error} - Throws an error if the `APIFY_TOKEN` is not set
5654
*/
@@ -59,7 +57,6 @@ export async function callActorGetDataset(
5957
input: unknown,
6058
apifyToken: string,
6159
callOptions: ActorCallOptions | undefined = undefined,
62-
limit = ACTOR_RUN_DATASET_OUTPUT_MAX_ITEMS,
6360
): Promise<CallActorGetDatasetResult> {
6461
try {
6562
log.info(`Calling Actor ${actorName} with input: ${JSON.stringify(input)}`);
@@ -69,13 +66,24 @@ export async function callActorGetDataset(
6966

7067
const actorRun: ActorRun = await actorClient.call(input, callOptions);
7168
const dataset = client.dataset(actorRun.defaultDatasetId);
72-
const [datasetInfo, items] = await Promise.all([
73-
dataset.get(),
74-
dataset.listItems({ limit }),
69+
// const dataset = client.dataset('Ehtn0Y4wIKviFT2WB');
70+
const [items, defaultBuild] = await Promise.all([
71+
dataset.listItems(),
72+
(await actorClient.defaultBuild()).get(),
7573
]);
76-
log.info(`Actor ${actorName} finished with ${datasetInfo?.itemCount} items`);
7774

78-
return { actorRun, datasetInfo, items };
75+
// Get important properties from storage view definitions and if available return only those properties
76+
const storageDefinition = defaultBuild?.actorDefinition?.storages?.dataset as ActorDefinitionStorage | undefined;
77+
const importantProperties = getActorDefinitionStorageFieldNames(storageDefinition || {});
78+
if (importantProperties.length > 0) {
79+
items.items = items.items.map((item) => {
80+
return getValuesByDotKeys(item, importantProperties);
81+
});
82+
}
83+
84+
log.info(`Actor ${actorName} finished with ${items.count} items`);
85+
86+
return { items };
7987
} catch (error) {
8088
log.error(`Error calling actor: ${error}. Actor: ${actorName}, input: ${JSON.stringify(input)}`);
8189
throw new Error(`Error calling Actor: ${error}`);
@@ -115,6 +123,18 @@ export async function getNormalActorsAsTools(
115123
if (actorDefinitionPruned) {
116124
const schemaID = getToolSchemaID(actorDefinitionPruned.actorFullName);
117125
if (actorDefinitionPruned.input && 'properties' in actorDefinitionPruned.input && actorDefinitionPruned.input) {
126+
// Filter non-required properties except integers if `required` is defined in the input schema and not empty.
127+
const { required } = actorDefinitionPruned.input;
128+
if (Array.isArray(required) && required.length > 0) {
129+
actorDefinitionPruned.input.properties = Object.fromEntries(
130+
Object.entries(actorDefinitionPruned.input.properties)
131+
// Keep all integer properties, as these include
132+
// properties related to output item counts that users
133+
// might want to change if they need more results than the default limit.
134+
.filter(([key, value]) => required.includes(key) || value.type === 'integer'),
135+
);
136+
}
137+
118138
actorDefinitionPruned.input.properties = markInputPropertiesAsRequired(actorDefinitionPruned.input);
119139
actorDefinitionPruned.input.properties = buildNestedProperties(actorDefinitionPruned.input.properties);
120140
actorDefinitionPruned.input.properties = filterSchemaProperties(actorDefinitionPruned.input.properties);
@@ -132,7 +152,13 @@ export async function getNormalActorsAsTools(
132152
name: actorNameToToolName(actorDefinitionPruned.actorFullName),
133153
actorFullName: actorDefinitionPruned.actorFullName,
134154
description: `${actorDefinitionPruned.description} Instructions: ${ACTOR_ADDITIONAL_INSTRUCTIONS}`,
135-
inputSchema: actorDefinitionPruned.input || {},
155+
inputSchema: actorDefinitionPruned.input
156+
// So Actor without input schema works - MCP client expects JSON schema valid output
157+
|| {
158+
type: 'object',
159+
properties: {},
160+
required: [],
161+
},
136162
ajvValidate: fixedAjvCompile(ajv, actorDefinitionPruned.input || {}),
137163
memoryMbytes: memoryMbytes > ACTOR_MAX_MEMORY_MBYTES ? ACTOR_MAX_MEMORY_MBYTES : memoryMbytes,
138164
},

src/tools/get-actor-details.ts

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
import { z } from 'zod';
2+
import zodToJsonSchema from 'zod-to-json-schema';
3+
4+
import { ApifyClient } from '../apify-client.js';
5+
import { HelperTools } from '../const.js';
6+
import type { ExtendedPricingInfo, IActorInputSchema, InternalTool, ToolEntry } from '../types.js';
7+
import { ajv } from '../utils/ajv.js';
8+
import { getCurrentPricingInfo, pricingInfoToString } from '../utils/pricing-info.js';
9+
import { filterSchemaProperties, shortenProperties } from './utils.js';
10+
11+
const getActorDetailsToolArgsSchema = z.object({
12+
actor: z.string()
13+
.min(1)
14+
.describe(`Actor ID or full name in the format "username/name", e.g., "apify/rag-web-browser".`),
15+
});
16+
17+
interface IGetActorDetailsToolResult {
18+
id: string;
19+
actorFullName: string;
20+
21+
isPublic: boolean;
22+
isDeprecated: boolean;
23+
createdAt: string;
24+
modifiedAt: string;
25+
26+
categories?: string[];
27+
description: string;
28+
readme: string;
29+
30+
inputSchema: IActorInputSchema;
31+
32+
pricingInfo: string; // We convert the pricing info into a string representation
33+
34+
usageStatistics: {
35+
totalUsers: {
36+
allTime: number;
37+
last7Days: number;
38+
last30Days: number;
39+
last90Days: number;
40+
};
41+
failedRunsInLast30Days: number | string; // string for 'unknown' case
42+
}
43+
}
44+
45+
export const getActorDetailsTool: ToolEntry = {
46+
type: 'internal',
47+
tool: {
48+
name: HelperTools.ACTOR_GET_DETAILS,
49+
description: `Retrieve information about an Actor by its ID or full name.
50+
The Actor name is always composed of "username/name", for example, "apify/rag-web-browser".
51+
This tool returns information about the Actor, including whether it is public or deprecated, when it was created or modified, the categories in which the Actor is listed, a description, a README (the Actor's documentation), the input schema, and usage statistics—such as how many users are using it and the number of failed runs of the Actor.
52+
For example, use this tool when a user wants to know more about a specific Actor or wants to use optional or advanced parameters of the Actor that are not listed in the default Actor tool input schema - so you know the details and how to pass them.`,
53+
inputSchema: zodToJsonSchema(getActorDetailsToolArgsSchema),
54+
ajvValidate: ajv.compile(zodToJsonSchema(getActorDetailsToolArgsSchema)),
55+
call: async (toolArgs) => {
56+
const { args, apifyToken } = toolArgs;
57+
58+
const parsed = getActorDetailsToolArgsSchema.parse(args);
59+
const client = new ApifyClient({ token: apifyToken });
60+
61+
const [actorInfo, buildInfo] = await Promise.all([
62+
client.actor(parsed.actor).get(),
63+
client.actor(parsed.actor).defaultBuild().then(async (build) => build.get()),
64+
]);
65+
66+
if (!actorInfo || !buildInfo || !buildInfo.actorDefinition) {
67+
return {
68+
content: [{ type: 'text', text: `Actor information for '${parsed.actor}' was not found. Please check the Actor ID or name and ensure the Actor exists.` }],
69+
};
70+
}
71+
72+
const inputSchema = (buildInfo.actorDefinition.input || {
73+
type: 'object',
74+
properties: {},
75+
}) as IActorInputSchema;
76+
inputSchema.properties = filterSchemaProperties(inputSchema.properties);
77+
inputSchema.properties = shortenProperties(inputSchema.properties);
78+
79+
const currentPricingInfo = getCurrentPricingInfo(actorInfo.pricingInfos || [], new Date());
80+
81+
const result: IGetActorDetailsToolResult = {
82+
id: actorInfo.id,
83+
actorFullName: `${actorInfo.username}/${actorInfo.name}`,
84+
85+
isPublic: actorInfo.isPublic,
86+
isDeprecated: actorInfo.isDeprecated || false,
87+
createdAt: actorInfo.createdAt.toISOString(),
88+
modifiedAt: actorInfo.modifiedAt.toISOString(),
89+
90+
categories: actorInfo.categories,
91+
description: actorInfo.description || 'No description provided.',
92+
readme: buildInfo.actorDefinition.readme || 'No README provided.',
93+
94+
inputSchema,
95+
96+
pricingInfo: pricingInfoToString(currentPricingInfo as (ExtendedPricingInfo | null)),
97+
98+
usageStatistics: {
99+
totalUsers: {
100+
allTime: actorInfo.stats.totalUsers,
101+
last7Days: actorInfo.stats.totalUsers7Days,
102+
last30Days: actorInfo.stats.totalUsers30Days,
103+
last90Days: actorInfo.stats.totalUsers90Days,
104+
},
105+
failedRunsInLast30Days: (
106+
'publicActorRunStats30Days' in actorInfo.stats && 'FAILED' in (actorInfo.stats.publicActorRunStats30Days as object)
107+
) ? (actorInfo.stats.publicActorRunStats30Days as { FAILED: number }).FAILED : 'unknown',
108+
},
109+
};
110+
return {
111+
content: [{
112+
type: 'text',
113+
text: JSON.stringify(result),
114+
}],
115+
};
116+
},
117+
} as InternalTool,
118+
};

0 commit comments

Comments
 (0)