Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
"dotenv": "^16.4.7",
"eslint": "^9.19.0",
"eventsource": "^3.0.2",
"tsx": "^4.6.2",
"tsx": "^4.20.5",
"typescript": "^5.3.3",
"typescript-eslint": "^8.23.0",
"vitest": "^3.0.8"
Expand Down
7 changes: 4 additions & 3 deletions src/const.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,16 @@ export enum HelperTools {
GET_HTML_SKELETON = 'get-html-skeleton',
}

export const ACTOR_RAG_WEB_BROWSER = 'apify/rag-web-browser';
export const RAG_WEB_BROWSER = 'apify/rag-web-browser';
export const RAG_WEB_BROWSER_WHITELISTED_FIELDS = ['query', 'maxResults', 'outputFormats'];
export const RAG_WEB_BROWSER_ADDITIONAL_DESC = `This tool provides general web browsing functionality, for specific sites like e-commerce, social media it is always better to search for a specific Actor`;

export const defaults = {
actors: [
ACTOR_RAG_WEB_BROWSER,
RAG_WEB_BROWSER,
],
};

export const ACTOR_ADDITIONAL_INSTRUCTIONS = 'Never call/execute tool/Actor unless confirmed by the user.';
export const SKYFIRE_MIN_CHARGE_USD = 5.0;
export const SKYFIRE_SELLER_ID = process.env.SKYFIRE_SELLER_SERVICE_ID;

Expand Down
119 changes: 68 additions & 51 deletions src/tools/actor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@ import log from '@apify/log';

import { ApifyClient } from '../apify-client.js';
import {
ACTOR_ADDITIONAL_INSTRUCTIONS,
ACTOR_MAX_MEMORY_MBYTES,
HelperTools,
RAG_WEB_BROWSER,
RAG_WEB_BROWSER_ADDITIONAL_DESC,
SKYFIRE_TOOL_INSTRUCTIONS,
TOOL_MAX_OUTPUT_CHARS,
} from '../const.js';
Expand All @@ -26,7 +27,11 @@ import type { ProgressTracker } from '../utils/progress.js';
import type { JsonSchemaProperty } from '../utils/schema-generation.js';
import { generateSchemaFromItems } from '../utils/schema-generation.js';
import { getActorDefinition } from './build.js';
import { actorNameToToolName, fixedAjvCompile, getToolSchemaID, transformActorInputSchemaProperties } from './utils.js';
import {
actorNameToToolName,
buildActorInputSchema,
fixedAjvCompile,
} from './utils.js';

// Define a named return type for callActorGetDataset
export type CallActorGetDatasetResult = {
Expand Down Expand Up @@ -156,45 +161,48 @@ export async function getNormalActorsAsTools(
): Promise<ToolEntry[]> {
const tools: ToolEntry[] = [];

// Zip the results with their corresponding actorIDs
for (const actorInfo of actorsInfo) {
const { actorDefinitionPruned } = actorInfo;

if (actorDefinitionPruned) {
const schemaID = getToolSchemaID(actorDefinitionPruned.actorFullName);
if (actorDefinitionPruned.input && 'properties' in actorDefinitionPruned.input && actorDefinitionPruned.input) {
actorDefinitionPruned.input.properties = transformActorInputSchemaProperties(actorDefinitionPruned.input);
// Add schema $id, each valid JSON schema should have a unique $id
// see https://json-schema.org/understanding-json-schema/basics#declaring-a-unique-identifier
actorDefinitionPruned.input.$id = schemaID;
}
try {
const memoryMbytes = actorDefinitionPruned.defaultRunOptions?.memoryMbytes || ACTOR_MAX_MEMORY_MBYTES;
const tool: ToolEntry = {
type: 'actor',
tool: {
name: actorNameToToolName(actorDefinitionPruned.actorFullName),
actorFullName: actorDefinitionPruned.actorFullName,
description: `This tool calls the Actor "${actorDefinitionPruned.actorFullName}" and retrieves its output results. Use this tool instead of the "${HelperTools.ACTOR_CALL}" if user requests to use this specific Actor.
Actor description: ${actorDefinitionPruned.description}
Instructions: ${ACTOR_ADDITIONAL_INSTRUCTIONS}`,
inputSchema: actorDefinitionPruned.input
// So Actor without input schema works - MCP client expects JSON schema valid output
|| {
type: 'object',
properties: {},
required: [],
},
// Additional props true to allow skyfire-pay-id
ajvValidate: fixedAjvCompile(ajv, { ...actorDefinitionPruned.input, additionalProperties: true }),
memoryMbytes: memoryMbytes > ACTOR_MAX_MEMORY_MBYTES ? ACTOR_MAX_MEMORY_MBYTES : memoryMbytes,
},
};
tools.push(tool);
} catch (validationError) {
log.error('Failed to compile AJV schema for Actor', { actorName: actorDefinitionPruned.actorFullName, error: validationError });
}
if (!actorDefinitionPruned) continue;

const isRag = actorDefinitionPruned.actorFullName === RAG_WEB_BROWSER;
const { inputSchema } = buildActorInputSchema(actorDefinitionPruned.actorFullName, actorDefinitionPruned.input, isRag);

let description = `This tool calls the Actor "${actorDefinitionPruned.actorFullName}" and retrieves its output results.
Use this tool instead of the "${HelperTools.ACTOR_CALL}" if user requests this specific Actor.
Actor description: ${actorDefinitionPruned.description}`;
if (isRag) {
description += RAG_WEB_BROWSER_ADDITIONAL_DESC;
}

const memoryMbytes = Math.min(
actorDefinitionPruned.defaultRunOptions?.memoryMbytes || ACTOR_MAX_MEMORY_MBYTES,
ACTOR_MAX_MEMORY_MBYTES,
);

let ajvValidate;
try {
ajvValidate = fixedAjvCompile(ajv, { ...inputSchema, additionalProperties: true });
} catch (e) {
log.error('Failed to compile schema', {
actorName: actorDefinitionPruned.actorFullName,
error: e,
});
continue;
}

tools.push({
type: 'actor',
tool: {
name: actorNameToToolName(actorDefinitionPruned.actorFullName),
actorFullName: actorDefinitionPruned.actorFullName,
description,
inputSchema,
ajvValidate,
memoryMbytes,
},
});
}
return tools;
}
Expand Down Expand Up @@ -294,7 +302,7 @@ const callActorArgs = z.object({
.describe('The name of the Actor to call. For example, "apify/rag-web-browser".'),
step: z.enum(['info', 'call'])
.default('info')
.describe(`Step to perform: "info" to get Actor details and input schema (required first step), "call" to execute the Actor (only after getting info).`),
.describe(`Step to perform: "info" to get Actor details and input schema (required first step), "call" to run the Actor (only after getting info).`),
input: z.object({}).passthrough()
.optional()
.describe(`The input JSON to pass to the Actor. For example, {"query": "apify", "maxResults": 5, "outputFormats": ["markdown"]}. Required only when step is "call".`),
Expand All @@ -317,26 +325,35 @@ export const callActor: ToolEntry = {
tool: {
name: HelperTools.ACTOR_CALL,
actorFullName: HelperTools.ACTOR_CALL,
description: `Call Any Actor from Apify Store - Two-Step Process
description: `Call any Actor from the Apify Store using a mandatory two-step workflow.
This ensures you first get the Actor’s input schema and details before executing it safely.

This tool uses a mandatory two-step process to safely call any Actor from the Apify store.
There are two ways to run Actors:
1. Dedicated Actor tools (e.g., ${actorNameToToolName('apify/rag-web-browser')}): These are pre-configured tools, offering a simpler and more direct experience.
2. Generic call-actor tool (${HelperTools.ACTOR_CALL}): Use this when a dedicated tool is not available or when you want to run any Actor dynamically. This tool is especially useful if you do not want to add specific tools or your client does not support dynamic tool registration.

USAGE:
• ONLY for Actors that are NOT available as dedicated tools
• If a dedicated tool exists (e.g., ${actorNameToToolName('apify/rag-web-browser')}), use that instead
**Important:**

MANDATORY TWO-STEP WORKFLOW:
A successful run returns a \`datasetId\` (the Actor's output stored as an Apify dataset) and a short preview of items.
To fetch the full output, use the ${HelperTools.ACTOR_OUTPUT_GET} tool with the \`datasetId\`.

USAGE:
- Always use dedicated tools when available (e.g., ${actorNameToToolName('apify/rag-web-browser')})
- Use the generic call-actor tool only if a dedicated tool does not exist for your Actor.

MANDATORY TWO-STEP-WORKFLOW:
Step 1: Get Actor Info (step="info", default)
First call this tool with step="info" to get Actor details and input schema
This returns the Actor description, documentation, and required input schema
You MUST do this step first - it's required to understand how to call the Actor
- First call this tool with step="info" to get Actor details and input schema
- This returns the Actor description, documentation, and required input schema
- You MUST do this step first - it's required to understand how to call the Actor

Step 2: Call Actor (step="call")
• Only after step 1, call again with step="call" and proper input based on the schema
• This executes the Actor and returns the results
Step 2: Call Actor (step="call")
- Only after step 1, call again with step="call" and proper input based on the schema
- This calls and runs the Actor. It will create an output as an Apify dataset (with datasetId).
- This step returns a dataset preview, typically JSON-formatted tabular data.

The step parameter enforces this workflow - you cannot call an Actor without first getting its info.`,
EXAMPLES:
- user_input: Get instagram posts using apify/instagram-scraper`,
inputSchema: zodToJsonSchema(callActorArgs),
ajvValidate: ajv.compile({
...zodToJsonSchema(callActorArgs),
Expand Down
51 changes: 33 additions & 18 deletions src/tools/dataset.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,17 @@ export const getDataset: ToolEntry = {
tool: {
name: HelperTools.DATASET_GET,
actorFullName: HelperTools.DATASET_GET,
description: 'Dataset is a collection of structured data created by an Actor run. '
+ 'Returns information about dataset object with metadata (itemCount, schema, fields, stats). '
+ `Fields describe the structure of the dataset and can be used to filter the data with the ${HelperTools.DATASET_GET_ITEMS} tool. `
+ 'Note: itemCount updates may have 5s delay.'
+ 'The dataset can be accessed with the dataset URL: GET: https://api.apify.com/v2/datasets/:datasetId',
description: `Get metadata for a dataset (collection of structured data created by an Actor run).
The results will include dataset details such as itemCount, schema, fields, and stats.
Use fields to understand structure for filtering with ${HelperTools.DATASET_GET_ITEMS}.
Note: itemCount updates may be delayed by up to ~5 seconds.

USAGE:
- Use when you need dataset metadata to understand its structure before fetching items.

EXAMPLES:
- user_input: Show info for dataset 8TtYhCwKzQeQk7dJx
- user_input: What fields does username~my-dataset have?`,
inputSchema: zodToJsonSchema(getDatasetArgs),
ajvValidate: ajv.compile(zodToJsonSchema(getDatasetArgs)),
call: async (toolArgs) => {
Expand All @@ -74,16 +80,18 @@ export const getDatasetItems: ToolEntry = {
tool: {
name: HelperTools.DATASET_GET_ITEMS,
actorFullName: HelperTools.DATASET_GET_ITEMS,
description: 'Returns dataset items with pagination support. '
+ 'Items can be sorted (newest to oldest) and filtered (clean mode skips empty items and hidden fields). '
+ 'Supports field selection - include specific fields or exclude unwanted ones using comma-separated lists. '
+ 'For nested objects, you must first flatten them using the flatten parameter before accessing their fields. '
+ 'Example: To get URLs from items like [{"metadata":{"url":"example.com"}}], '
+ 'use flatten="metadata" and then fields="metadata.url". '
+ 'The flattening transforms nested objects into dot-notation format '
+ '(e.g. {"metadata":{"url":"x"}} becomes {"metadata.url":"x"}). '
+ 'Retrieve only the fields you need, reducing the response size and improving performance. '
+ 'The response includes total count, offset, limit, and items array.',
description: `Retrieve dataset items with pagination, sorting, and field selection.
Use clean=true to skip empty items and hidden fields. Include or omit fields using comma-separated lists.
For nested objects, first flatten them (e.g., flatten="metadata"), then reference nested fields via dot notation (e.g., fields="metadata.url").

The results will include items along with pagination info (limit, offset) and total count.

USAGE:
- Use when you need to read data from a dataset (all items or only selected fields).

EXAMPLES:
- user_input: Get first 100 items from dataset 8TtYhCwKzQeQk7dJx
- user_input: Get only metadata.url and title from dataset username~my-dataset (flatten metadata)`,
inputSchema: zodToJsonSchema(getDatasetItemsArgs),
ajvValidate: ajv.compile(zodToJsonSchema(getDatasetItemsArgs)),
call: async (toolArgs) => {
Expand Down Expand Up @@ -136,9 +144,16 @@ export const getDatasetSchema: ToolEntry = {
tool: {
name: HelperTools.DATASET_SCHEMA_GET,
actorFullName: HelperTools.DATASET_SCHEMA_GET,
description: 'Generates a JSON schema from dataset items. '
+ 'The schema describes the structure of the data in the dataset, which can be used for validation, documentation, or data processing.'
+ 'Since the dataset can be large it is convenient to understand the structure of the dataset before getting dataset items.',
description: `Generate a JSON schema from a sample of dataset items.
The schema describes the structure of the data and can be used for validation, documentation, or processing.
Use this to understand the dataset before fetching many items.

USAGE:
- Use when you need to infer the structure of dataset items for downstream processing or validation.

EXAMPLES:
- user_input: Generate schema for dataset 8TtYhCwKzQeQk7dJx using 10 items
- user_input: Show schema of username~my-dataset (clean items only)`,
inputSchema: zodToJsonSchema(getDatasetSchemaArgs),
ajvValidate: ajv.compile(zodToJsonSchema(getDatasetSchemaArgs)),
call: async (toolArgs) => {
Expand Down
18 changes: 12 additions & 6 deletions src/tools/dataset_collection.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,18 @@ export const getUserDatasetsList: ToolEntry = {
tool: {
name: HelperTools.DATASET_LIST_GET,
actorFullName: HelperTools.DATASET_LIST_GET,
description: 'Lists datasets (collections of Actor run data). '
+ 'Actor runs automatically produce unnamed datasets (use unnamed=true to include these). '
+ 'Users can also create named datasets manually. '
+ 'Each dataset includes itemCount, access settings, and usage stats (readCount, writeCount). '
+ 'Results are sorted by createdAt in ascending order (use desc=true for descending). '
+ 'Supports pagination with limit (max 20) and offset parameters.',
description: `List datasets (collections of Actor run data) for the authenticated user.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

question: should we mention that it is for the authenticated user? It will always be an authenticated user (even in the case of Skyfire, where authentication happens via the PAY token).

Actor runs automatically produce unnamed datasets (set unnamed=true to include them). Users can also create named datasets.

The results will include datasets with itemCount, access settings, and usage stats, sorted by createdAt (ascending by default).
Use limit (max 20), offset, and desc to paginate and sort.

USAGE:
- Use when you need to browse available datasets (named or unnamed) to locate data.

EXAMPLES:
- user_input: List my last 10 datasets (newest first)
- user_input: List unnamed datasets`,
inputSchema: zodToJsonSchema(getUserDatasetsListArgs),
ajvValidate: ajv.compile(zodToJsonSchema(getUserDatasetsListArgs)),
call: async (toolArgs) => {
Expand Down
20 changes: 11 additions & 9 deletions src/tools/fetch-actor-details.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,17 @@ export const fetchActorDetailsTool: ToolEntry = {
type: 'internal',
tool: {
name: HelperTools.ACTOR_GET_DETAILS,
description: `Get detailed information about an Actor by its ID or full name.\n`
+ `This tool returns title, description, URL, README (Actor's documentation), input schema, and usage statistics. \n`
+ `The Actor name is always composed of "username/name", for example, "apify/rag-web-browser".\n`
+ `Present Actor information in user-friendly format as an Actor card.\n`
+ `USAGE:\n`
+ `- Use when user asks about an Actor its details, description, input schema, etc.\n`
+ `EXAMPLES:\n`
+ `- user_input: How to use apify/rag-web-browser\n`
+ `- user_input: What is the input schema for apify/rag-web-browser`,
description: `Get detailed information about an Actor by its ID or full name (format: "username/name", e.g., "apify/rag-web-browser").
This returns the Actor’s title, description, URL, README (documentation), input schema, pricing/usage information, and basic stats.
Present the information in a user-friendly Actor card.

USAGE:
- Use when a user asks about an Actor’s details, input schema, README, or how to use it.

EXAMPLES:
- user_input: How to use apify/rag-web-browser
- user_input: What is the input schema for apify/rag-web-browser?
- user_input: What is the pricing for apify/instagram-scraper?`,
inputSchema: zodToJsonSchema(fetchActorDetailsToolArgsSchema),
ajvValidate: ajv.compile(zodToJsonSchema(fetchActorDetailsToolArgsSchema)),
call: async (toolArgs) => {
Expand Down
10 changes: 9 additions & 1 deletion src/tools/fetch-apify-docs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,15 @@ export const fetchApifyDocsTool: ToolEntry = {
type: 'internal',
tool: {
name: HelperTools.DOCS_FETCH,
description: `Apify documentation fetch tool. This tool allows you to fetch the full content of an Apify documentation page by its URL.`,
description: `Fetch the full content of an Apify documentation page by its URL.
Use this after finding a relevant page with the ${HelperTools.DOCS_SEARCH} tool.

USAGE:
- Use when you need the complete content of a specific docs page for detailed answers.

EXAMPLES:
- user_input: Fetch https://docs.apify.com/platform/actors/running#builds
- user_input: Fetch https://docs.apify.com/academy`,
args: fetchApifyDocsToolArgsSchema,
inputSchema: zodToJsonSchema(fetchApifyDocsToolArgsSchema),
ajvValidate: ajv.compile(zodToJsonSchema(fetchApifyDocsToolArgsSchema)),
Expand Down
Loading