Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
"dotenv": "^16.4.7",
"eslint": "^9.19.0",
"eventsource": "^3.0.2",
"tsx": "^4.6.2",
"tsx": "^4.20.5",
"typescript": "^5.3.3",
"typescript-eslint": "^8.23.0",
"vitest": "^3.0.8"
Expand Down
7 changes: 4 additions & 3 deletions src/const.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,16 @@ export enum HelperTools {
GET_HTML_SKELETON = 'get-html-skeleton',
}

export const ACTOR_RAG_WEB_BROWSER = 'apify/rag-web-browser';
export const RAG_WEB_BROWSER = 'apify/rag-web-browser';
export const RAG_WEB_BROWSER_WHITELISTED_FIELDS = ['query', 'maxResults', 'outputFormats'];
export const RAG_WEB_BROWSER_ADDITIONAL_DESC = `This tool provides general web browsing functionality, for specific sites like e-commerce, social media it is always better to search for a specific Actor`;

export const defaults = {
actors: [
ACTOR_RAG_WEB_BROWSER,
RAG_WEB_BROWSER,
],
};

export const ACTOR_ADDITIONAL_INSTRUCTIONS = 'Never call/execute tool/Actor unless confirmed by the user.';
export const SKYFIRE_MIN_CHARGE_USD = 5.0;
export const SKYFIRE_SELLER_ID = process.env.SKYFIRE_SELLER_SERVICE_ID;

Expand Down
100 changes: 54 additions & 46 deletions src/tools/actor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@ import log from '@apify/log';

import { ApifyClient } from '../apify-client.js';
import {
ACTOR_ADDITIONAL_INSTRUCTIONS,
ACTOR_MAX_MEMORY_MBYTES,
HelperTools,
RAG_WEB_BROWSER,
RAG_WEB_BROWSER_ADDITIONAL_DESC,
SKYFIRE_TOOL_INSTRUCTIONS,
TOOL_MAX_OUTPUT_CHARS,
} from '../const.js';
Expand All @@ -26,7 +27,11 @@ import type { ProgressTracker } from '../utils/progress.js';
import type { JsonSchemaProperty } from '../utils/schema-generation.js';
import { generateSchemaFromItems } from '../utils/schema-generation.js';
import { getActorDefinition } from './build.js';
import { actorNameToToolName, fixedAjvCompile, getToolSchemaID, transformActorInputSchemaProperties } from './utils.js';
import {
actorNameToToolName,
buildActorInputSchema,
fixedAjvCompile,
} from './utils.js';

// Define a named return type for callActorGetDataset
export type CallActorGetDatasetResult = {
Expand Down Expand Up @@ -156,45 +161,48 @@ export async function getNormalActorsAsTools(
): Promise<ToolEntry[]> {
const tools: ToolEntry[] = [];

// Zip the results with their corresponding actorIDs
for (const actorInfo of actorsInfo) {
const { actorDefinitionPruned } = actorInfo;

if (actorDefinitionPruned) {
const schemaID = getToolSchemaID(actorDefinitionPruned.actorFullName);
if (actorDefinitionPruned.input && 'properties' in actorDefinitionPruned.input && actorDefinitionPruned.input) {
actorDefinitionPruned.input.properties = transformActorInputSchemaProperties(actorDefinitionPruned.input);
// Add schema $id, each valid JSON schema should have a unique $id
// see https://json-schema.org/understanding-json-schema/basics#declaring-a-unique-identifier
actorDefinitionPruned.input.$id = schemaID;
}
try {
const memoryMbytes = actorDefinitionPruned.defaultRunOptions?.memoryMbytes || ACTOR_MAX_MEMORY_MBYTES;
const tool: ToolEntry = {
type: 'actor',
tool: {
name: actorNameToToolName(actorDefinitionPruned.actorFullName),
actorFullName: actorDefinitionPruned.actorFullName,
description: `This tool calls the Actor "${actorDefinitionPruned.actorFullName}" and retrieves its output results. Use this tool instead of the "${HelperTools.ACTOR_CALL}" if user requests to use this specific Actor.
Actor description: ${actorDefinitionPruned.description}
Instructions: ${ACTOR_ADDITIONAL_INSTRUCTIONS}`,
inputSchema: actorDefinitionPruned.input
// So Actor without input schema works - MCP client expects JSON schema valid output
|| {
type: 'object',
properties: {},
required: [],
},
// Additional props true to allow skyfire-pay-id
ajvValidate: fixedAjvCompile(ajv, { ...actorDefinitionPruned.input, additionalProperties: true }),
memoryMbytes: memoryMbytes > ACTOR_MAX_MEMORY_MBYTES ? ACTOR_MAX_MEMORY_MBYTES : memoryMbytes,
},
};
tools.push(tool);
} catch (validationError) {
log.error('Failed to compile AJV schema for Actor', { actorName: actorDefinitionPruned.actorFullName, error: validationError });
}
if (!actorDefinitionPruned) continue;

const isRag = actorDefinitionPruned.actorFullName === RAG_WEB_BROWSER;
const { inputSchema } = buildActorInputSchema(actorDefinitionPruned.actorFullName, actorDefinitionPruned.input, isRag);

let description = `This tool calls the Actor "${actorDefinitionPruned.actorFullName}" and retrieves its output results.
Use this tool instead of the "${HelperTools.ACTOR_CALL}" if user requests this specific Actor.
Actor description: ${actorDefinitionPruned.description}`;
if (isRag) {
description += RAG_WEB_BROWSER_ADDITIONAL_DESC;
}

const memoryMbytes = Math.min(
actorDefinitionPruned.defaultRunOptions?.memoryMbytes || ACTOR_MAX_MEMORY_MBYTES,
ACTOR_MAX_MEMORY_MBYTES,
);

let ajvValidate;
try {
ajvValidate = fixedAjvCompile(ajv, { ...inputSchema, additionalProperties: true });
} catch (e) {
log.error('Failed to compile schema', {
actorName: actorDefinitionPruned.actorFullName,
error: e,
});
continue;
}

tools.push({
type: 'actor',
tool: {
name: actorNameToToolName(actorDefinitionPruned.actorFullName),
actorFullName: actorDefinitionPruned.actorFullName,
description,
inputSchema,
ajvValidate,
memoryMbytes,
},
});
}
return tools;
}
Expand Down Expand Up @@ -317,8 +325,7 @@ export const callActor: ToolEntry = {
tool: {
name: HelperTools.ACTOR_CALL,
actorFullName: HelperTools.ACTOR_CALL,
description: `Call Any Actor from Apify Store - Two-Step Process

description: `Call any Actor from Apify Store - two-step process
This tool uses a mandatory two-step process to safely call any Actor from the Apify store.

USAGE:
Expand All @@ -328,13 +335,14 @@ USAGE:
MANDATORY TWO-STEP WORKFLOW:

Step 1: Get Actor Info (step="info", default)
• First call this tool with step="info" to get Actor details and input schema
• This returns the Actor description, documentation, and required input schema
• You MUST do this step first - it's required to understand how to call the Actor

Step 2: Call Actor (step="call")
• Only after step 1, call again with step="call" and proper input based on the schema
• This executes the Actor and returns the results
- First call this tool with step="info" to get Actor details and input schema
- This returns the Actor description, documentation, and required input schema
- You MUST do this step first - it's required to understand how to call the Actor

Step 2: Call Actor (step="call")
- Only after step 1, call again with step="call" and proper input based on the schema
- This calls and runs the Actor. It will create an output as an Apify dataset (with datasetId).
- This step returns a dataset preview, typically JSON-formatted tabular data.

The step parameter enforces this workflow - you cannot call an Actor without first getting its info.`,
inputSchema: zodToJsonSchema(callActorArgs),
Expand Down
19 changes: 10 additions & 9 deletions src/tools/fetch-actor-details.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,16 @@ export const fetchActorDetailsTool: ToolEntry = {
type: 'internal',
tool: {
name: HelperTools.ACTOR_GET_DETAILS,
description: `Get detailed information about an Actor by its ID or full name.\n`
+ `This tool returns title, description, URL, README (Actor's documentation), input schema, and usage statistics. \n`
+ `The Actor name is always composed of "username/name", for example, "apify/rag-web-browser".\n`
+ `Present Actor information in user-friendly format as an Actor card.\n`
+ `USAGE:\n`
+ `- Use when user asks about an Actor its details, description, input schema, etc.\n`
+ `EXAMPLES:\n`
+ `- user_input: How to use apify/rag-web-browser\n`
+ `- user_input: What is the input schema for apify/rag-web-browser`,
description: `Get detailed information about an Actor by its ID or full name.
This tool returns title, description, URL, README (Actor's documentation), input schema, and usage statistics.
The Actor name is always composed of "username/name", for example, "apify/rag-web-browser".
Present Actor information in user-friendly format as an Actor card.
USAGE:
- Use when the user asks about an Actor: its details, description, input schema, etc.
EXAMPLES:
- user_input: How to use apify/rag-web-browser
- user_input: What is the input schema for apify/rag-web-browser
- user_input: What is pricing of apify/instagram-scraper?`,
inputSchema: zodToJsonSchema(fetchActorDetailsToolArgsSchema),
ajvValidate: ajv.compile(zodToJsonSchema(fetchActorDetailsToolArgsSchema)),
call: async (toolArgs) => {
Expand Down
15 changes: 12 additions & 3 deletions src/tools/get-actor-output.ts
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,19 @@ export const getActorOutput: ToolEntry = {
tool: {
name: HelperTools.ACTOR_OUTPUT_GET,
actorFullName: HelperTools.ACTOR_OUTPUT_GET,
description: `Retrieves the output of a specific Actor execution based on its dataset ID.
You can also retrieve only specific fields from the output if needed. Use this tool to get Actor output data outside of the Actor dataset output preview, or to access fields from the Actor output dataset schema that are not included in the preview.
description: `Fetch the dataset of a specific Actor run based on datasetId.
You can also retrieve only specific fields from the output if needed.

Note: This tool is automatically included if the Apify MCP Server is configured with any Actor tools (e.g. \`apify-slash-rag-web-browser\`) or tools that can interact with Actors (e.g. \`call-actor\`, \`add-actor\`).`,
USAGE:
- Use this tool to get Actor dataset outside of the preview, or to access fields from the Actor output
dataset schema that are not included in the preview.

EXAMPLES:
- user_input: Get data of my last Actor run?
- user_input: Get number_of_likes from my dataset?

Note: This tool is automatically included if the Apify MCP Server is configured with any Actor tools
(e.g. "apify-slash-rag-web-browser") or tools that can interact with Actors (e.g. "call-actor", "add-actor").`,
inputSchema: zodToJsonSchema(getActorOutputArgs),
/**
* Allow additional properties for Skyfire mode to pass `skyfire-pay-id`.
Expand Down
10 changes: 5 additions & 5 deletions src/tools/get-html-skeleton.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { z } from 'zod';
import zodToJsonSchema from 'zod-to-json-schema';

import { ApifyClient } from '../apify-client.js';
import { ACTOR_RAG_WEB_BROWSER, HelperTools, TOOL_MAX_OUTPUT_CHARS } from '../const.js';
import { HelperTools, RAG_WEB_BROWSER, TOOL_MAX_OUTPUT_CHARS } from '../const.js';
import { getHtmlSkeletonCache } from '../state.js';
import type { InternalTool, ToolEntry } from '../types.js';
import { ajv } from '../utils/ajv.js';
Expand Down Expand Up @@ -58,7 +58,7 @@ export const getHtmlSkeleton: ToolEntry = {
// Not in cache, call the Actor for scraping
const client = new ApifyClient({ token: apifyToken });

const run = await client.actor(ACTOR_RAG_WEB_BROWSER).call({
const run = await client.actor(RAG_WEB_BROWSER).call({
query: parsed.url,
outputFormats: [
'html',
Expand All @@ -68,16 +68,16 @@ export const getHtmlSkeleton: ToolEntry = {

const datasetItems = await client.dataset(run.defaultDatasetId).listItems();
if (datasetItems.items.length === 0) {
return buildMCPResponse([`The scraping Actor (${ACTOR_RAG_WEB_BROWSER}) did not return any output for the URL: ${parsed.url}. Please check the Actor run for more details: ${run.id}`]);
return buildMCPResponse([`The scraping Actor (${RAG_WEB_BROWSER}) did not return any output for the URL: ${parsed.url}. Please check the Actor run for more details: ${run.id}`]);
}

const firstItem = datasetItems.items[0] as unknown as ScrapedPageItem;
if (firstItem.crawl.httpStatusMessage.toLocaleLowerCase() !== 'ok') {
return buildMCPResponse([`The scraping Actor (${ACTOR_RAG_WEB_BROWSER}) returned an HTTP status ${firstItem.crawl.httpStatusCode} (${firstItem.crawl.httpStatusMessage}) for the URL: ${parsed.url}. Please check the Actor run for more details: ${run.id}`]);
return buildMCPResponse([`The scraping Actor (${RAG_WEB_BROWSER}) returned an HTTP status ${firstItem.crawl.httpStatusCode} (${firstItem.crawl.httpStatusMessage}) for the URL: ${parsed.url}. Please check the Actor run for more details: ${run.id}`]);
}

if (!firstItem.html) {
return buildMCPResponse([`The scraping Actor (${ACTOR_RAG_WEB_BROWSER}) did not return any HTML content for the URL: ${parsed.url}. Please check the Actor run for more details: ${run.id}`]);
return buildMCPResponse([`The scraping Actor (${RAG_WEB_BROWSER}) did not return any HTML content for the URL: ${parsed.url}. Please check the Actor run for more details: ${run.id}`]);
}

strippedHtml = stripHtml(firstItem.html);
Expand Down
27 changes: 20 additions & 7 deletions src/tools/search-apify-docs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,25 @@ export const searchApifyDocsTool: ToolEntry = {
type: 'internal',
tool: {
name: HelperTools.DOCS_SEARCH,
description: `Apify documentation search tool. This tool allows you to search the Apify documentation using Algolia's full-text search.\n`
+ 'You can use it to find relevant documentation pages based on keywords. The results will include the URL of the documentation page, '
+ 'a fragment identifier (if available), and a limited piece of content that matches the search query. '
+ `You can then fetch the full content of the document using the ${HelperTools.DOCS_FETCH} tool by providing the URL.\n`
+ 'Use this tool when a user asks for help with Apify documentation or when you need to find relevant documentation pages based on keywords. '
+ 'For example, when a user wants to build an Apify Actor, you can search "How to build Actors" to find relevant guidance.',
description: `Search Apify documentation using full-text search.
You can use it to find relevant documentation based on keywords.
Apify documentation covers Apify Console, Actors (development
(actor.json, input schema, dataset schema, Dockerfile), deployment, builds, runs),
schedules, storages (datasets, key-value stores), Proxy, Integrations,
and Apify Academy (crawling and web scraping with Crawlee).

The results will include the URL of the documentation page, a fragment identifier (if available),
and a limited piece of content that matches the search query.

Fetch the full content of the document using the ${HelperTools.DOCS_FETCH} tool by providing the URL.

USAGE:
- Use when user asks about Apify documentation, Actor development, Crawlee, or Apify platform.

EXAMPLES:
- query: How to create an Apify Actor?
- query: How to define Actor input schema?
- query: How to scrape with Crawlee?
args: searchApifyDocsToolArgsSchema,
inputSchema: zodToJsonSchema(searchApifyDocsToolArgsSchema),
ajvValidate: ajv.compile(zodToJsonSchema(searchApifyDocsToolArgsSchema)),
Expand All @@ -59,7 +72,7 @@ export const searchApifyDocsTool: ToolEntry = {

const textContent = `You can use the Apify docs fetch tool to retrieve the full content of a document by its URL. The document fragment refers to the section of the content containing the relevant part for the search result item.
Search results for "${query}":

${results.map((result) => `- Document URL: ${result.url}${result.fragment ? `\n Document fragment: ${result.fragment}` : ''}
Content: ${result.content}`).join('\n\n')}`;
return {
Expand Down
Loading