apify · jirispilka · Sep 22, 2025 · Sep 17, 2025 · Sep 17, 2025 · Sep 18, 2025
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -64,7 +64,7 @@
     "dotenv": "^16.4.7",
     "eslint": "^9.19.0",
     "eventsource": "^3.0.2",
-    "tsx": "^4.6.2",
+    "tsx": "^4.20.5",
     "typescript": "^5.3.3",
     "typescript-eslint": "^8.23.0",
     "vitest": "^3.0.8"

diff --git a/src/const.ts b/src/const.ts
@@ -47,15 +47,16 @@ export enum HelperTools {
     GET_HTML_SKELETON = 'get-html-skeleton',
 }
 
-export const ACTOR_RAG_WEB_BROWSER = 'apify/rag-web-browser';
+export const RAG_WEB_BROWSER = 'apify/rag-web-browser';
+export const RAG_WEB_BROWSER_WHITELISTED_FIELDS = ['query', 'maxResults', 'outputFormats'];
+export const RAG_WEB_BROWSER_ADDITIONAL_DESC = `This tool provides general web browsing functionality. For specific sites, such as e-commerce or social media, it is always better to search for a specific Actor.`;
 
 export const defaults = {
     actors: [
-        ACTOR_RAG_WEB_BROWSER,
+        RAG_WEB_BROWSER,
     ],
 };
 
-export const ACTOR_ADDITIONAL_INSTRUCTIONS = 'Never call/execute tool/Actor unless confirmed by the user.';
 export const SKYFIRE_MIN_CHARGE_USD = 5.0;
 export const SKYFIRE_SELLER_ID = process.env.SKYFIRE_SELLER_SERVICE_ID;
 

diff --git a/src/tools/actor.ts b/src/tools/actor.ts
@@ -7,9 +7,10 @@ import log from '@apify/log';
 
 import { ApifyClient } from '../apify-client.js';
 import {
-    ACTOR_ADDITIONAL_INSTRUCTIONS,
     ACTOR_MAX_MEMORY_MBYTES,
     HelperTools,
+    RAG_WEB_BROWSER,
+    RAG_WEB_BROWSER_ADDITIONAL_DESC,
     SKYFIRE_TOOL_INSTRUCTIONS,
     TOOL_MAX_OUTPUT_CHARS,
 } from '../const.js';
@@ -26,7 +27,11 @@ import type { ProgressTracker } from '../utils/progress.js';
 import type { JsonSchemaProperty } from '../utils/schema-generation.js';
 import { generateSchemaFromItems } from '../utils/schema-generation.js';
 import { getActorDefinition } from './build.js';
-import { actorNameToToolName, fixedAjvCompile, getToolSchemaID, transformActorInputSchemaProperties } from './utils.js';
+import {
+    actorNameToToolName,
+    buildActorInputSchema,
+    fixedAjvCompile,
+} from './utils.js';
 
 // Define a named return type for callActorGetDataset
 export type CallActorGetDatasetResult = {
@@ -156,45 +161,48 @@ export async function getNormalActorsAsTools(
 ): Promise<ToolEntry[]> {
     const tools: ToolEntry[] = [];
 
-    // Zip the results with their corresponding actorIDs
     for (const actorInfo of actorsInfo) {
         const { actorDefinitionPruned } = actorInfo;
 
-        if (actorDefinitionPruned) {
-            const schemaID = getToolSchemaID(actorDefinitionPruned.actorFullName);
-            if (actorDefinitionPruned.input && 'properties' in actorDefinitionPruned.input && actorDefinitionPruned.input) {
-                actorDefinitionPruned.input.properties = transformActorInputSchemaProperties(actorDefinitionPruned.input);
-                // Add schema $id, each valid JSON schema should have a unique $id
-                // see https://json-schema.org/understanding-json-schema/basics#declaring-a-unique-identifier
-                actorDefinitionPruned.input.$id = schemaID;
-            }
-            try {
-                const memoryMbytes = actorDefinitionPruned.defaultRunOptions?.memoryMbytes || ACTOR_MAX_MEMORY_MBYTES;
-                const tool: ToolEntry = {
-                    type: 'actor',
-                    tool: {
-                        name: actorNameToToolName(actorDefinitionPruned.actorFullName),
-                        actorFullName: actorDefinitionPruned.actorFullName,
-                        description: `This tool calls the Actor "${actorDefinitionPruned.actorFullName}" and retrieves its output results. Use this tool instead of the "${HelperTools.ACTOR_CALL}" if user requests to use this specific Actor.
-Actor description: ${actorDefinitionPruned.description}
-Instructions: ${ACTOR_ADDITIONAL_INSTRUCTIONS}`,
-                        inputSchema: actorDefinitionPruned.input
-                        // So Actor without input schema works - MCP client expects JSON schema valid output
-                        || {
-                            type: 'object',
-                            properties: {},
-                            required: [],
-                        },
-                        // Additional props true to allow skyfire-pay-id
-                        ajvValidate: fixedAjvCompile(ajv, { ...actorDefinitionPruned.input, additionalProperties: true }),
-                        memoryMbytes: memoryMbytes > ACTOR_MAX_MEMORY_MBYTES ? ACTOR_MAX_MEMORY_MBYTES : memoryMbytes,
-                    },
-                };
-                tools.push(tool);
-            } catch (validationError) {
-                log.error('Failed to compile AJV schema for Actor', { actorName: actorDefinitionPruned.actorFullName, error: validationError });
-            }
+        if (!actorDefinitionPruned) continue;
+
+        const isRag = actorDefinitionPruned.actorFullName === RAG_WEB_BROWSER;
+        const { inputSchema } = buildActorInputSchema(actorDefinitionPruned.actorFullName, actorDefinitionPruned.input, isRag);
+
+        let description = `This tool calls the Actor "${actorDefinitionPruned.actorFullName}" and retrieves its output results.
+Use this tool instead of the "${HelperTools.ACTOR_CALL}" if user requests this specific Actor.
+Actor description: ${actorDefinitionPruned.description}`;
+        if (isRag) {
+            description += `\n${RAG_WEB_BROWSER_ADDITIONAL_DESC}`; // newline keeps the hint from fusing with the Actor description
+        }
+
+        const memoryMbytes = Math.min(
+            actorDefinitionPruned.defaultRunOptions?.memoryMbytes || ACTOR_MAX_MEMORY_MBYTES,
+            ACTOR_MAX_MEMORY_MBYTES,
+        );
+        // additionalProperties: true lets extra keys such as skyfire-pay-id pass validation
+        let ajvValidate;
+        try {
+            ajvValidate = fixedAjvCompile(ajv, { ...inputSchema, additionalProperties: true });
+        } catch (e) {
+            log.error('Failed to compile schema', {
+                actorName: actorDefinitionPruned.actorFullName,
+                error: e,
+            });
+            continue;
         }
+
+        tools.push({
+            type: 'actor',
+            tool: {
+                name: actorNameToToolName(actorDefinitionPruned.actorFullName),
+                actorFullName: actorDefinitionPruned.actorFullName,
+                description,
+                inputSchema,
+                ajvValidate,
+                memoryMbytes,
+            },
+        });
     }
     return tools;
 }
@@ -317,8 +325,7 @@ export const callActor: ToolEntry = {
     tool: {
         name: HelperTools.ACTOR_CALL,
         actorFullName: HelperTools.ACTOR_CALL,
-        description: `Call Any Actor from Apify Store - Two-Step Process
-
+        description: `Call any Actor from Apify Store - two-step process
 This tool uses a mandatory two-step process to safely call any Actor from the Apify store.
 
 USAGE:
@@ -328,13 +335,14 @@ USAGE:
 MANDATORY TWO-STEP WORKFLOW:
 
 Step 1: Get Actor Info (step="info", default)
-• First call this tool with step="info" to get Actor details and input schema
-• This returns the Actor description, documentation, and required input schema
-• You MUST do this step first - it's required to understand how to call the Actor
-
-Step 2: Call Actor (step="call") 
-• Only after step 1, call again with step="call" and proper input based on the schema
-• This executes the Actor and returns the results
+- First call this tool with step="info" to get Actor details and input schema
+- This returns the Actor description, documentation, and required input schema
+- You MUST do this step first - it's required to understand how to call the Actor
+
+Step 2: Call Actor (step="call")
+- Only after step 1, call again with step="call" and proper input based on the schema
+- This calls and runs the Actor. It will create an output as an Apify dataset (with datasetId).
+- This step returns a dataset preview, typically JSON-formatted tabular data.
 
 The step parameter enforces this workflow - you cannot call an Actor without first getting its info.`,
         inputSchema: zodToJsonSchema(callActorArgs),

diff --git a/src/tools/fetch-actor-details.ts b/src/tools/fetch-actor-details.ts
@@ -17,15 +17,16 @@ export const fetchActorDetailsTool: ToolEntry = {
     type: 'internal',
     tool: {
         name: HelperTools.ACTOR_GET_DETAILS,
-        description: `Get detailed information about an Actor by its ID or full name.\n`
-            + `This tool returns title, description, URL, README (Actor's documentation), input schema, and usage statistics. \n`
-            + `The Actor name is always composed of "username/name", for example, "apify/rag-web-browser".\n`
-            + `Present Actor information in user-friendly format as an Actor card.\n`
-            + `USAGE:\n`
-            + `- Use when user asks about an Actor its details, description, input schema, etc.\n`
-            + `EXAMPLES:\n`
-            + `- user_input: How to use apify/rag-web-browser\n`
-            + `- user_input: What is the input schema for apify/rag-web-browser`,
+        description: `Get detailed information about an Actor by its ID or full name.
+This tool returns title, description, URL, README (Actor's documentation), input schema, and usage statistics.
+The Actor name is always composed of "username/name", for example, "apify/rag-web-browser".
+Present Actor information in user-friendly format as an Actor card.
+USAGE:
+- Use when user asks about an Actor: its details, description, input schema, etc.
+EXAMPLES:
+- user_input: How to use apify/rag-web-browser
+- user_input: What is the input schema for apify/rag-web-browser
+- user_input: What is the pricing of apify/instagram-scraper?`,
         inputSchema: zodToJsonSchema(fetchActorDetailsToolArgsSchema),
         ajvValidate: ajv.compile(zodToJsonSchema(fetchActorDetailsToolArgsSchema)),
         call: async (toolArgs) => {

diff --git a/src/tools/get-actor-output.ts b/src/tools/get-actor-output.ts
@@ -68,10 +68,19 @@ export const getActorOutput: ToolEntry = {
     tool: {
         name: HelperTools.ACTOR_OUTPUT_GET,
         actorFullName: HelperTools.ACTOR_OUTPUT_GET,
-        description: `Retrieves the output of a specific Actor execution based on its dataset ID.
-You can also retrieve only specific fields from the output if needed. Use this tool to get Actor output data outside of the Actor dataset output preview, or to access fields from the Actor output dataset schema that are not included in the preview.
+        description: `Fetch the dataset of a specific Actor run based on datasetId.
+You can also retrieve only specific fields from the output if needed.
 
-Note: This tool is automatically included if the Apify MCP Server is configured with any Actor tools (e.g. \`apify-slash-rag-web-browser\`) or tools that can interact with Actors (e.g. \`call-actor\`, \`add-actor\`).`,
+USAGE:
+- Use this tool to get Actor dataset outside of the preview, or to access fields from the Actor output
+dataset schema that are not included in the preview.
+
+EXAMPLES:
+- user_input: Get data of my last Actor run?
+- user_input: Get number_of_likes from my dataset?
+
+Note: This tool is automatically included if the Apify MCP Server is configured with any Actor tools
+ (e.g. "apify-slash-rag-web-browser") or tools that can interact with Actors (e.g. "call-actor", "add-actor").`,
         inputSchema: zodToJsonSchema(getActorOutputArgs),
         /**
          * Allow additional properties for Skyfire mode to pass `skyfire-pay-id`.

diff --git a/src/tools/get-html-skeleton.ts b/src/tools/get-html-skeleton.ts
@@ -2,7 +2,7 @@ import { z } from 'zod';
 import zodToJsonSchema from 'zod-to-json-schema';
 
 import { ApifyClient } from '../apify-client.js';
-import { ACTOR_RAG_WEB_BROWSER, HelperTools, TOOL_MAX_OUTPUT_CHARS } from '../const.js';
+import { HelperTools, RAG_WEB_BROWSER, TOOL_MAX_OUTPUT_CHARS } from '../const.js';
 import { getHtmlSkeletonCache } from '../state.js';
 import type { InternalTool, ToolEntry } from '../types.js';
 import { ajv } from '../utils/ajv.js';
@@ -58,7 +58,7 @@ export const getHtmlSkeleton: ToolEntry = {
                 // Not in cache, call the Actor for scraping
                 const client = new ApifyClient({ token: apifyToken });
 
-                const run = await client.actor(ACTOR_RAG_WEB_BROWSER).call({
+                const run = await client.actor(RAG_WEB_BROWSER).call({
                     query: parsed.url,
                     outputFormats: [
                         'html',
@@ -68,16 +68,16 @@ export const getHtmlSkeleton: ToolEntry = {
 
                 const datasetItems = await client.dataset(run.defaultDatasetId).listItems();
                 if (datasetItems.items.length === 0) {
-                    return buildMCPResponse([`The scraping Actor (${ACTOR_RAG_WEB_BROWSER}) did not return any output for the URL: ${parsed.url}. Please check the Actor run for more details: ${run.id}`]);
+                    return buildMCPResponse([`The scraping Actor (${RAG_WEB_BROWSER}) did not return any output for the URL: ${parsed.url}. Please check the Actor run for more details: ${run.id}`]);
                 }
 
                 const firstItem = datasetItems.items[0] as unknown as ScrapedPageItem;
                 if (firstItem.crawl.httpStatusMessage.toLocaleLowerCase() !== 'ok') {
-                    return buildMCPResponse([`The scraping Actor (${ACTOR_RAG_WEB_BROWSER}) returned an HTTP status ${firstItem.crawl.httpStatusCode} (${firstItem.crawl.httpStatusMessage}) for the URL: ${parsed.url}. Please check the Actor run for more details: ${run.id}`]);
+                    return buildMCPResponse([`The scraping Actor (${RAG_WEB_BROWSER}) returned an HTTP status ${firstItem.crawl.httpStatusCode} (${firstItem.crawl.httpStatusMessage}) for the URL: ${parsed.url}. Please check the Actor run for more details: ${run.id}`]);
                 }
 
                 if (!firstItem.html) {
-                    return buildMCPResponse([`The scraping Actor (${ACTOR_RAG_WEB_BROWSER}) did not return any HTML content for the URL: ${parsed.url}. Please check the Actor run for more details: ${run.id}`]);
+                    return buildMCPResponse([`The scraping Actor (${RAG_WEB_BROWSER}) did not return any HTML content for the URL: ${parsed.url}. Please check the Actor run for more details: ${run.id}`]);
                 }
 
                 strippedHtml = stripHtml(firstItem.html);

diff --git a/src/tools/search-apify-docs.ts b/src/tools/search-apify-docs.ts
@@ -30,12 +30,25 @@ export const searchApifyDocsTool: ToolEntry = {
     type: 'internal',
     tool: {
         name: HelperTools.DOCS_SEARCH,
-        description: `Apify documentation search tool. This tool allows you to search the Apify documentation using Algolia's full-text search.\n`
-            + 'You can use it to find relevant documentation pages based on keywords. The results will include the URL of the documentation page, '
-            + 'a fragment identifier (if available), and a limited piece of content that matches the search query. '
-            + `You can then fetch the full content of the document using the ${HelperTools.DOCS_FETCH} tool by providing the URL.\n`
-            + 'Use this tool when a user asks for help with Apify documentation or when you need to find relevant documentation pages based on keywords. '
-            + 'For example, when a user wants to build an Apify Actor, you can search "How to build Actors" to find relevant guidance.',
+        description: `Search Apify documentation using full-text search.
+You can use it to find relevant documentation based on keywords.
+Apify documentation has information about Apify console, Actors (development
+(actor.json, input schema, dataset schema, dockerfile), deployment, builds, runs),
+schedules, storages (datasets, key-value store), Proxy, Integrations,
+Apify Academy (crawling and webscraping with Crawlee).
+
+The results will include the URL of the documentation page, a fragment identifier (if available),
+and a limited piece of content that matches the search query.
+
+Fetch the full content of the document using the ${HelperTools.DOCS_FETCH} tool by providing the URL.
+
+USAGE:
+- Use when user asks about Apify documentation, Actor development, Crawlee, or Apify platform.
+
+EXAMPLES:
+- query: How to create an Apify Actor?
+- query: How to define Actor input schema?
+- query: How to scrape with Crawlee?`,
         args: searchApifyDocsToolArgsSchema,
         inputSchema: zodToJsonSchema(searchApifyDocsToolArgsSchema),
         ajvValidate: ajv.compile(zodToJsonSchema(searchApifyDocsToolArgsSchema)),
@@ -59,7 +72,7 @@ export const searchApifyDocsTool: ToolEntry = {
 
             const textContent = `You can use the Apify docs fetch tool to retrieve the full content of a document by its URL. The document fragment refers to the section of the content containing the relevant part for the search result item.
 Search results for "${query}":
-                    
+
 ${results.map((result) => `- Document URL: ${result.url}${result.fragment ? `\n  Document fragment: ${result.fragment}` : ''}
   Content: ${result.content}`).join('\n\n')}`;
             return {