diff --git a/package-lock.json b/package-lock.json index 00bdff40..16563736 100644 --- a/package-lock.json +++ b/package-lock.json @@ -18,6 +18,7 @@ "apify": "^3.4.2", "apify-client": "^2.12.6", "express": "^4.21.2", + "to-json-schema": "^0.2.5", "turndown": "^7.2.0", "yargs": "^17.7.2", "zod": "^3.24.1", @@ -32,6 +33,7 @@ "@apify/eslint-config": "^1.0.0", "@apify/tsconfig": "^0.1.0", "@types/express": "^4.0.0", + "@types/to-json-schema": "^0.2.4", "@types/yargs": "^17.0.33", "@types/yargs-parser": "^21.0.3", "dotenv": "^16.4.7", @@ -2139,6 +2141,16 @@ "@types/send": "*" } }, + "node_modules/@types/to-json-schema": { + "version": "0.2.4", + "resolved": "https://registry.npmjs.org/@types/to-json-schema/-/to-json-schema-0.2.4.tgz", + "integrity": "sha512-ENEB7JBlKODdihNrg08RgtLT8DZj43K48dV39yzV93QQPZhbQ+zan8osWpKll3HFEuLsiSttBdJ6QZFGsvRd4Q==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/json-schema": "*" + } + }, "node_modules/@types/turndown": { "version": "5.0.5", "resolved": "https://registry.npmjs.org/@types/turndown/-/turndown-5.0.5.tgz", @@ -5898,11 +5910,35 @@ "deprecated": "This package is deprecated. 
Use require('node:util').isDeepStrictEqual instead.", "license": "MIT" }, + "node_modules/lodash.keys": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/lodash.keys/-/lodash.keys-4.2.0.tgz", + "integrity": "sha512-J79MkJcp7Df5mizHiVNpjoHXLi4HLjh9VLS/M7lQSGoQ+0oQ+lWEigREkqKyizPB1IawvQLLKY8mzEcm1tkyxQ==", + "license": "MIT" + }, "node_modules/lodash.merge": { "version": "4.6.2", "resolved": "https://registry.npmjs.org/lodash.merge/-/lodash.merge-4.6.2.tgz", "integrity": "sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==", - "dev": true, + "license": "MIT" + }, + "node_modules/lodash.omit": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/lodash.omit/-/lodash.omit-4.5.0.tgz", + "integrity": "sha512-XeqSp49hNGmlkj2EJlfrQFIzQ6lXdNro9sddtQzcJY8QaoC2GO0DT7xaIokHeyM+mIT0mPMlPvkYzg2xCuHdZg==", + "deprecated": "This package is deprecated. Use destructuring assignment syntax instead.", + "license": "MIT" + }, + "node_modules/lodash.without": { + "version": "4.4.0", + "resolved": "https://registry.npmjs.org/lodash.without/-/lodash.without-4.4.0.tgz", + "integrity": "sha512-M3MefBwfDhgKgINVuBJCO1YR3+gf6s9HNJsIiZ/Ru77Ws6uTb9eBuvrkpzO+9iLoAaRodGuq7tyrPCx+74QYGQ==", + "license": "MIT" + }, + "node_modules/lodash.xor": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/lodash.xor/-/lodash.xor-4.5.0.tgz", + "integrity": "sha512-sVN2zimthq7aZ5sPGXnSz32rZPuqcparVW50chJQe+mzTYV+IsxSsl/2gnkWWE2Of7K3myBQBqtLKOUEHJKRsQ==", "license": "MIT" }, "node_modules/loupe": { @@ -7634,6 +7670,20 @@ "integrity": "sha512-Je6p7pkk+KMzMv2XXKmAE3McmolOQFdxkKw0R8EYNr7sELW46JqnNeTX8ybPiQgvg1ymCoF8LXs5fzFaZvJPTA==", "license": "MIT" }, + "node_modules/to-json-schema": { + "version": "0.2.5", + "resolved": "https://registry.npmjs.org/to-json-schema/-/to-json-schema-0.2.5.tgz", + "integrity": "sha512-jP1ievOee8pec3tV9ncxLSS48Bnw7DIybgy112rhMCEhf3K4uyVNZZHr03iQQBzbV5v5Hos+dlZRRyk6YSMNDw==", + "license": "MIT", + 
"dependencies": { + "lodash.isequal": "^4.5.0", + "lodash.keys": "^4.2.0", + "lodash.merge": "^4.6.2", + "lodash.omit": "^4.5.0", + "lodash.without": "^4.4.0", + "lodash.xor": "^4.5.0" + } + }, "node_modules/to-regex-range": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", diff --git a/package.json b/package.json index 71922d19..85d2ba98 100644 --- a/package.json +++ b/package.json @@ -39,6 +39,7 @@ "apify": "^3.4.2", "apify-client": "^2.12.6", "express": "^4.21.2", + "to-json-schema": "^0.2.5", "turndown": "^7.2.0", "yargs": "^17.7.2", "zod": "^3.24.1", @@ -50,6 +51,7 @@ "@apify/eslint-config": "^1.0.0", "@apify/tsconfig": "^0.1.0", "@types/express": "^4.0.0", + "@types/to-json-schema": "^0.2.4", "@types/yargs": "^17.0.33", "@types/yargs-parser": "^21.0.3", "dotenv": "^16.4.7", diff --git a/src/const.ts b/src/const.ts index 0152ea7a..336696ba 100644 --- a/src/const.ts +++ b/src/const.ts @@ -28,6 +28,7 @@ export enum HelperTools { DATASET_GET = 'get-dataset', DATASET_LIST_GET = 'get-dataset-list', DATASET_GET_ITEMS = 'get-dataset-items', + DATASET_SCHEMA_GET = 'get-dataset-schema', KEY_VALUE_STORE_LIST_GET = 'get-key-value-store-list', KEY_VALUE_STORE_GET = 'get-key-value-store', KEY_VALUE_STORE_KEYS_GET = 'get-key-value-store-keys', @@ -44,11 +45,6 @@ export const defaults = { ], }; -// Actor output const -export const ACTOR_OUTPUT_MAX_CHARS_PER_ITEM = 5_000; -export const ACTOR_OUTPUT_TRUNCATED_MESSAGE = `Output was truncated because it will not fit into context.` - + `There is no reason to call this tool again! 
You can use ${HelperTools.DATASET_GET_ITEMS} tool to get more items from the dataset.`; - export const ACTOR_ADDITIONAL_INSTRUCTIONS = 'Never call/execute tool/Actor unless confirmed by the user.'; // Cache diff --git a/src/mcp/server.ts b/src/mcp/server.ts index cc022826..99279732 100644 --- a/src/mcp/server.ts +++ b/src/mcp/server.ts @@ -495,22 +495,22 @@ export class ActorsMcpServer { const callOptions: ActorCallOptions = { memory: actorTool.memoryMbytes }; try { - const { items } = await callActorGetDataset( + const { runId, datasetId, items } = await callActorGetDataset( actorTool.actorFullName, args, apifyToken as string, callOptions, progressTracker, ); + const content = [ + { type: 'text', text: `Actor finished with runId: ${runId}, datasetId ${datasetId}` }, + ]; - return { - content: items.items.map((item: Record) => { - return { - type: 'text', - text: JSON.stringify(item), - }; - }), - }; + const itemContents = items.items.map((item: Record) => { + return { type: 'text', text: JSON.stringify(item) }; + }); + content.push(...itemContents); + return { content }; } finally { if (progressTracker) { progressTracker.stop(); diff --git a/src/stdio.ts b/src/stdio.ts index a33ffe58..3506d1dd 100644 --- a/src/stdio.ts +++ b/src/stdio.ts @@ -95,6 +95,14 @@ const actorList = actors ? actors.split(',').map((a: string) => a.trim()) : []; // Keys of the tool categories to enable const toolCategoryKeys = argv.tools ? 
argv.tools.split(',').map((t: string) => t.trim()) : []; +// Propagate log.error to console.error for easier debugging +const originalError = log.error.bind(log); +log.error = (...args: Parameters) => { + originalError(...args); + // eslint-disable-next-line no-console + console.error(...args); +}; + // Validate environment if (!process.env.APIFY_TOKEN) { log.error('APIFY_TOKEN is required but not set in the environment variables.'); diff --git a/src/tools/actor.ts b/src/tools/actor.ts index f02b737d..47cc3657 100644 --- a/src/tools/actor.ts +++ b/src/tools/actor.ts @@ -37,6 +37,8 @@ const ajv = new Ajv({ coerceTypes: 'array', strict: false }); // Define a named return type for callActorGetDataset export type CallActorGetDatasetResult = { + runId: string; + datasetId: string; items: PaginatedList>; }; @@ -95,8 +97,7 @@ export async function callActorGetDataset( } log.info(`Actor ${actorName} finished with ${items.count} items`); - - return { items }; + return { runId: actorRun.id, datasetId: completedRun.defaultDatasetId, items }; } catch (error) { log.error(`Error calling actor: ${error}. Actor: ${actorName}, input: ${JSON.stringify(input)}`); throw new Error(`Error calling Actor: ${error}`); @@ -120,9 +121,8 @@ export async function callActorGetDataset( * 4. Properties are shortened using shortenProperties() * 5. Enums are added to descriptions with examples using addEnumsToDescriptionsWithExamples() * - * @param {string[]} actors - An array of actor IDs or Actor full names. - * @param {string} apifyToken - The Apify token to use for authentication. - * @returns {Promise} - A promise that resolves to an array of MCP tools. + * @param {ActorInfo[]} actorsInfo - An array of ActorInfo objects with webServerMcpPath and actorDefinitionPruned. + * @returns {Promise} - A promise that resolves to an array of MCP tools. 
/**
 * Recursively strips object properties whose value is an empty array.
 *
 * - Arrays are mapped element by element; the array itself is kept, and an empty array that is
 *   an *element* of another array is preserved as is.
 * - Primitives and `null` are returned unchanged.
 * - Objects are rebuilt without the keys whose processed value is an empty array.
 *
 * The input is not mutated; a new structure is returned.
 *
 * @param obj - Any JSON-like value (typically a single dataset item).
 * @returns A copy of `obj` without empty-array properties.
 */
function removeEmptyArrays(obj: unknown): unknown {
    if (Array.isArray(obj)) {
        return obj.map((item) => removeEmptyArrays(item));
    }

    if (typeof obj !== 'object' || obj === null) {
        return obj;
    }

    const keptEntries = Object.entries(obj)
        .map(([key, value]): [string, unknown] => [key, removeEmptyArrays(value)])
        .filter(([, value]) => !(Array.isArray(value) && value.length === 0));

    // Object.fromEntries defines own data properties, so a key such as "__proto__" coming from
    // scraped data is copied like any other key instead of going through the prototype setter
    // (which `acc[key] = value` on a plain object would do).
    return Object.fromEntries(keptEntries);
}

/**
 * Input arguments of the dataset-schema tool. Every option except `datasetId` has a default.
 */
const getDatasetSchemaArgs = z.object({
    datasetId: z.string()
        .min(1)
        .describe('Dataset ID or username~dataset-name.'),
    limit: z.number().optional()
        .describe('Maximum number of items to use for schema generation. Default is 5.')
        .default(5),
    clean: z.boolean().optional()
        .describe('If true, uses only non-empty items and skips hidden fields (starting with #). Default is true.')
        .default(true),
    arrayMode: z.enum(['first', 'all']).optional()
        .describe('Strategy for handling arrays. "first" uses first item as template, "all" merges all items. Default is "all".')
        .default('all'),
    additionalProperties: z.boolean().optional()
        .describe('If true, allows additional properties in objects. Default is true.')
        .default(true),
});

/**
 * Internal tool that generates a JSON schema from a sample of dataset items.
 *
 * Flow of `call`:
 * 1. Parse the arguments and list up to `limit` items of the dataset.
 * 2. Return a plain-text message when the response is missing or the dataset has no items.
 * 3. Drop empty-array properties from the items and infer the schema with `to-json-schema`.
 * 4. If inference with the requested options throws, retry once with the simpler
 *    `arrays.mode = 'first'` configuration.
 *
 * The tool is registered in the `storage` tool category in `src/tools/index.ts`.
 */
export const getDatasetSchema: ToolEntry = {
    type: 'internal',
    tool: {
        name: HelperTools.DATASET_SCHEMA_GET,
        actorFullName: HelperTools.DATASET_SCHEMA_GET,
        description: 'Generates a JSON schema from dataset items. '
            + 'The schema describes the structure of the data in the dataset, which can be used for validation, documentation, or data processing. '
            + 'Since the dataset can be large it is convenient to understand the structure of the dataset before getting dataset items.',
        inputSchema: zodToJsonSchema(getDatasetSchemaArgs),
        ajvValidate: ajv.compile(zodToJsonSchema(getDatasetSchemaArgs)),
        call: async (toolArgs) => {
            const { args, apifyToken } = toolArgs;
            const parsed = getDatasetSchemaArgs.parse(args);
            const client = new ApifyClient({ token: apifyToken });

            // Fetch only a small sample; the schema is inferred from these items alone.
            const datasetResponse = await client.dataset(parsed.datasetId).listItems({
                clean: parsed.clean,
                limit: parsed.limit,
            });

            if (!datasetResponse) {
                return { content: [{ type: 'text', text: `Dataset '${parsed.datasetId}' not found.` }] };
            }

            const datasetItems = datasetResponse.items;

            if (datasetItems.length === 0) {
                return { content: [{ type: 'text', text: `Dataset '${parsed.datasetId}' is empty.` }] };
            }

            // Empty arrays carry no item type information, so they are removed before inference.
            const cleanedDatasetItems = datasetItems.map((item) => removeEmptyArrays(item));

            try {
                // First attempt: honour every option requested by the caller.
                const schema = toJsonSchema(cleanedDatasetItems, {
                    arrays: { mode: parsed.arrayMode },
                    objects: { additionalProperties: parsed.additionalProperties },
                });

                return {
                    content: [{
                        type: 'text',
                        text: JSON.stringify(schema),
                    }],
                };
            } catch {
                // Deliberate best-effort fallback: retry with the simplest array strategy and
                // library defaults. An error thrown here propagates to the caller.
                const fallbackSchema = toJsonSchema(cleanedDatasetItems, {
                    arrays: { mode: 'first' },
                });

                return {
                    content: [{ type: 'text', text: JSON.stringify(fallbackSchema) }],
                };
            }
        },
    } as InternalTool,
};