52 changes: 51 additions & 1 deletion package-lock.json

Some generated files are not rendered by default.

2 changes: 2 additions & 0 deletions package.json
@@ -39,6 +39,7 @@
"apify": "^3.4.2",
"apify-client": "^2.12.6",
"express": "^4.21.2",
"to-json-schema": "^0.2.5",
"turndown": "^7.2.0",
"yargs": "^17.7.2",
"zod": "^3.24.1",
@@ -50,6 +51,7 @@
"@apify/eslint-config": "^1.0.0",
"@apify/tsconfig": "^0.1.0",
"@types/express": "^4.0.0",
"@types/to-json-schema": "^0.2.4",
"@types/yargs": "^17.0.33",
"@types/yargs-parser": "^21.0.3",
"dotenv": "^16.4.7",
6 changes: 1 addition & 5 deletions src/const.ts
@@ -28,6 +28,7 @@ export enum HelperTools {
DATASET_GET = 'get-dataset',
DATASET_LIST_GET = 'get-dataset-list',
DATASET_GET_ITEMS = 'get-dataset-items',
DATASET_SCHEMA_GET = 'get-dataset-schema',
KEY_VALUE_STORE_LIST_GET = 'get-key-value-store-list',
KEY_VALUE_STORE_GET = 'get-key-value-store',
KEY_VALUE_STORE_KEYS_GET = 'get-key-value-store-keys',
@@ -44,11 +45,6 @@ export const defaults = {
],
};

// Actor output const
export const ACTOR_OUTPUT_MAX_CHARS_PER_ITEM = 5_000;
export const ACTOR_OUTPUT_TRUNCATED_MESSAGE = `Output was truncated because it will not fit into context.`
+ `There is no reason to call this tool again! You can use ${HelperTools.DATASET_GET_ITEMS} tool to get more items from the dataset.`;

export const ACTOR_ADDITIONAL_INSTRUCTIONS = 'Never call/execute tool/Actor unless confirmed by the user.';

// Cache
18 changes: 9 additions & 9 deletions src/mcp/server.ts
@@ -495,22 +495,22 @@ export class ActorsMcpServer {
const callOptions: ActorCallOptions = { memory: actorTool.memoryMbytes };

try {
const { items } = await callActorGetDataset(
const { runId, datasetId, items } = await callActorGetDataset(
actorTool.actorFullName,
args,
apifyToken as string,
callOptions,
progressTracker,
);
const content = [
{ type: 'text', text: `Actor finished with runId: ${runId}, datasetId: ${datasetId}` },
];

return {
content: items.items.map((item: Record<string, unknown>) => {
return {
type: 'text',
text: JSON.stringify(item),
};
}),
};
const itemContents = items.items.map((item: Record<string, unknown>) => {
return { type: 'text', text: JSON.stringify(item) };
});
content.push(...itemContents);
return { content };
} finally {
if (progressTracker) {
progressTracker.stop();
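For context, a sketch of the tool result after this change: the first content entry reports the run, followed by one text entry per dataset item. The IDs and items below are hypothetical, for illustration only.

// Illustrative sketch (not part of this PR): shape of the content array the modified handler now returns.
const exampleResult = {
    content: [
        { type: 'text', text: 'Actor finished with runId: abc123, datasetId: def456' },
        { type: 'text', text: '{"url":"https://example.com/a","title":"Page A"}' },
        { type: 'text', text: '{"url":"https://example.com/b","title":"Page B"}' },
    ],
};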
8 changes: 8 additions & 0 deletions src/stdio.ts
@@ -95,6 +95,14 @@ const actorList = actors ? actors.split(',').map((a: string) => a.trim()) : [];
// Keys of the tool categories to enable
const toolCategoryKeys = argv.tools ? argv.tools.split(',').map((t: string) => t.trim()) : [];

// Propagate log.error to console.error for easier debugging
const originalError = log.error.bind(log);
log.error = (...args: Parameters<typeof log.error>) => {
originalError(...args);
// eslint-disable-next-line no-console
console.error(...args);
};

// Validate environment
if (!process.env.APIFY_TOKEN) {
log.error('APIFY_TOKEN is required but not set in the environment variables.');
10 changes: 5 additions & 5 deletions src/tools/actor.ts
@@ -37,6 +37,8 @@ const ajv = new Ajv({ coerceTypes: 'array', strict: false });

// Define a named return type for callActorGetDataset
export type CallActorGetDatasetResult = {
runId: string;
datasetId: string;
items: PaginatedList<Record<string, unknown>>;
};

@@ -95,8 +97,7 @@ export async function callActorGetDataset(
}

log.info(`Actor ${actorName} finished with ${items.count} items`);

return { items };
return { runId: actorRun.id, datasetId: completedRun.defaultDatasetId, items };
} catch (error) {
log.error(`Error calling actor: ${error}. Actor: ${actorName}, input: ${JSON.stringify(input)}`);
throw new Error(`Error calling Actor: ${error}`);
@@ -120,9 +121,8 @@ export async function callActorGetDataset(
* 4. Properties are shortened using shortenProperties()
* 5. Enums are added to descriptions with examples using addEnumsToDescriptionsWithExamples()
*
* @param {string[]} actors - An array of actor IDs or Actor full names.
* @param {string} apifyToken - The Apify token to use for authentication.
* @returns {Promise<Tool[]>} - A promise that resolves to an array of MCP tools.
* @param {ActorInfo[]} actorsInfo - An array of ActorInfo objects with webServerMcpPath and actorDefinitionPruned.
* @returns {Promise<ToolEntry[]>} - A promise that resolves to an array of MCP tools.
*/
export async function getNormalActorsAsTools(
actorsInfo: ActorInfo[],
130 changes: 130 additions & 0 deletions src/tools/dataset.ts
@@ -1,4 +1,5 @@
import { Ajv } from 'ajv';
import toJsonSchema from 'to-json-schema';
import { z } from 'zod';
import zodToJsonSchema from 'zod-to-json-schema';

Expand Down Expand Up @@ -112,3 +113,132 @@ export const getDatasetItems: ToolEntry = {
},
} as InternalTool,
};

/**
* Function to recursively remove empty arrays from an object
*/
function removeEmptyArrays(obj: unknown): unknown {
if (Array.isArray(obj)) {
// If the item is an array, recursively call removeEmptyArrays on each element.
return obj.map((item) => removeEmptyArrays(item));
}

if (typeof obj !== 'object' || obj === null) {
// Return primitives and null values as is.
return obj;
}

// Use reduce to build a new object, excluding keys with empty arrays.
return Object.entries(obj).reduce((acc, [key, value]) => {
const processedValue = removeEmptyArrays(value);

// Exclude the key if the processed value is an empty array.
if (Array.isArray(processedValue) && processedValue.length === 0) {
return acc;
}

acc[key] = processedValue;
return acc;
}, {} as Record<string, unknown>);
}

const getDatasetSchemaArgs = z.object({
datasetId: z.string()
.min(1)
.describe('Dataset ID or username~dataset-name.'),
limit: z.number().optional()
.describe('Maximum number of items to use for schema generation. Default is 5.')
.default(5),
clean: z.boolean().optional()
.describe('If true, uses only non-empty items and skips hidden fields (starting with #). Default is true.')
.default(true),
arrayMode: z.enum(['first', 'all']).optional()
.describe('Strategy for handling arrays. "first" uses the first item as a template, "all" merges all items. Default is "all".')
.default('all'),
additionalProperties: z.boolean().optional()
.describe('If true, allows additional properties in objects. Default is true.')
.default(true),
});

/**
* Generates a JSON schema from dataset items
*/
export const getDatasetSchema: ToolEntry = {
type: 'internal',
tool: {
name: HelperTools.DATASET_SCHEMA_GET,
actorFullName: HelperTools.DATASET_SCHEMA_GET,
description: 'Generates a JSON schema from dataset items. '
+ 'The schema describes the structure of the data in the dataset, which can be used for validation, documentation, or data processing. '
+ 'Since the dataset can be large, it is useful to understand its structure before getting the dataset items.',
inputSchema: zodToJsonSchema(getDatasetSchemaArgs),
ajvValidate: ajv.compile(zodToJsonSchema(getDatasetSchemaArgs)),
call: async (toolArgs) => {
const { args, apifyToken } = toolArgs;
const parsed = getDatasetSchemaArgs.parse(args);
const client = new ApifyClient({ token: apifyToken });

try {
// Get dataset items
const datasetResponse = await client.dataset(parsed.datasetId).listItems({
clean: parsed.clean,
limit: parsed.limit,
});

if (!datasetResponse) {
return { content: [{ type: 'text', text: `Dataset '${parsed.datasetId}' not found.` }] };
}

const datasetItems = datasetResponse.items;

if (datasetItems.length === 0) {
return { content: [{ type: 'text', text: `Dataset '${parsed.datasetId}' is empty.` }] };
}

// Clean the dataset items by removing empty arrays
const cleanedDatasetItems = datasetItems.map((item) => removeEmptyArrays(item));

try {
// Generate schema with options to handle arrays better
const schema = toJsonSchema(cleanedDatasetItems, {
arrays: { mode: parsed.arrayMode },
objects: { additionalProperties: parsed.additionalProperties },
// strings: { detectFormat: false },
});

return {
content: [{
type: 'text',
text: JSON.stringify(schema),
}],
};
} catch (schemaError) {
// Fallback: try with a simpler approach
try {
const fallbackSchema = toJsonSchema(cleanedDatasetItems, {
arrays: { mode: 'first' },
});

return {
content: [{ type: 'text', text: JSON.stringify(fallbackSchema) }],
};
} catch (fallbackError) {
return {
content: [{
type: 'text',
text: `Error generating schema: ${(schemaError as Error).message}. Fallback also failed: ${(fallbackError as Error).message}`,
}],
};
}
}
} catch (error) {
return {
content: [{
type: 'text',
text: `Error generating schema: ${(error as Error).message}`,
}],
};
}
},
} as InternalTool,
};
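For reviewers unfamiliar with to-json-schema, here is a minimal standalone sketch of how the library call used by the new tool behaves. The sample items are made up; the options mirror what get-dataset-schema passes.

// Illustrative sketch (not part of this PR): inferring a JSON schema from sample items.
import toJsonSchema from 'to-json-schema';

const sampleItems = [
    { title: 'Page A', url: 'https://example.com/a', tags: ['news'] },
    { title: 'Page B', url: 'https://example.com/b', price: 12.5 },
];

// arrays.mode 'all' merges every array element into a single item schema;
// objects.additionalProperties true leaves schemas permissive about extra keys.
const schema = toJsonSchema(sampleItems, {
    arrays: { mode: 'all' },
    objects: { additionalProperties: true },
});

console.log(JSON.stringify(schema, null, 2));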
3 changes: 2 additions & 1 deletion src/tools/index.ts
@@ -1,7 +1,7 @@
// Import specific tools that are being used
import type { ToolCategory } from '../types.js';
import { callActor, callActorGetDataset, getActorsAsTools } from './actor.js';
import { getDataset, getDatasetItems } from './dataset.js';
import { getDataset, getDatasetItems, getDatasetSchema } from './dataset.js';
import { getUserDatasetsList } from './dataset_collection.js';
import { fetchApifyDocsTool } from './fetch-apify-docs.js';
import { getActorDetailsTool } from './get-actor-details.js';
@@ -26,6 +26,7 @@ export const toolCategories = {
storage: [
getDataset,
getDatasetItems,
getDatasetSchema,
getKeyValueStore,
getKeyValueStoreKeys,
getKeyValueStoreRecord,