Skip to content

Commit 25fd5ad

Browse files
authored
fix: tool schema array type infer and nested props (#45)
* Fix array type inference
* Build nested input schemas, fix WCC
* Export inferArrayItemType helper function
* Improve filterSchemaProperties and add more comments related to Actor definition processing
* Fix order
* Lint, fix lint config
1 parent 1083436 commit 25fd5ad

File tree

4 files changed

+177
-41
lines changed

4 files changed

+177
-41
lines changed

eslint.config.mjs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import apify from '@apify/eslint-config';
22

33
// eslint-disable-next-line import/no-default-export
44
export default [
5-
{ ignores: ['**/dist'] }, // Ignores need to happen first
5+
{ ignores: ['**/dist', '**/.venv'] }, // Ignores need to happen first
66
...apify,
77
{
88
languageOptions: {

src/actors.ts

Lines changed: 140 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import { ApifyClient } from 'apify-client';
33

44
import { ACTOR_ADDITIONAL_INSTRUCTIONS, defaults, MAX_DESCRIPTION_LENGTH, ACTOR_README_MAX_LENGTH } from './const.js';
55
import { log } from './logger.js';
6-
import type { ActorDefinitionPruned, ActorDefinitionWithDesc, SchemaProperties, Tool } from './types.js';
6+
import type { ActorDefinitionPruned, ActorDefinitionWithDesc, IActorInputSchema, ISchemaProperties, Tool } from './types.js';
77

88
export function actorNameToToolName(actorName: string): string {
99
return actorName
@@ -67,7 +67,11 @@ function pruneActorDefinition(response: ActorDefinitionWithDesc): ActorDefinitio
6767
actorFullName: response.actorFullName || '',
6868
buildTag: response?.buildTag || '',
6969
readme: response?.readme || '',
70-
input: response?.input || null,
70+
input: response?.input && 'type' in response.input && 'properties' in response.input
71+
? { ...response.input,
72+
type: response.input.type as string,
73+
properties: response.input.properties as Record<string, ISchemaProperties> }
74+
: undefined,
7175
description: response.description,
7276
defaultRunOptions: response.defaultRunOptions,
7377
};
@@ -77,7 +81,7 @@ function pruneActorDefinition(response: ActorDefinitionWithDesc): ActorDefinitio
7781
* Shortens the description and enum values of schema properties.
7882
* @param properties
7983
*/
80-
export function shortenProperties(properties: { [key: string]: SchemaProperties}): { [key: string]: SchemaProperties } {
84+
export function shortenProperties(properties: { [key: string]: ISchemaProperties}): { [key: string]: ISchemaProperties } {
8185
for (const property of Object.values(properties)) {
8286
if (property.description.length > MAX_DESCRIPTION_LENGTH) {
8387
property.description = `${property.description.slice(0, MAX_DESCRIPTION_LENGTH)}...`;
@@ -105,59 +109,170 @@ export function truncateActorReadme(readme: string, limit = ACTOR_README_MAX_LEN
105109
/**
106110
* Helps determine the type of items in an array schema property.
107111
* Priority order: explicit type in items > prefill type > default value type > editor type.
112+
*
113+
* Per JSON Schema, an array should declare the type of its items; most of the time the Actor input schema does not, so we need to infer it.
114+
*
108115
*/
109-
export function inferArrayItemType(property: SchemaProperties): string | null {
116+
export function inferArrayItemType(property: ISchemaProperties): string | null {
110117
return property.items?.type
111-
|| (property.prefill && typeof property.prefill[0])
112-
|| (property.default && typeof property.default[0])
118+
|| (Array.isArray(property.prefill) && property.prefill.length > 0 && typeof property.prefill[0])
119+
|| (Array.isArray(property.default) && property.default.length > 0 && typeof property.default[0])
113120
|| (property.editor && getEditorItemType(property.editor))
114121
|| null;
115122

116123
function getEditorItemType(editor: string): string | null {
117124
const editorTypeMap: Record<string, string> = {
118125
requestListSources: 'object',
119126
stringList: 'string',
127+
json: 'object',
128+
globs: 'object',
120129
};
121130
return editorTypeMap[editor] || null;
122131
}
123132
}
124133

125134
/**
126135
* Add enum values as string to property descriptions.
136+
*
137+
* This is a preventive measure for cases where a library or agent framework
138+
* does not handle enums or examples based on the JSON Schema definition.
139+
*
140+
* https://json-schema.org/understanding-json-schema/reference/enum
141+
* https://json-schema.org/understanding-json-schema/reference/annotations
142+
*
127143
* @param properties
128144
*/
129-
export function addEnumsToDescriptionsWithExamples(properties: { [key: string]: SchemaProperties }): { [key: string]: SchemaProperties } {
145+
function addEnumsToDescriptionsWithExamples(properties: Record<string, ISchemaProperties>): Record<string, ISchemaProperties> {
130146
for (const property of Object.values(properties)) {
131147
if (property.enum && property.enum.length > 0) {
132-
property.description = `${property.description}\nPossible values: ${property.enum.join(',')}`;
148+
property.description = `${property.description}\nPossible values: ${property.enum.slice(0, 20).join(',')}`;
133149
}
134150
const value = property.prefill ?? property.default;
135151
if (value && !(Array.isArray(value) && value.length === 0)) {
136152
property.examples = Array.isArray(value) ? value : [value];
153+
property.description = `${property.description}\nExample values: ${JSON.stringify(value)}`;
137154
}
138155
}
139156
return properties;
140157
}
141158

142159
/**
143160
* Filters schema properties to include only the necessary fields.
161+
*
162+
* This is done to reduce the size of the input schema and to make it more readable.
163+
*
144164
* @param properties
145165
*/
146-
export function filterSchemaProperties(properties: { [key: string]: SchemaProperties }): { [key: string]: SchemaProperties } {
147-
const filteredProperties: { [key: string]: SchemaProperties } = {};
166+
export function filterSchemaProperties(properties: { [key: string]: ISchemaProperties }): { [key: string]: ISchemaProperties } {
167+
const filteredProperties: { [key: string]: ISchemaProperties } = {};
148168
for (const [key, property] of Object.entries(properties)) {
149-
const { title, description, enum: enumValues, type, default: defaultValue, prefill } = property;
150-
filteredProperties[key] = { title, description, enum: enumValues, type, default: defaultValue, prefill };
151-
if (type === 'array') {
169+
filteredProperties[key] = {
170+
title: property.title,
171+
description: property.description,
172+
enum: property.enum,
173+
type: property.type,
174+
default: property.default,
175+
prefill: property.prefill,
176+
properties: property.properties,
177+
items: property.items,
178+
required: property.required,
179+
};
180+
if (property.type === 'array' && !property.items?.type) {
152181
const itemsType = inferArrayItemType(property);
153182
if (itemsType) {
154-
filteredProperties[key].items = { type: itemsType };
183+
filteredProperties[key].items = {
184+
...filteredProperties[key].items,
185+
title: filteredProperties[key].title ?? 'Item',
186+
description: filteredProperties[key].description ?? 'Item',
187+
type: itemsType,
188+
};
155189
}
156190
}
157191
}
158192
return filteredProperties;
159193
}
160194

195+
/**
196+
* Marks input properties as required by adding a "REQUIRED" prefix to their descriptions.
197+
* Takes an IActorInputSchema object and returns a modified Record of ISchemaProperties.
198+
*
199+
* This is done for maximum compatibility in cases where a library or agent framework does not consider
200+
* required fields and does not handle the JSON schema properly: we prepend the "REQUIRED" prefix to the description
201+
* as a preventive measure.
202+
* @param {IActorInputSchema} input - Actor input object containing properties and required fields
203+
* @returns {Record<string, ISchemaProperties>} - Modified properties with required fields marked
204+
*/
205+
function markInputPropertiesAsRequired(input: IActorInputSchema): Record<string, ISchemaProperties> {
206+
const { required = [], properties } = input;
207+
208+
for (const property of Object.keys(properties)) {
209+
if (required.includes(property)) {
210+
properties[property] = {
211+
...properties[property],
212+
description: `**REQUIRED** ${properties[property].description}`,
213+
};
214+
}
215+
}
216+
217+
return properties;
218+
}
219+
220+
/**
221+
* Builds nested properties for object types in the schema.
222+
*
223+
* Specifically handles special cases like proxy configuration and request list sources
224+
* by adding predefined nested properties to these object types.
225+
* This is necessary for the agent to correctly infer how to structure object inputs
226+
* when passing arguments to the Actor.
227+
*
228+
* For proxy objects (type='object', editor='proxy'), adds 'useApifyProxy' property.
229+
* For request list sources (type='array', editor='requestListSources'), adds URL structure to items.
230+
*
231+
* @param {Record<string, ISchemaProperties>} properties - The input schema properties
232+
* @returns {Record<string, ISchemaProperties>} Modified properties with nested properties
233+
*/
234+
function buildNestedProperties(properties: Record<string, ISchemaProperties>): Record<string, ISchemaProperties> {
235+
const clonedProperties = { ...properties };
236+
237+
for (const [propertyName, property] of Object.entries(clonedProperties)) {
238+
if (property.type === 'object' && property.editor === 'proxy') {
239+
clonedProperties[propertyName] = {
240+
...property,
241+
properties: {
242+
...property.properties,
243+
useApifyProxy: {
244+
title: 'Use Apify Proxy',
245+
type: 'boolean',
246+
description: 'Whether to use Apify Proxy - ALWAYS SET TO TRUE.',
247+
default: true,
248+
examples: [true],
249+
},
250+
},
251+
required: ['useApifyProxy'],
252+
};
253+
} else if (property.type === 'array' && property.editor === 'requestListSources') {
254+
clonedProperties[propertyName] = {
255+
...property,
256+
items: {
257+
...property.items,
258+
type: 'object',
259+
title: 'Request list source',
260+
description: 'Request list source',
261+
properties: {
262+
url: {
263+
title: 'URL',
264+
type: 'string',
265+
description: 'URL of the request list source',
266+
},
267+
},
268+
},
269+
};
270+
}
271+
}
272+
273+
return clonedProperties;
274+
}
275+
161276
/**
162277
* Fetches actor input schemas by Actor IDs or Actor full names and creates MCP tools.
163278
*
@@ -166,6 +281,13 @@ export function filterSchemaProperties(properties: { [key: string]: SchemaProper
166281
*
167282
* Tool name can't contain /, so it is replaced with _
168283
*
284+
* The input schema processing workflow:
285+
* 1. Properties are marked as required using markInputPropertiesAsRequired()
286+
* 2. Nested properties are built by analyzing editor type (proxy, requestListSources) using buildNestedProperties()
287+
* 3. Properties are filtered using filterSchemaProperties()
288+
* 4. Properties are shortened using shortenProperties()
289+
* 5. Enums are added to descriptions with examples using addEnumsToDescriptionsWithExamples()
290+
*
169291
* @param {string[]} actors - An array of actor IDs or Actor full names.
170292
* @returns {Promise<Tool[]>} - A promise that resolves to an array of MCP tools.
171293
*/
@@ -176,8 +298,10 @@ export async function getActorsAsTools(actors: string[]): Promise<Tool[]> {
176298
for (const result of results) {
177299
if (result) {
178300
if (result.input && 'properties' in result.input && result.input) {
179-
const properties = filterSchemaProperties(result.input.properties as { [key: string]: SchemaProperties });
180-
const propertiesShortened = shortenProperties(properties);
301+
const propertiesMarkedAsRequired = markInputPropertiesAsRequired(result.input);
302+
const propertiesObjectsBuilt = buildNestedProperties(propertiesMarkedAsRequired);
303+
const propertiesFiltered = filterSchemaProperties(propertiesObjectsBuilt);
304+
const propertiesShortened = shortenProperties(propertiesFiltered);
181305
result.input.properties = addEnumsToDescriptionsWithExamples(propertiesShortened);
182306
}
183307
try {

src/server.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ import {
3434
searchActorsByKeywords,
3535
GetActorDefinition,
3636
} from './tools.js';
37-
import type { SchemaProperties, Tool } from './types.js';
37+
import type { ISchemaProperties, Tool } from './types.js';
3838

3939
/**
4040
* Create Apify MCP server
@@ -199,7 +199,7 @@ export class ApifyMcpServer {
199199
const parsed = GetActorDefinition.parse(args);
200200
const v = await getActorDefinition(parsed.actorName, parsed.limit);
201201
if (v && v.input && 'properties' in v.input && v.input) {
202-
const properties = filterSchemaProperties(v.input.properties as { [key: string]: SchemaProperties });
202+
const properties = filterSchemaProperties(v.input.properties as { [key: string]: ISchemaProperties });
203203
v.input.properties = shortenProperties(properties);
204204
}
205205
return { content: [{ type: 'text', text: JSON.stringify(v) }] };

src/types.ts

Lines changed: 34 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -9,23 +9,48 @@ export type Input = {
99
debugActorInput?: unknown;
1010
};
1111

12-
export interface ActorDefinitionPruned {
13-
id: string;
14-
actorFullName: string;
15-
buildTag?: string;
16-
readme?: string | null;
17-
input?: object | null;
12+
export interface ISchemaProperties {
13+
type: string;
14+
15+
title: string;
1816
description: string;
19-
defaultRunOptions: ActorDefaultRunOptions;
17+
18+
enum?: string[]; // Array of string options for the enum
19+
enumTitles?: string[]; // Array of string titles for the enum
20+
default?: unknown;
21+
prefill?: unknown;
22+
23+
items?: ISchemaProperties;
24+
editor?: string;
25+
examples?: unknown[];
26+
27+
properties?: Record<string, ISchemaProperties>;
28+
required?: string[];
2029
}
2130

22-
export interface ActorDefinitionWithDesc extends ActorDefinition {
31+
export interface IActorInputSchema {
32+
title?: string;
33+
description?: string;
34+
35+
type: string;
36+
37+
properties: Record<string, ISchemaProperties>;
38+
39+
required?: string[];
40+
schemaVersion?: number;
41+
}
42+
43+
export type ActorDefinitionWithDesc = Omit<ActorDefinition, 'input'> & {
2344
id: string;
2445
actorFullName: string;
2546
description: string;
26-
defaultRunOptions: ActorDefaultRunOptions
47+
defaultRunOptions: ActorDefaultRunOptions;
48+
input?: IActorInputSchema;
2749
}
2850

51+
export type ActorDefinitionPruned = Pick<ActorDefinitionWithDesc,
52+
'id' | 'actorFullName' | 'buildTag' | 'readme' | 'input' | 'description' | 'defaultRunOptions'>
53+
2954
export interface Tool {
3055
name: string;
3156
actorFullName: string;
@@ -35,19 +60,6 @@ export interface Tool {
3560
memoryMbytes?: number;
3661
}
3762

38-
export interface SchemaProperties {
39-
title: string;
40-
description: string;
41-
enum: string[]; // Array of string options for the enum
42-
enumTitles?: string[]; // Array of string titles for the enum
43-
type: string; // Data type (e.g., "string")
44-
default: string;
45-
prefill: string;
46-
items?: { type: string; }
47-
editor?: string;
48-
examples?: unknown[];
49-
}
50-
5163
// ActorStoreList for actor-search tool
5264
export interface ActorStats {
5365
totalRuns: number;

0 commit comments

Comments
 (0)