Skip to content

Commit 6b4de55

Browse files
committed
Add Apify-specific fields and properties to the Actor input transform pipeline
1 parent c8131c8 commit 6b4de55

File tree

3 files changed

+532
-39
lines changed

3 files changed

+532
-39
lines changed

src/tools/apify-properties.ts

Lines changed: 217 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
import type { ISchemaProperties } from '../types.js';
2+
3+
/**
 * Adds an item schema to array properties with editor === 'resourcePicker'.
 *
 * Produces a shallow copy of `property` whose `items` are the original items
 * (if any) overlaid with a string-typed resource ID definition. The input
 * object itself is not modified.
 *
 * @param property - Array schema property to extend.
 * @returns Copy of the property whose `items` describe a single resource ID string.
 */
export function addResourcePickerProperties(property: ISchemaProperties): ISchemaProperties {
    return {
        ...property,
        items: {
            // Spreading undefined is a no-op, so a property without `items` is fine.
            ...property.items,
            type: 'string',
            title: 'Resource ID',
            // Apify resource pickers select a dataset, key-value store, or request queue.
            description: 'Resource ID, either Apify Dataset, Key-Value Store, or Request Queue identifier',
        },
    };
}
14+
15+
/**
 * Adds key-value schema structure to array properties with editor === 'keyValue'.
 *
 * Produces a shallow copy of `property` whose `items` are the original items
 * (if any) overlaid with an object schema exposing string `key` and `value`
 * fields. The input object itself is not modified.
 *
 * @param property - Array schema property to extend.
 * @returns Copy of the property whose `items` describe one key-value pair object.
 */
export function addKeyValueProperties(property: ISchemaProperties): ISchemaProperties {
    return {
        ...property,
        items: {
            // Spreading undefined is a no-op, so a property without `items` is fine.
            ...property.items,
            type: 'object',
            title: 'Key-Value Pair',
            description: 'Key-value pair definition',
            properties: {
                key: {
                    type: 'string',
                    title: 'Key',
                    description: 'Key string',
                },
                value: {
                    type: 'string',
                    title: 'Value',
                    description: 'Value string',
                },
            },
        },
    };
}
41+
42+
/**
 * Description of the `userData` field, shared by the globs and pseudoUrls item schemas.
 */
const USER_DATA_DESCRIPTION = 'User data object. A JSON object with custom user data that will be passed in the userData property of the Request object for each URL';

/**
 * Description of the `headers` field, shared by the globs and pseudoUrls item schemas.
 */
const HEADERS_DESCRIPTION = 'Headers object. A JSON object whose properties and values contain HTTP headers that will be sent with the request.';

/**
 * Adds globs schema structure to array properties with editor === 'globs'.
 *
 * Produces a shallow copy of `property` whose `items` are the original items
 * (if any) overlaid with an object schema exposing `glob`, `method`,
 * `payload`, `userData` and `headers` fields. The input object itself is not
 * modified.
 *
 * @param property - Array schema property to extend.
 * @returns Copy of the property whose `items` describe one glob definition object.
 */
export function addGlobsProperties(property: ISchemaProperties): ISchemaProperties {
    return {
        ...property,
        items: {
            // Spreading undefined is a no-op, so a property without `items` is fine.
            ...property.items,
            type: 'object',
            title: 'Glob',
            description: 'Glob pattern definition',
            properties: {
                glob: {
                    type: 'string',
                    title: 'Glob',
                    description: 'Glob pattern string',
                },
                method: {
                    type: 'string',
                    title: 'HTTP Method',
                    description: 'HTTP method for the request',
                },
                payload: {
                    type: 'string',
                    title: 'Payload',
                    description: 'Payload for the request',
                },
                userData: {
                    type: 'object',
                    title: 'User Data',
                    description: USER_DATA_DESCRIPTION,
                    properties: {},
                },
                headers: {
                    type: 'object',
                    title: 'Headers',
                    description: HEADERS_DESCRIPTION,
                    properties: {},
                },
            },
        },
    };
}
92+
93+
/**
 * Adds pseudoUrls schema structure to array properties with editor === 'pseudoUrls'.
 *
 * Produces a shallow copy of `property` whose `items` are the original items
 * (if any) overlaid with an object schema exposing `purl`, `method`,
 * `payload`, `userData` and `headers` fields. The input object itself is not
 * modified.
 *
 * @param property - Array schema property to extend.
 * @returns Copy of the property whose `items` describe one pseudo-URL definition object.
 */
export function addPseudoUrlsProperties(property: ISchemaProperties): ISchemaProperties {
    return {
        ...property,
        items: {
            // Spreading undefined is a no-op, so a property without `items` is fine.
            ...property.items,
            type: 'object',
            title: 'PseudoUrl',
            // The line break inside this template literal is part of the description text.
            description: `PseudoUrl definition. Represents a pseudo-URL (PURL) - an URL pattern used by web crawlers to specify which URLs should the crawler visit.
A PURL is simply a URL with special directives enclosed in [] brackets. Currently, the only supported directive is [RegExp], which defines a JavaScript-style regular expression to match against the URL.`,
            properties: {
                purl: {
                    type: 'string',
                    title: 'PseudoUrl',
                    description: `PseudoUrl pattern string. Be careful to correctly escape special characters in the pseudo-URL string. If either [ or ] is part of the normal query string, it must be encoded as [\\x5B] or [\\x5D], respectively`,
                    examples: [
                        'http://www.example.com/pages/[(\\w|-)*]',
                    ],
                },
                method: {
                    type: 'string',
                    title: 'HTTP Method',
                    description: 'HTTP method for the request',
                    enum: [
                        'GET',
                        'POST',
                        'PUT',
                        'DELETE',
                        'PATCH',
                        'HEAD',
                        'OPTIONS',
                        'CONNECT',
                        'TRACE',
                    ],
                },
                payload: {
                    type: 'string',
                    title: 'Payload',
                    description: 'Payload for the request',
                },
                userData: {
                    type: 'object',
                    title: 'User Data',
                    description: USER_DATA_DESCRIPTION,
                    properties: {},
                },
                headers: {
                    type: 'object',
                    title: 'Headers',
                    description: HEADERS_DESCRIPTION,
                    properties: {},
                },
            },
        },
    };
}
148+
149+
/**
 * Adds Apify proxy-specific properties to a proxy object property.
 *
 * Produces a shallow copy of `property` whose `properties` are the original
 * ones (if any) plus `useApifyProxy`, `apifyProxyGroups` and `proxyUrls`.
 * Any existing `required` list on the property is replaced with
 * `['useApifyProxy']`. The input object itself is not modified.
 *
 * @param property - Object schema property (editor === 'proxy') to extend.
 * @returns Copy of the property with the proxy configuration fields added.
 */
export function addProxyProperties(property: ISchemaProperties): ISchemaProperties {
    return {
        ...property,
        properties: {
            // Spreading undefined is a no-op, so a property without `properties` is fine.
            ...property.properties,
            useApifyProxy: {
                title: 'Use Apify Proxy',
                type: 'boolean',
                description: 'Whether to use Apify Proxy. Set this to false when you want to use custom proxy URLs.',
                default: true,
            },
            apifyProxyGroups: {
                title: 'Apify Proxy Groups',
                type: 'array',
                // The line breaks (including the blank line) are part of the description text.
                description: `Select specific Apify Proxy groups to use (e.g., RESIDENTIAL, DATACENTER).
**DATACENTER:**
The fastest and cheapest option. It uses datacenters to change your IP address. Note that there is a chance of being blocked because of the activity of other users.

**RESIDENTIAL:**
IP addresses located in homes and offices around the world. These IPs are the least likely to be blocked.`,
                items: {
                    type: 'string',
                    title: 'Proxy group name',
                    description: 'Proxy group name',
                    enum: [
                        'RESIDENTIAL',
                        'DATACENTER',
                    ],
                },
            },
            proxyUrls: {
                title: 'Proxy URLs',
                type: 'array',
                description: 'List of custom proxy URLs to be used instead of the Apify Proxy.',
                items: {
                    type: 'string',
                    title: 'Custom proxy URL',
                    description: 'Custom proxy URL',
                },
            },
        },
        required: ['useApifyProxy'],
    };
}
196+
197+
/**
 * Adds request list source structure to array properties with editor 'requestListSources'.
 *
 * Produces a shallow copy of `property` whose `items` are the original items
 * (if any) overlaid with an object schema exposing a string `url` field. The
 * input object itself is not modified.
 *
 * @param property - Array schema property to extend.
 * @returns Copy of the property whose `items` describe one request list source object.
 */
export function addRequestListSourcesProperties(property: ISchemaProperties): ISchemaProperties {
    return {
        ...property,
        items: {
            // Spreading undefined is a no-op, so a property without `items` is fine.
            ...property.items,
            type: 'object',
            title: 'Request list source',
            description: 'Request list source',
            properties: {
                url: {
                    title: 'URL',
                    type: 'string',
                    description: 'URL of the request list source',
                },
            },
        },
    };
}

src/tools/utils.ts

Lines changed: 21 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,14 @@ import type Ajv from 'ajv';
33

44
import { ACTOR_ENUM_MAX_LENGTH, ACTOR_MAX_DESCRIPTION_LENGTH } from '../const.js';
55
import type { ActorInputSchemaProperties, IActorInputSchema, ISchemaProperties } from '../types.js';
6+
import {
7+
addGlobsProperties,
8+
addKeyValueProperties,
9+
addProxyProperties,
10+
addPseudoUrlsProperties,
11+
addRequestListSourcesProperties,
12+
addResourcePickerProperties as addArrayResourcePickerProperties,
13+
} from './apify-properties.js';
614

715
export function actorNameToToolName(actorName: string): string {
816
return actorName
@@ -48,42 +56,22 @@ export function fixedAjvCompile(ajvInstance: Ajv, schema: object): ValidateFunct
4856
* @param {Record<string, ISchemaProperties>} properties - The input schema properties
4957
* @returns {Record<string, ISchemaProperties>} Modified properties with nested properties
5058
*/
51-
export function buildNestedProperties(properties: Record<string, ISchemaProperties>): Record<string, ISchemaProperties> {
59+
export function buildApifySpecificProperties(properties: Record<string, ISchemaProperties>): Record<string, ISchemaProperties> {
5260
const clonedProperties = { ...properties };
5361

5462
for (const [propertyName, property] of Object.entries(clonedProperties)) {
5563
if (property.type === 'object' && property.editor === 'proxy') {
56-
clonedProperties[propertyName] = {
57-
...property,
58-
properties: {
59-
...property.properties,
60-
useApifyProxy: {
61-
title: 'Use Apify Proxy',
62-
type: 'boolean',
63-
description: 'Whether to use Apify Proxy - ALWAYS SET TO TRUE.',
64-
default: true,
65-
examples: [true],
66-
},
67-
},
68-
required: ['useApifyProxy'],
69-
};
64+
clonedProperties[propertyName] = addProxyProperties(property);
7065
} else if (property.type === 'array' && property.editor === 'requestListSources') {
71-
clonedProperties[propertyName] = {
72-
...property,
73-
items: {
74-
...property.items,
75-
type: 'object',
76-
title: 'Request list source',
77-
description: 'Request list source',
78-
properties: {
79-
url: {
80-
title: 'URL',
81-
type: 'string',
82-
description: 'URL of the request list source',
83-
},
84-
},
85-
},
86-
};
66+
clonedProperties[propertyName] = addRequestListSourcesProperties(property);
67+
} else if (property.type === 'array' && property.editor === 'pseudoUrls') {
68+
clonedProperties[propertyName] = addPseudoUrlsProperties(property);
69+
} else if (property.type === 'array' && property.editor === 'globs') {
70+
clonedProperties[propertyName] = addGlobsProperties(property);
71+
} else if (property.type === 'array' && property.editor === 'keyValue') {
72+
clonedProperties[propertyName] = addKeyValueProperties(property);
73+
} else if (property.type === 'array' && property.editor === 'resourcePicker') {
74+
clonedProperties[propertyName] = addArrayResourcePickerProperties(property);
8775
}
8876
}
8977

@@ -191,6 +179,7 @@ export function inferArrayItemType(property: ISchemaProperties): string | null {
191179
stringList: 'string',
192180
json: 'object',
193181
globs: 'object',
182+
select: 'string',
194183
};
195184
return editorTypeMap[editor] || null;
196185
}
@@ -302,7 +291,7 @@ export function transformActorInputSchemaProperties(input: IActorInputSchema): A
302291
// Deep clone input to avoid mutating the original object
303292
const inputClone: IActorInputSchema = JSON.parse(JSON.stringify(input));
304293
let transformedProperties = markInputPropertiesAsRequired(inputClone);
305-
transformedProperties = buildNestedProperties(transformedProperties);
294+
transformedProperties = buildApifySpecificProperties(transformedProperties);
306295
transformedProperties = filterSchemaProperties(transformedProperties);
307296
transformedProperties = inferArrayItemsTypeIfMissing(transformedProperties);
308297
transformedProperties = shortenProperties(transformedProperties);

0 commit comments

Comments
 (0)