diff --git a/package-lock.json b/package-lock.json index 534a053553..3c105d30ee 100644 --- a/package-lock.json +++ b/package-lock.json @@ -63,7 +63,7 @@ }, "apify-docs-theme": { "name": "@apify/docs-theme", - "version": "1.0.210", + "version": "1.0.213", "license": "ISC", "dependencies": { "@apify/docs-search-modal": "^1.2.2", diff --git a/sources/platform/actors/development/actor_definition/dataset_schema/index.md b/sources/platform/actors/development/actor_definition/dataset_schema/index.md index 70516ab551..d7aa956a66 100644 --- a/sources/platform/actors/development/actor_definition/dataset_schema/index.md +++ b/sources/platform/actors/development/actor_definition/dataset_schema/index.md @@ -1,6 +1,6 @@ --- title: Dataset schema specification -sidebar_position: 3 +sidebar_position: 4 description: Learn how to define and present your dataset schema in an user-friendly output UI. slug: /actors/development/actor-definition/dataset-schema sidebar_label: Dataset schema diff --git a/sources/platform/actors/development/actor_definition/docker.md b/sources/platform/actors/development/actor_definition/docker.md index c3ccc6a776..734540c7a1 100644 --- a/sources/platform/actors/development/actor_definition/docker.md +++ b/sources/platform/actors/development/actor_definition/docker.md @@ -2,7 +2,7 @@ title: Dockerfile description: Learn about the available Docker images you can use as a base for your Apify Actors. Choose the right base image based on your Actor's requirements and the programming language you're using. slug: /actors/development/actor-definition/dockerfile -sidebar_position: 4 +sidebar_position: 7 --- **Learn about the available Docker images you can use as a base for your Apify Actors. 
Choose the right base image based on your Actor's requirements and the programming language you're using.** diff --git a/sources/platform/actors/development/actor_definition/input_schema/index.md b/sources/platform/actors/development/actor_definition/input_schema/index.md index 3ede079356..ad7b50024c 100644 --- a/sources/platform/actors/development/actor_definition/input_schema/index.md +++ b/sources/platform/actors/development/actor_definition/input_schema/index.md @@ -1,6 +1,6 @@ --- title: Actor input schema -sidebar_position: 2 +sidebar_position: 3 description: Learn how to define and validate a schema for your Actor's input with code examples. Provide an autogenerated input UI for your Actor's users. slug: /actors/development/actor-definition/input-schema --- diff --git a/sources/platform/actors/development/actor_definition/key_value_store_schema/index.md b/sources/platform/actors/development/actor_definition/key_value_store_schema/index.md index 2cb42da6f5..4b04f7f2f3 100644 --- a/sources/platform/actors/development/actor_definition/key_value_store_schema/index.md +++ b/sources/platform/actors/development/actor_definition/key_value_store_schema/index.md @@ -1,7 +1,7 @@ --- title: Key-value store schema specification sidebar_label: Key-value store schema -sidebar_position: 3 +sidebar_position: 4 description: Learn how to define and present your key-value store schema to organize records into collections. 
slug: /actors/development/actor-definition/key-value-store-schema --- diff --git a/sources/platform/actors/development/actor_definition/output_schema/images/output-schema-chat-example.png b/sources/platform/actors/development/actor_definition/output_schema/images/output-schema-chat-example.png new file mode 100644 index 0000000000..380187178c Binary files /dev/null and b/sources/platform/actors/development/actor_definition/output_schema/images/output-schema-chat-example.png differ diff --git a/sources/platform/actors/development/actor_definition/output_schema/images/output-schema-combination-example.png b/sources/platform/actors/development/actor_definition/output_schema/images/output-schema-combination-example.png new file mode 100644 index 0000000000..f75679c301 Binary files /dev/null and b/sources/platform/actors/development/actor_definition/output_schema/images/output-schema-combination-example.png differ diff --git a/sources/platform/actors/development/actor_definition/output_schema/images/output-schema-record-example.png b/sources/platform/actors/development/actor_definition/output_schema/images/output-schema-record-example.png new file mode 100644 index 0000000000..fe2f319483 Binary files /dev/null and b/sources/platform/actors/development/actor_definition/output_schema/images/output-schema-record-example.png differ diff --git a/sources/platform/actors/development/actor_definition/output_schema/images/output-schema-simple-example.png b/sources/platform/actors/development/actor_definition/output_schema/images/output-schema-simple-example.png new file mode 100644 index 0000000000..e5a1603030 Binary files /dev/null and b/sources/platform/actors/development/actor_definition/output_schema/images/output-schema-simple-example.png differ diff --git a/sources/platform/actors/development/actor_definition/output_schema/index.md b/sources/platform/actors/development/actor_definition/output_schema/index.md new file mode 100644 index 0000000000..04a1bc29d0 --- /dev/null 
+++ b/sources/platform/actors/development/actor_definition/output_schema/index.md @@ -0,0 +1,296 @@ +--- +title: Actor output schema +sidebar_label: Actor output schema +sidebar_position: 5 +description: Learn how to define and present the output of your Actor. +slug: /actors/development/actor-definition/output-schema +--- + +**Learn how to define and present the output of your Actor.** + +--- + +The Actor output schema builds upon the schemas for the dataset and key-value store, and defines where the output of an Actor run is stored. It allows you to define templates for URLs where users can find the output generated by your Actor. The outputs defined in the schema are used in the UI to manage how the output is displayed and are returned in the Actor run's `GET` endpoint for automated systems using the API. + +## Example + +Consider a very simple example Actor that calls `Actor.setValue()` to save two files into the key-value store: + +```javascript title="main.js" +import { Actor } from 'apify'; +// Initialize the JavaScript SDK +await Actor.init(); + +/** + * Store data in key-value store + */ +await Actor.setValue('document-1.txt', 'my text data', { contentType: 'text/plain' }); + +await Actor.setValue(`image-1.jpeg`, imageBuffer, { contentType: 'image/jpeg' }); + +// Exit successfully +await Actor.exit(); +``` + +To let users know that the output is stored in the key-value store, you can update the `.actor/actor.json` configuration like this: + +```json title=".actor/actor.json" +{ + "actorSpecification": 1, + "name": "Actor Name", + "title": "Actor Title", + "version": "1.0.0", + "output": { + "actorOutputSchemaVersion": 1, + "title": "Output schema of the Actor", + "properties": { + "files": { + "type": "string", + "title": "Files", + "template": "{{links.apiDefaultKeyValueStoreUrl}}/keys" + } + } + } +} +``` + +The schema above defines one output called `files`, whose `template` links to the key-value store `GET keys` API
endpoint of the default key-value store. + +This allows the UI to know that when displaying the output of a run, it needs to display data from the key-value store. + +The **Output** tab will then display the contents of the key-value store: + +![Output tab in Run](images/output-schema-simple-example.png) + +And if you call the `GET Run` API endpoint, it will contain an `output` property: + +```json +"output": { + "files": "https://api.apify.com/v2/key-value-stores/<defaultKeyValueStoreId>/keys" +} +``` + +## Structure + +Output configuration files need to be located in the `.actor` folder within the Actor's root directory. + +You have two choices for how to organize files within the `.actor` folder. + +### Single configuration file + +```json title=".actor/actor.json" +{ + "actorSpecification": 1, + "name": "files-scraper", + "title": "Files scraper", + "version": "1.0.0", + "output": { + "actorOutputSchemaVersion": 1, + "title": "Output schema of the files scraper", + "properties": /* define your outputs here */ + } +} +``` + +### Separate configuration files + +```json title=".actor/actor.json" +{ + "actorSpecification": 1, + "name": "files-scraper", + "title": "Files scraper", + "version": "1.0.0", + "output": "./output_schema.json" +} +``` + +```json title=".actor/output_schema.json" +{ + "actorOutputSchemaVersion": 1, + "title": "Output schema of the files scraper", + "properties": /* define your outputs here */ +} +``` + +Choose the method that best suits your configuration. + +## Output schema structure definitions + +The output schema defines the outputs of the Actor and the URL templates used to locate them. It tells the UI and the API where the results of an Actor run can be found.
+ +### Output schema object definition + +| Property | Type | Required | Description | +|-----------------------------------|-------------------------------|----------|-----------------------------------------------------------------------------------------------------------------| +| `actorOutputSchemaVersion` | integer | true | Specifies the version of the output schema structure document.
Currently only version 1 is available. | +| `title` | string | true | Title of the schema | +| `description` | string | false | Description of the schema | +| `properties` | Object | true | An object where each key is an output ID and its value is an output object definition (see below). | + +### Output object definition + +| Property | Type | Required | Description | +|----------------|--------------|--------------|-------------------------------------------------------------------------------------------------------------------------------------------------| +| `title` | string | true | The output's title, shown in the run's output tab if there are multiple outputs and in API as key for the generated output URL. | +| `description` | string | false | A description of the output. Only used when reading the schema (useful for LLMs) | +| `template` | string | true | Defines a template which will be translated into output URL. The template can use variables (see below) | + +### Available template variables + +| Variable | Type | Description | +|----------------|--------------|--------------| +| `links` | object | Contains quick links to most commonly used URLs | +| `links.publicRunUrl` | string | Public run URL in format `https://console.apify.com/view/runs/:runId` | +| `links.consoleRunUrl` | string | Console run URL in format `https://console.apify.com/actors/runs/:runId` | +| `links.apiRunUrl` | string | API run URL in format `https://api.apify.com/v2/actor-runs/:runId` | +| `links.apiDefaultDatasetUrl` | string | API URL of default dataset in format `https://api.apify.com/v2/datasets/:defaultDatasetId` | +| `links.apiDefaultKeyValueStoreUrl` | string | API URL of default key-value store in format `https://api.apify.com/v2/key-value-stores/:defaultKeyValueStoreId` | +| `links.containerRunUrl` | string | URL of a webserver running inside the run in format `https://<containerId>.runs.apify.net/` | +| `run` | object | Contains information about the run, in the same format as it is returned from
the `GET Run` API endpoint | +| `run.defaultDatasetId` | string | ID of the default dataset | +| `run.defaultKeyValueStoreId` | string | ID of the default key-value store | + +## Advanced examples + +The output schema and its `template` property allow a lot of flexibility to support a wide array of Actors. Here are some examples to help you get started. + +### Linking dataset views and key value store collections + +This example shows a schema definition for a basic social media scraper. The scraper downloads post data into the dataset, and video and subtitle files into the key-value store. + +If we correctly define `views` and `collection` in `dataset_schema.json` and `key_value_store_schema.json`, we can then use them in output schema like this: + +```json title=".actor/output_schema.json" +{ + "actorOutputSchemaVersion": 1, + "title": "Output schema of Social media scraper", + "properties": { + "overview": { + "type": "string", + "title": "Overview 🔎", + "template": "{{links.apiDefaultDatasetUrl}}/items?view=overview" + }, + "posts": { + "type": "string", + "title": "Posts ✉️", + "template": "{{links.apiDefaultDatasetUrl}}/items?view=posts" + }, + "author": { + "type": "string", + "title": "Authors 🧑‍🎤", + "template": "{{links.apiDefaultDatasetUrl}}/items?view=author" + }, + "music": { + "type": "string", + "title": "Music 🎶", + "template": "{{links.apiDefaultDatasetUrl}}/items?view=music" + }, + "video": { + "type": "string", + "title": "Video 🎞️", + "template": "{{links.apiDefaultDatasetUrl}}/items?view=video" + }, + "subtitleFiles": { + "type": "string", + "title": "Subtitle files", + "template": "{{links.apiDefaultKeyValueStoreUrl}}/keys?collection=subtitles" + }, + "videoFiles": { + "type": "string", + "title": "Video files", + "template": "{{links.apiDefaultKeyValueStoreUrl}}/keys?collection=videos" + } + } +} +``` + +The schema above defines five dataset outputs and two key-value store outputs. 
The dataset outputs link to specific views defined in `dataset_schema.json`, and the key-value store outputs use similar logic to target a specific collection defined in `key_value_store_schema.json`. + +When a user runs the Actor in the Console, the UI will look like this: + +![Video files in Output tab](images/output-schema-combination-example.png) + +### Using container URL to display chat client + +In this example, we have an Actor that internally runs a web server that allows users to connect to an LLM and chat with it. +The conversation history is then stored in the dataset. + +```json title=".actor/output_schema.json" +{ + "actorOutputSchemaVersion": 1, + + "title": "Chat client output", + "description": "Chat client provides interactive view to converse with LLM and chat history in dataset", + "type": "object", + + "properties": { + "clientUrl": { + "type": "string", + "title": "Chat client", + "template": "{{run.containerUrl}}" + }, + "chatHistory": { + "type": "string", + "title": "Conversation history", + "template": "{{links.apiDefaultDatasetUrl}}/items" + } + } +} +``` + +In the schema above we have two outputs. +The `clientUrl` output will return a link to the web server running inside the run. +The `chatHistory` links to the default dataset and contains the history of the whole conversation, with each message as a separate item. + +When the Actor is run in the Console, the user will see this: + +![Chat in Output tab](images/output-schema-chat-example.png) + +### Custom HTML as run output + +This example shows an output schema of an Actor that runs Cypress tests. When it finishes, it generates an HTML report and stores it in the key-value store.
We can then link to this file and show it as an output like this: + +```json title=".actor/output_schema.json" +{ + "actorOutputSchemaVersion": 1, + + "title": "Cypress test report output", + "description": "Test report from Cypress", + "type": "object", + + "properties": { + "reportUrl": { + "type": "string", + "title": "HTML Report", + "template": "{{links.apiDefaultKeyValueStoreUrl}}/records/report.html" + } + } +} +``` + +The `reportUrl` in this case links directly to the key-value store record stored in the default key-value store. + +When the output run is finished, the record will be displayed in an `