|
| 1 | +--- |
| 2 | +title: Output schema |
| 3 | +description: Output schema is designed to help actor creators present the results in an output UI attractive to actor users. |
| 4 | +paths: |
| 5 | +# NOTE: IF ADDING A NEW PATH, LEAVE THE OLD ONES FOR REDIRECTS |
| 6 | + - actors/development/output-schema |
| 7 | +--- |
| 8 | + |
| 9 | +# Actor output schema |
| 10 | + |
| 11 | + It is recommended to show the most important fields in a curated Overview visualization configured using output schema specification, while all available fields are automatically available in the “All fields” view. |
| 12 | + |
| 13 | +In the future, output schema will also help with strict output data format validation, which will make integrations more solid and easier to set up. |
| 14 | + |
| 15 | +## Specification version 1 |
| 16 | + |
| 17 | +An actor's output schema defines the structure and both API and visual representation of data produced by an actor. Output configuration files have to be located in the `.actor` folder in the actor's root directory. |
| 18 | + |
| 19 | +## How to organize files in the .actor folder: two options |
| 20 | + |
| 21 | +**A)** all config options are set in a single `.actor/actor.json` file, e.g.: |
| 22 | + |
| 23 | +```JSON |
| 24 | +//file: .actor/actor.json |
| 25 | +{ |
| 26 | + "actorSpecification": 1, |
| 27 | + "name": "this-is-book-library-scraper", |
| 28 | + "title": "Book Library scraper", |
| 29 | + "version": "1.0.0", |
| 30 | + "storages": { |
| 31 | + "dataset": { |
| 32 | + "actorSpecification": 1, |
| 33 | + "fields": {}, |
| 34 | + "views": { |
| 35 | + "overview": { |
| 36 | + "title": "Overview", |
| 37 | + "transformation": {}, |
| 38 | + "display": {} |
| 39 | + } |
| 40 | + } |
| 41 | + } |
| 42 | + } |
| 43 | +} |
| 44 | +``` |
| 45 | + |
| 46 | +**B)** .actor/actor.json links to other sub-config files in the same folder, e.g.: |
| 47 | + |
| 48 | +```JSON |
| 49 | +//file: .actor/actor.json |
| 50 | +{ |
| 51 | + "actorSpecification": 1, |
| 52 | + "name": "this-is-book-library-scraper", |
| 53 | + "title": "Book Library scraper", |
| 54 | + "version": "1.0.0", |
| 55 | + "storages": { |
| 56 | + "dataset": "./dataset_schema.json" |
| 57 | + } |
| 58 | +} |
| 59 | +``` |
| 60 | + |
| 61 | +```JSON |
| 62 | +//file: .actor/dataset_schema.json |
| 63 | +{ |
| 64 | + "actorSpecification": 1, |
| 65 | + "fields": {}, |
| 66 | + "views": { |
| 67 | + "overview": { |
| 68 | + "title": "Overview", |
| 69 | + "transformation": {}, |
| 70 | + "display": {} |
| 71 | + } |
| 72 | + } |
| 73 | +} |
| 74 | +``` |
| 75 | + |
| 76 | +Both options are valid. The user can choose based on their own needs. |
| 77 | + |
| 78 | +## Basic Template |
| 79 | + |
| 80 | +Imagine there is an actor that calls `Actor.pushData()` to store data into the dataset, e.g.: |
| 81 | + |
| 82 | +```javascript |
| 83 | +//file: main.js |
| 84 | +import { Actor } from 'apify'; |
| 85 | +// Initialize the Apify SDK |
| 86 | +await Actor.init(); |
| 87 | + |
| 88 | +/** |
| 89 | + * Actor code |
| 90 | + */ |
| 91 | +await Actor.pushData({ |
| 92 | + "___EXAMPLE_NUMERIC_FIELD___": 10, |
| 93 | + "___EXAMPLE_PICTURE_URL_FIELD___": "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_92x30dp.png", |
| 94 | + "___EXAMPLE_LINK_URL_FIELD___": "https://google.com", |
| 95 | + "___EXAMPLE_TEXT_FIELD___": "Google", |
| 96 | + "___EXAMPLE_BOOLEAN_FIELD___": true, |
| 97 | + "___EXAMPLE_DATE_FIELD___": new Date(), |
| 98 | + "___EXAMPLE_ARRAY_FIELD___": ['#hello', "#world"], |
| 99 | + "___EXAMPLE_OBJECT_FIELD___": {}, |
| 100 | +}) |
| 101 | + |
| 102 | +// Exit successfully |
| 103 | +await Actor.exit(); |
| 104 | +``` |
| 105 | + |
| 106 | +Let’s say we are going to use a single file to set up an actor’s output tab UI. The following template can be used as a `.actor/actor.json` configuration. |
| 107 | + |
| 108 | +```JSON |
| 109 | +//file: .actor/actor.json |
| 110 | +{ |
| 111 | + "actorSpecification": 1, |
| 112 | + "name": "___ENTER_ACTOR_NAME____", |
| 113 | + "title": "___ENTER_ACTOR_TITLE____", |
| 114 | + "version": "1.0.0", |
| 115 | + "storages": { |
| 116 | + "dataset": { |
| 117 | + "actorSpecification": 1, |
| 118 | + "views": { |
| 119 | + "overview": { |
| 120 | + "title": "Overview", |
| 121 | + "transformation": { |
| 122 | + "fields": [ |
| 123 | + "___EXAMPLE_PICTURE_URL_FIELD___", |
| 124 | + "___EXAMPLE_LINK_URL_FIELD___", |
| 125 | + "___EXAMPLE_TEXT_FIELD___", |
| 126 | + "___EXAMPLE_BOOLEAN_FIELD___", |
| 127 | + "___EXAMPLE_ARRAY_FIELD___", |
| 128 | + "___EXAMPLE_OBJECT_FIELD___", |
| 129 | + "___EXAMPLE_DATE_FIELD___", |
| 130 | + "___EXAMPLE_NUMERIC_FIELD___" |
| 131 | + ] |
| 132 | + }, |
| 133 | + "display": { |
| 134 | + "component": "table", |
| 135 | + "properties": { |
| 136 | + "___EXAMPLE_PICTURE_URL_FIELD___": { |
| 137 | + "label": "Image", |
| 138 | + "format": "image" |
| 139 | + }, |
| 140 | + "___EXAMPLE_LINK_URL_FIELD___": { |
| 141 | + "label": "Link", |
| 142 | + "format": "link" |
| 143 | + }, |
| 144 | + "___EXAMPLE_TEXT_FIELD___": { |
| 145 | + "label": "Text", |
| 146 | + "format": "text" |
| 147 | + }, |
| 148 | + "___EXAMPLE_BOOLEAN_FIELD___": { |
| 149 | + "label": "Boolean", |
| 150 | + "format": "boolean" |
| 151 | + }, |
| 152 | + "___EXAMPLE_ARRAY_FIELD___": { |
| 153 | + "label": "Array", |
| 154 | + "format": "array" |
| 155 | + }, |
| 156 | + "___EXAMPLE_OBJECT_FIELD___": { |
| 157 | + "label": "Object", |
| 158 | + "format": "object" |
| 159 | + }, |
| 160 | + "___EXAMPLE_DATE_FIELD___": { |
| 161 | + "label": "Date", |
| 162 | + "format": "date" |
| 163 | + }, |
| 164 | + "___EXAMPLE_NUMERIC_FIELD___": { |
| 165 | + "label": "Number", |
| 166 | + "format": "number" |
| 167 | + } |
| 168 | + } |
| 169 | + } |
| 170 | + } |
| 171 | + } |
| 172 | + } |
| 173 | + } |
| 174 | +} |
| 175 | +``` |
| 176 | + |
| 177 | +The template above defines the configuration for the default dataset output view. Under the **views** property, there is one view with the title **Overview**. The view configuration consists of two basic steps: 1) set up how to fetch the data, aka **transformation**, and 2) set up how to **display** the data fetched in step 1). The default behaviour is that the Output tab UI table will display **all the fields** from `transformation.fields` **in that same order**. So, theoretically, there should be no need to set up `display.properties` at all. However, it can be customized in case it is visually worth setting up some specific display format or column labels. The customization is carried out by using one of the `transformation.fields` names inside `display.properties` and overriding either the label or the format, as demonstrated in the basic template above. |
| 178 | + |
| 179 | +A 2-step configuration (transform & display) was implemented to provide a way to fetch data in the format presented in both API and UI consistently. Consistency between API data and UI data is crucial for actor end-users for them to experience the same results in both API and UI. Thus for the best end-user experience, we recommend overriding as few display properties as possible. |
| 180 | + |
| 181 | +Example of an actor output UI generated using basic template: |
| 182 | + |
| 183 | + |
| 184 | +## Example with inline comments |
| 185 | + |
| 186 | +```JSON |
| 187 | +//file: .actor/actor.json |
| 188 | +{ |
| 189 | + "actorSpecification": 1, //mandatory |
| 190 | + "name": "this-is-book-library-scraper", //mandatory, unique name of an actor |
| 191 | + "title": "Book Library scraper", //mandatory, the human readable name of an actor |
| 192 | + "version": "1.0.0", //mandatory |
| 193 | + "storages": { //mandatory |
| 194 | + "dataset": { //mandatory |
| 195 | + "actorSpecification": 1, //mandatory |
| 196 | + "fields": {}, //mandatory, but it can be an empty object for now |
| 197 | + "views": { //mandatory |
| 198 | + "overview": { //mandatory, but it does not have to be "overview", one can choose any name, multiple views are possible within views object |
| 199 | + "title": "Overview", //mandatory, one can choose any other title |
| 200 | + "transformation": { //mandatory |
| 201 | + "fields": [ //mandatory, fields property supports basic JSONPath selectors |
| 202 | + "isbn", //important, an order of fields in this array matches the order of columns in visualisation UI |
| 203 | + "picture", |
| 204 | + "title", |
| 205 | + "buyOnlineUrl", |
| 206 | + "author", |
| 207 | + "longBookDescription", |
| 208 | + "anObjectWithDeepStructure.pageCount", |
| 209 | + "anObjectWithDeepStructure.buyOnlineUrl", |
| 210 | + "anObjectWithDeepStructure.isRead", |
| 211 | + "anObjectWithDeepStructure.lastReadTime", |
| 212 | + "anArray", |
| 213 | + "anObject" |
| 214 | + ], |
| 215 | + "flatten": [ //optional, flattened objects are easily available as display.properties keys |
| 216 | + "anObjectWithDeepStructure" |
| 217 | + ] |
| 218 | + }, |
| 219 | + "display": { //mandatory |
| 220 | + "component": "table", //mandatory |
| 221 | + "properties": { //mandatory |
| 222 | + "isbn": { //optional, use transformation.fields values there as keys |
| 223 | + "label": "ISBN", //optional, define "label" only in case you would like to override the basic field name capitalisation in table UI |
| 224 | + // "format": "text" //optional, "text" format is default, use only in case you would like to override the default format settings |
| 225 | + }, |
| 226 | + "picture": { |
| 227 | + "label": "Cover", |
| 228 | + "format": "image" //optional, in this case the format is overridden to show "image" instead of image link "text". "image" format only works with .jpeg, .png or other image format urls. |
| 229 | + }, |
| 230 | + // "title": { //does not have to be specified, default behaviour will show the field correctly |
| 231 | + // "label": "Title", |
| 232 | + // "format": "text" |
| 233 | + // }, |
| 234 | + "buyOnlineUrl": { |
| 235 | + "label": "URL", |
| 236 | + "format": "link" |
| 237 | + }, |
| 238 | + // "author": { |
| 239 | + // "label": "Author", |
| 240 | + // "format": "text" |
| 241 | + // }, |
| 242 | + "longBookDescription": { |
| 243 | + "label": "Description" |
| 244 | + }, |
| 245 | + "anObjectWithDeepStructure.pageCount": { //use "." for sub-keys of flattened objects |
| 246 | + "label": "# pages", |
| 247 | + "format": "number" |
| 248 | + }, |
| 249 | + "anObjectWithDeepStructure.isRead": { |
| 250 | + "label": "Have been read?", |
| 251 | + "format": "boolean" |
| 252 | + }, |
| 253 | + "anObjectWithDeepStructure.lastReadTime": { |
| 254 | + "label": "Last read time", |
| 255 | + "format": "date" |
| 256 | + }, |
| 257 | + "anObject": { |
| 258 | + "label": "Some Object" |
| 259 | + }, |
| 260 | + "anArray": { |
| 261 | + "label": "Some Array" |
| 262 | + } |
| 263 | + } |
| 264 | + } |
| 265 | + } |
| 266 | + } |
| 267 | + } |
| 268 | + } |
| 269 | +} |
| 270 | +``` |
| 271 | + |
| 272 | +### Nested structures |
| 273 | + |
| 274 | +The most frequently used data formats present the data in a tabular format (Output tab table, Excel, CSV). In case an actor produces nested JSON structures, there is a need to transform the nested data into a flat tabular format. There are three ways to flatten the data: |
| 275 | + |
| 276 | +**1)** use `transformation.flatten` to flatten the nested structure of specified fields. Flatten transforms the nested object into a flat structure, e.g. with `flatten:["foo"]`, the object `{"foo":{"bar":"hello"}}` is turned into `{"foo.bar":"hello"}`. Once the structure is flattened, it is necessary to use the flattened property name in both `transformation.fields` and `display.properties`; otherwise, fields might not be fetched or configured properly in the UI visualization. |
| 277 | +**2)** use `transformation.unwind` to deconstruct the nested children into parent objects. |
| 278 | +**3)** change the output structure in an actor from nested to flat before the results are saved in the dataset. |
| 279 | + |
| 280 | +## Dataset schema structure definitions |
| 281 | + |
| 282 | +### DatasetSchema object definition |
| 283 | + |
| 284 | +| Property | Type | Required | Description | |
| 285 | +| ------------------ | ---------------------------- | -------- | -------------------------------------------------------------------------------------------------- | |
| 286 | +| actorSpecification | integer | true | Specifies the version of dataset schema <br/>structure document. <br/>Currently only version 1 is available. | |
| 287 | +| fields | JSONSchema compatible object | true | Schema of one dataset object. <br/>Use JsonSchema Draft 2020-12 or <br/>other compatible formats. | |
| 288 | +| views | DatasetView object | true | An object with a description of an API <br/>and UI views. | |
| 289 | + |
| 290 | +### DatasetView object definition |
| 291 | + |
| 292 | +| Property | Type | Required | Description | |
| 293 | +| -------------- | ------------------------- | -------- | ----------------------------------------------------------------------------------------------------- | |
| 294 | +| title | string | true | The title is visible in UI in the Output tab <br/>as well as in the API. | |
| 295 | +| description | string | false | The description is only available in the API response. <br/>The usage of this field is optional. | |
| 296 | +| transformation | ViewTransformation object | true | The definition of the data transformation <br/>that is applied when dataset data are loaded from <br/>the Dataset API. |
| 297 | +| display | ViewDisplay object | true | The definition of Output tab UI visualization. | |
| 298 | + |
| 299 | +### ViewTransformation object definition |
| 300 | + |
| 301 | +| Property | Type | Required | Description | |
| 302 | +| -------- | -------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | |
| 303 | +| fields | string[] | true | Selects fields that are going to be presented in the output. <br/>The order of fields matches the order of columns <br/>in visualization UI. In case a field's value <br/>is missing, it will be presented as “undefined” in the UI. |
| 304 | +| unwind | string[] | false | Deconstructs nested children into parent object, <br/>e.g.: with `unwind:["foo"]`, the object `{"foo":{"bar":"hello"}}` <br/> is turned into `{"bar":"hello"}`. |
| 305 | +| flatten | string[] | false | Transforms nested object into flat structure. <br/>e.g.: with `flatten:["foo"]`, the object `{"foo":{"bar":"hello"}}` <br/> is turned into `{"foo.bar":"hello"}`. |
| 306 | +| omit | string | false | Removes the specified fields from the output. <br/>Nested fields names can be used there as well. | |
| 307 | +| limit | integer | false | The maximum number of results returned. <br/>Default is all results. | |
| 308 | +| desc | boolean | false | By default, results are sorted in ascending order based <br/>on the write event into the dataset. The `desc:true` param <br/>will return the newest writes to the dataset first. |
| 309 | + |
| 310 | +### ViewDisplay object definition |
| 311 | + |
| 312 | +| Property | Type | Required | Description | |
| 313 | +| ---------- | ------------------------------------------------------------------------------------------------------------------ | -------- | ---------------------------------------------------------------------------------------------------------------------------- | |
| 314 | +| component | string | true | Only component “table” is available. | |
| 315 | +| properties | Object | false | Object with keys matching the `transformation.fields` <br/> and ViewDisplayProperty as values. In case properties are not set <br/>the table will be rendered automatically with fields formatted as Strings, <br/>Arrays or Objects. | |
| 316 | + |
| 317 | +### ViewDisplayProperty object definition |
| 318 | + |
| 319 | +| Property | Type | Required | Description | |
| 320 | +| -------- | ------------------------------------------------------- | -------- | ---------------------------------------------------------------------------------------------- | |
| 321 | +| label | string | false | In case the data are visualized in the Table view, <br/>the label will be visible in the table column’s header. |
| 322 | +| format | enum(text, number, date, link, <br/>boolean, image, array, object) | false | Describes how output data values are formatted <br/>in order to be rendered in the output tab UI. | |
0 commit comments