Skip to content

Commit f8342c2

Browse files
committed
feat(api): dataset fields statistics
1 parent e768c63 commit f8342c2

File tree

7 files changed

+103
-1
lines changed

7 files changed

+103
-1
lines changed
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
# Schema describing the statistics reported for a single dataset field.
# No property is listed as required and each one is nullable, so any subset
# of them may appear for a given field.
title: DatasetFieldStatistics
type: object
properties:
  min:
    type: number
    description: >-
      Minimum value of the field. For numbers, this is calculated directly.
      For strings, this is the length of the shortest string. For arrays,
      this is the length of the shortest array. For objects, this is the
      number of keys in the smallest object.
    nullable: true
  max:
    type: number
    description: >-
      Maximum value of the field. For numbers, this is calculated directly.
      For strings, this is the length of the longest string. For arrays,
      this is the length of the longest array. For objects, this is the
      number of keys in the largest object.
    nullable: true
  nullCount:
    # A count of items is always a whole number, hence `integer`.
    type: integer
    description: >-
      How many items in the dataset have a null value for this field.
    nullable: true
  emptyCount:
    # A count of items is always a whole number, hence `integer`.
    type: integer
    description: >-
      How many items in the dataset have an `undefined` value for this field.
      Note that, for example, an empty string is not considered empty.
    nullable: true
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
# Response envelope for the dataset field statistics endpoint.
# The payload lives under the mandatory `data` key.
title: GetDatasetFieldStatisticsResponse
type: object
required:
  - data
properties:
  data:
    type: object
    required:
      - fields
      - statistics
    properties:
      # List of field keys covered by `statistics`.
      fields:
        description: 'Keys of the fields for which the statistics are provided.'
        type: array
        items:
          type: string
      # Map from field key to that field's statistics object.
      statistics:
        description: 'Statistics for each field. The keys are the same as in the `fields` array.'
        type: object
        additionalProperties:
          $ref: ./DatasetFieldStatistics.yaml

apify-api/openapi/components/tags.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -758,6 +758,10 @@
758758
x-legacy-doc-urls:
759759
- '#/reference/datasets/item-collection'
760760
x-trait: 'true'
761+
- name: Datasets/Field statistics
762+
x-displayName: Field statistics
763+
x-parent-tag-name: Datasets
764+
x-trait: 'true'
761765
- name: Request queues
762766
x-displayName: Request queues
763767
x-legacy-doc-urls:

apify-api/openapi/components/x-tag-groups.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@
6262
- Datasets/Dataset collection
6363
- Datasets/Dataset
6464
- Datasets/Item collection
65+
- Datasets/Field statistics
6566
- name: Request queues
6667
tags:
6768
- Request queues

apify-api/openapi/openapi.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -566,6 +566,8 @@ paths:
566566
$ref: 'paths/datasets/datasets@{datasetId}.yaml'
567567
'/v2/datasets/{datasetId}/items':
568568
$ref: 'paths/datasets/datasets@{datasetId}@items.yaml'
569+
'/v2/datasets/{datasetId}/field-statistics':
570+
$ref: 'paths/datasets/datasets@{datasetId}@field-statistics.yaml'
569571
/v2/request-queues:
570572
$ref: paths/request-queues/request-queues.yaml
571573
'/v2/request-queues/{queueId}':
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
# Operation that returns the field statistics of a single dataset.
get:
  tags:
    - Datasets/Field statistics
  summary: Get field statistics
  description: |
    Returns field statistics for the given dataset.
    When you configure the dataset [fields schema](https://docs.apify.com/platform/actors/development/actor-definition/dataset-schema/validation), we generate a field list and measure the statistics such as `min`, `max`, `nullCount` and `emptyCount`.

    See [documentation](https://docs.apify.com/platform/actors/development/actor-definition/dataset-schema/validation#dataset-field-statistics) for more information.
  operationId: dataset_field_statistics_get
  parameters:
    - name: datasetId
      in: path
      description: Dataset ID or `username~dataset-name`.
      required: true
      style: simple
      schema:
        type: string
      example: WkzbQMuFYuamGv3YF
    # Optional query parameter: no `required` flag is set for it.
    - name: token
      in: query
      description: |
        API authentication token. It is required only when using the `username~dataset-name` format for `datasetId`.
      style: form
      explode: true
      schema:
        type: string
      example: soSkq9ekdmfOslopH
  responses:
    '200':
      # OpenAPI requires a response description; keep it informative
      # rather than an empty string.
      description: Field statistics of the dataset.
      content:
        application/json:
          schema:
            $ref: '../../components/schemas/datasets/GetDatasetFieldStatisticsResponse.yaml'
          # Block style instead of JSON-like flow mappings; same data.
          example:
            data:
              fields: [name, price]
              statistics:
                name:
                  nullCount: 122
                price:
                  min: 59
                  max: 89
  # TODO: add client methods
  # x-js-parent: DatasetClient
  # x-js-name: fieldStatistics
  # x-js-doc-url: https://docs.apify.com/api/client/js/reference/class/DatasetClient#fieldStatistics
  # x-py-parent: DatasetClientAsync
  # x-py-name: field_statistics
  # x-py-doc-url: https://docs.apify.com/api/client/python/reference/class/DatasetClientAsync#field_statistics

package-lock.json

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)