Skip to content

Commit 2705aa5

Browse files
authored
Schemas for connection config, and test script (#512)
* Schemas for connection config, and test script * Review comments * Fix pattern * Fix integrations object
1 parent ae83c7c commit 2705aa5

File tree

9 files changed

+283
-6
lines changed

9 files changed

+283
-6
lines changed

.github/workflows/test_and_build.yml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,3 +63,24 @@ jobs:
6363

6464
- name: Build docs
6565
run: cd docs && make html SPHINXBUILD="uv run sphinx-build"
66+
67+
test_schemas:
68+
runs-on: ubuntu-latest
69+
70+
steps:
71+
- uses: actions/checkout@v6
72+
- name: Install python
73+
uses: actions/setup-python@v6
74+
with:
75+
python-version: '3.12'
76+
77+
- name: Install extractor uploader
78+
env:
79+
PYPI_ARTIFACTORY_USERNAME: ${{ secrets.ARTIFACTORY_READONLY_TOKEN_USER_PUBLIC_REPOS }}
80+
PYPI_ARTIFACTORY_PASSWORD: ${{ secrets.ARTIFACTORY_READONLY_TOKEN_PUBLIC_REPOS }}
81+
run: |
82+
python -m pip install --upgrade pip
83+
pip install cognite-extractor-publisher --extra-index-url "https://${PYPI_ARTIFACTORY_USERNAME}:${PYPI_ARTIFACTORY_PASSWORD}@cognite.jfrog.io/cognite/api/pypi/snakepit/simple"
84+
85+
- name: Test schemas
86+
run: ./tests/test_schemas.sh

cognite/extractorutils/unstable/configuration/models.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ def _parse_expression(cls, expression: str) -> tuple[int, str]:
150150
except ValueError:
151151
pass
152152

153-
match = re.match(r"(\d+)[ \t]*(s|m|h|d)", expression)
153+
match = re.match(r"^(\d+)[ \t]*(s|m|h|d)$", expression.strip())
154154
if not match:
155155
raise InvalidConfigError("Invalid interval pattern")
156156

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
{
2+
"$id": "connection_config.schema.json",
3+
"$schema": "https://json-schema.org/draft/2020-12/schema",
4+
"type": "object",
5+
"unevaluatedProperties": false,
6+
"description": "Configuration for a connection to a data source or destination",
7+
"properties": {
8+
"project": {
9+
"type": "string",
10+
"description": "CDF project used for the connection."
11+
},
12+
"base-url": {
13+
"type": "string",
14+
"description": "CDF base URL used for the connection, for example https://api.cognitedata.com or https://az-eastus-1.cognitedata.com"
15+
},
16+
"integration": {
17+
"type": "object",
18+
"description": "Integration to use for remote config and status reporting.",
19+
"unevaluatedProperties": false,
20+
"properties": {
21+
"external-id": {
22+
"type": "string",
23+
"description": "External ID of the integration to use for remote config and status reporting."
24+
}
25+
},
26+
"required": [
27+
"external-id"
28+
]
29+
},
30+
"authentication": {
31+
"type": "object",
32+
"unevaluatedProperties": false,
33+
"description": "Authentication configuration for the CDF connection.",
34+
"discriminatorProp": "type",
35+
"oneOf": [
36+
{
37+
"type": "object",
38+
"description": "Configuration for connecting to CDF using client credentials.",
39+
"unevaluatedProperties": false,
40+
"title": "Client Credentials",
41+
"properties": {
42+
"type": {
43+
"type": "string",
44+
"const": "client-credentials",
45+
"description": "Type of authentication, must be 'client-credentials' for connecting to CDF using client credentials."
46+
},
47+
"client-id": {
48+
"type": "string",
49+
"description": "Client ID"
50+
},
51+
"scopes": {
52+
"type": "string",
53+
"description": "Space separated list of scopes to request when obtaining access tokens, for example https://api.cognitedata.com/.default."
54+
},
55+
"client-secret": {
56+
"type": "string",
57+
"description": "Client secret"
58+
},
59+
"token-url": {
60+
"type": "string",
61+
"description": "Token URL for obtaining access tokens, for example https://login.microsoftonline.com/{tenant-id}/oauth2/v2.0/token"
62+
},
63+
"resource": {
64+
"type": "string",
65+
"description": "Resource identifier passed along with token requests."
66+
},
67+
"audience": {
68+
"type": "string",
69+
"description": "Audience identifier passed along with token requests."
70+
}
71+
},
72+
"required": [
73+
"type",
74+
"client-id",
75+
"scopes",
76+
"client-secret",
77+
"token-url"
78+
]
79+
},
80+
{
81+
"type": "object",
82+
"description": "Configuration for connecting to CDF using a certificate.",
83+
"unevaluatedProperties": false,
84+
"title": "Client Certificate",
85+
"properties": {
86+
"type": {
87+
"type": "string",
88+
"const": "client-certificate",
89+
"description": "Type of authentication, must be 'client-certificate' for connecting to CDF using a client certificate."
90+
},
91+
"client-id": {
92+
"type": "string",
93+
"description": "Client ID"
94+
},
95+
"scopes": {
96+
"type": "string",
97+
"description": "Space separated list of scopes to request when obtaining access tokens, for example https://api.cognitedata.com/.default."
98+
},
99+
"path": {
100+
"type": "string",
101+
"description": "Path to the client certificate file."
102+
},
103+
"authority-url": {
104+
"type": "string",
105+
"description": "Authority URL for obtaining access tokens."
106+
},
107+
"password": {
108+
"type": "string",
109+
"description": "Password for the client certificate, if it is encrypted."
110+
}
111+
},
112+
"required": [
113+
"type",
114+
"client-id",
115+
"scopes",
116+
"path",
117+
"authority-url"
118+
]
119+
}
120+
]
121+
},
122+
"connection": {
123+
"type": "object",
124+
"description": "Additional configuration for the connection.",
125+
"unevaluatedProperties": false,
126+
"properties": {
127+
"retries": {
128+
"$ref": "retries_config.schema.json"
129+
},
130+
"ssl-certificates": {
131+
"type": "object",
132+
"description": "Configuration for SSL certificates to use for the connection.",
133+
"unevaluatedProperties": false,
134+
"properties": {
135+
"verify": {
136+
"type": "boolean",
137+
"description": "Whether to verify SSL certificates for the connection.",
138+
"default": true
139+
},
140+
"allow-list": {
141+
"type": "array",
142+
"description": "List of SSL certificate thumbprints to allow for the connection, even if they are not valid according to the system's certificate store.",
143+
"items": {
144+
"type": "string"
145+
}
146+
}
147+
}
148+
}
149+
}
150+
}
151+
}
152+
}
File renamed without changes.

schema/unstable/metrics_config.schema.json

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,14 @@
3030
},
3131
"clear-after": {
3232
"type": "string",
33-
"description": "Interval for clearing metrics from the Prometheus Push Gateway, on the form 12s, 15m, 1h, etc. If not specified, metrics will not be cleared from the Push Gateway."
33+
"description": "Interval for clearing metrics from the Prometheus Push Gateway, on the form 12s, 15m, 1h, etc. If not specified, metrics will not be cleared from the Push Gateway.",
34+
"pattern": "^([0-9]+)[ \t]*(s|m|h|d)$"
3435
},
3536
"push-interval": {
3637
"type": "string",
3738
"description": "Interval for pushing metrics to the Prometheus Push Gateway, on the form 12s, 15m, 1h, etc.",
38-
"default": "30s"
39+
"default": "30s",
40+
"pattern": "^([0-9]+)[ \t]*(s|m|h|d)$"
3941
}
4042
},
4143
"required": [
@@ -64,7 +66,8 @@
6466
"push-interval": {
6567
"type": "string",
6668
"description": "Enter the interval between each push to CDF, on the form 12s, 15m, 1h, etc.",
67-
"default": "30s"
69+
"default": "30s",
70+
"pattern": "^([0-9]+)[ \t]*(s|m|h|d)$"
6871
},
6972
"data-set": {
7073
"description": "The data set where the metrics will be created.",
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
{
2+
"$id": "retries_config.schema.json",
3+
"$schema": "https://json-schema.org/draft/2020-12/schema",
4+
"type": "object",
5+
"unevaluatedProperties": false,
6+
"description": "Configuration for retries",
7+
"properties": {
8+
"max-retries": {
9+
"type": "integer",
10+
"description": "Maximum number of retries before giving up. Setting to -1 will retry indefinitely.",
11+
"default": 10,
12+
"minimum": -1
13+
},
14+
"max-backoff": {
15+
"type": "string",
16+
"description": "Maximum delay between retries, on the form 12s, 15m, 1h, etc.",
17+
"default": "30s"
18+
},
19+
"timeout": {
20+
"type": "string",
21+
"description": "Timeout for each retry attempt, on the form 12s, 15m, 1h, etc.",
22+
"default": "30s",
23+
"pattern": "^([0-9]+)[ \t]*(s|m|h|d)$"
24+
}
25+
}
26+
}
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
{
2+
"$id": "schedule_config.schema.json",
3+
"$schema": "https://json-schema.org/draft/2020-12/schema",
4+
"type": "object",
5+
"unevaluatedProperties": false,
6+
"description": "Configuration for scheduling task runs",
7+
"discriminatorProp": "type",
8+
"oneOf": [
9+
{
10+
"type": "object",
11+
"description": "Configuration for a schedule that runs at a fixed interval",
12+
"unevaluatedProperties": false,
13+
"title": "Interval",
14+
"properties": {
15+
"type": {
16+
"type": "string",
17+
"const": "interval",
18+
"description": "Type of schedule, must be 'interval' for a schedule that runs at a fixed interval"
19+
},
20+
"expression": {
21+
"type": "string",
22+
"description": "Interval between task runs, on the form 12s, 15m, 1h, etc.",
23+
"pattern": "^([0-9]+)[ \t]*(s|m|h|d)$"
24+
}
25+
},
26+
"required": [
27+
"type",
28+
"expression"
29+
]
30+
},
31+
{
32+
"type": "object",
33+
"description": "Configuration for a schedule that runs according to a cron expression",
34+
"unevaluatedProperties": false,
35+
"title": "Cron",
36+
"properties": {
37+
"type": {
38+
"type": "string",
39+
"const": "cron",
40+
"description": "Type of schedule, must be 'cron' for a schedule that runs according to a cron expression"
41+
},
42+
"expression": {
43+
"type": "string",
44+
"description": "Cron expression defining when to run the task, on the form '0 0 * * *' (every day at midnight), '*/15 * * * *' (every 15 minutes), etc."
45+
}
46+
},
47+
"required": [
48+
"type",
49+
"expression"
50+
]
51+
}
52+
]
53+
}

schema/unstable/state_store_config.schema.json

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,9 @@
2727
},
2828
"upload-interval": {
2929
"type": "string",
30-
"description": "Interval for uploading state store data to CDF Raw, on the form 12s, 15m, 1h, etc."
30+
"description": "Interval for uploading state store data to CDF Raw, on the form 12s, 15m, 1h, etc.",
31+
"default": "30s",
32+
"pattern": "^([0-9]+)[ \t]*(s|m|h|d)$"
3133
}
3234
},
3335
"required": [
@@ -53,7 +55,9 @@
5355
},
5456
"save-interval": {
5557
"type": "string",
56-
"description": "Interval for saving state store data to the local JSON file, on the form 12s, 15m, 1h, etc."
58+
"description": "Interval for saving state store data to the local JSON file, on the form 12s, 15m, 1h, etc.",
59+
"default": "30s",
60+
"pattern": "^([0-9]+)[ \t]*(s|m|h|d)$"
5761
}
5862
},
5963
"required": [

tests/test_schemas.sh

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
#!/bin/bash
2+
3+
set -e -x
4+
5+
target_schemas=(
6+
"schema/base_config.schema.json"
7+
"schema/unstable/connection_config.schema.json"
8+
"schema/unstable/schedule_config.schema.json"
9+
"schema/unstable/extractor_config.schema.json"
10+
)
11+
12+
for schema in "${target_schemas[@]}"; do
13+
echo "Processing $schema"
14+
publish-extractor schema --schema "$schema" --output bundled.schema.json
15+
echo "Generating docs for $schema"
16+
publish-extractor docs --schema bundled.schema.json
17+
rm bundled.schema.json
18+
done

0 commit comments

Comments
 (0)