Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions .github/workflows/test_and_build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,24 @@ jobs:

- name: Build docs
run: cd docs && make html SPHINXBUILD="uv run sphinx-build"

test_schemas:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v6
- name: Install python
uses: actions/setup-python@v6
with:
python-version: '3.12'

- name: Install extractor uploader
env:
PYPI_ARTIFACTORY_USERNAME: ${{ secrets.ARTIFACTORY_READONLY_TOKEN_USER_PUBLIC_REPOS }}
PYPI_ARTIFACTORY_PASSWORD: ${{ secrets.ARTIFACTORY_READONLY_TOKEN_PUBLIC_REPOS }}
run: |
python -m pip install --upgrade pip
pip install cognite-extractor-publisher --extra-index-url "https://${PYPI_ARTIFACTORY_USERNAME}:${PYPI_ARTIFACTORY_PASSWORD}@cognite.jfrog.io/cognite/api/pypi/snakepit/simple"

- name: Test schemas
run: ./tests/test_schemas.sh
2 changes: 1 addition & 1 deletion cognite/extractorutils/unstable/configuration/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def _parse_expression(cls, expression: str) -> tuple[int, str]:
except ValueError:
pass

match = re.match(r"(\d+)[ \t]*(s|m|h|d)", expression)
match = re.match(r"^(\d+)[ \t]*(s|m|h|d)$", expression.strip())
if not match:
raise InvalidConfigError("Invalid interval pattern")

Expand Down
152 changes: 152 additions & 0 deletions schema/unstable/connection_config.schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
{
"$id": "connection_config.schema.json",
"$schema": "https://json-schema.org/draft/2020-12/schema",
"type": "object",
"unevaluatedProperties": false,
"description": "Configuration for a connection to a data source or destination",
"properties": {
"project": {
"type": "string",
"description": "CDF project used for the connection."
},
"base-url": {
"type": "string",
"description": "CDF base URL used for the connection, for example https://api.cognitedata.com or https://az-eastus-1.cognitedata.com"
},
"integration": {
"type": "object",
"description": "Name of the integration to use for remote config and status reporting.",
"unevaluatedProperties": false,
"properties": {
"external-id": {
"type": "string",
"description": "External ID of the integration to use for remote config and status reporting."
}
},
"required": [
"external-id"
]
},
"authentication": {
"type": "object",
"unevaluatedProperties": false,
"description": "Authentication configuration for the CDF connection.",
"discriminatorProp": "type",
"oneOf": [
{
"type": "object",
"description": "Configuration for connecting to CDF using client credentials.",
"unevaluatedProperties": false,
"title": "Client Credentials",
"properties": {
"type": {
"type": "string",
"const": "client-credentials",
"description": "Type of authentication, must be 'client-credentials' for connecting to CDF using client credentials."
},
"client-id": {
"type": "string",
"description": "Client ID"
},
"scopes": {
"type": "string",
"description": "Space separated list of scopes to request when obtaining access tokens, for example https://api.cognitedata.com/.default."
},
"client-secret": {
"type": "string",
"description": "Client secret"
},
"token-url": {
"type": "string",
"description": "Token URL for obtaining access tokens, for example https://login.microsoftonline.com/{tenant-id}/oauth2/v2.0/token"
},
"resource": {
"type": "string",
"description": "Resource identifier passed along with token requests."
},
"audience": {
"type": "string",
"description": "Audience identifier passed along with token requests."
}
},
"required": [
"type",
"client-id",
"scopes",
"client-secret",
"token-url"
]
},
{
"type": "object",
"description": "Configuration for connecting to CDF using a certificate.",
"unevaluatedProperties": false,
"title": "Client Certificate",
"properties": {
"type": {
"type": "string",
"const": "client-certificate",
"description": "Type of authentication, must be 'client-certificate' for connecting to CDF using a client certificate."
},
"client-id": {
"type": "string",
"description": "Client ID"
},
"scopes": {
"type": "string",
"description": "Space separated list of scopes to request when obtaining access tokens, for example https://api.cognitedata.com/.default."
},
"path": {
"type": "string",
"description": "Path to the client certificate file."
},
"authority-url": {
"type": "string",
"description": "Authority URL for obtaining access tokens."
},
"password": {
"type": "string",
"description": "Password for the client certificate, if it is encrypted."
}
},
"required": [
"type",
"client-id",
"scopes",
"path",
"authority-url"
]
}
]
},
"connection": {
"type": "object",
"description": "Additional configuration for the connection.",
"unevaluatedProperties": false,
"properties": {
"retries": {
"$ref": "retries_config.schema.json"
},
"ssl-certificates": {
"type": "object",
"description": "Configuration for SSL certificates to use for the connection.",
"unevaluatedProperties": false,
"properties": {
"verify": {
"type": "boolean",
"description": "Whether to verify SSL certificates for the connection.",
"default": true
},
"allow-list": {
"type": "array",
"description": "List of SSL certificate thumbprints to allow for the connection, even if they are not valid according to the system's certificate store.",
"items": {
"type": "string"
}
}
}
}
}
}
}
}
9 changes: 6 additions & 3 deletions schema/unstable/metrics_config.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,14 @@
},
"clear-after": {
"type": "string",
"description": "Interval for clearing metrics from the Prometheus Push Gateway, on the form 12s, 15m, 1h, etc. If not specified, metrics will not be cleared from the Push Gateway."
"description": "Interval for clearing metrics from the Prometheus Push Gateway, on the form 12s, 15m, 1h, etc. If not specified, metrics will not be cleared from the Push Gateway.",
"pattern": "([0-9]+)[ \t]*(s|m|h|d)"
},
"push-interval": {
"type": "string",
"description": "Interval for pushing metrics to the Prometheus Push Gateway, on the form 12s, 15m, 1h, etc.",
"default": "30s"
"default": "30s",
"pattern": "^([0-9]+)[ \t]*(s|m|h|d)$"
}
},
"required": [
Expand Down Expand Up @@ -64,7 +66,8 @@
"push-interval": {
"type": "string",
"description": "Enter the interval between each push to CDF, on the form 12s, 15m, 1h, etc.",
"default": "30s"
"default": "30s",
"pattern": "^([0-9]+)[ \t]*(s|m|h|d)$"
},
"data-set": {
"description": "The data set where the metrics will be created.",
Expand Down
26 changes: 26 additions & 0 deletions schema/unstable/retries_config.schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"$id": "retries_config.schema.json",
"$schema": "https://json-schema.org/draft/2020-12/schema",
"type": "object",
"unevaluatedProperties": false,
"description": "Configuration for retries",
"properties": {
"max-retries": {
"type": "integer",
"description": "Maximum number of retries before giving up. Setting to -1 will retry indefinitely.",
"default": 10,
"minimum": -1
},
"max-backoff": {
"type": "string",
"description": "Maximum delay between retries, on the form 12s, 15m, 1h, etc.",
"default": "30s"
},
"timeout": {
"type": "string",
"description": "Timeout in seconds for each retry attempt, on the form 12s, 15m, 1h, etc.",
"default": "30s",
"pattern": "^([0-9]+)[ \t]*(s|m|h|d)$"
}
}
}
53 changes: 53 additions & 0 deletions schema/unstable/schedule_config.schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
{
"$id": "schedule_config.schema.json",
"$schema": "https://json-schema.org/draft/2020-12/schema",
"type": "object",
"unevaluatedProperties": false,
"description": "Configuration for scheduling task runs",
"discriminatorProp": "type",
"oneOf": [
{
"type": "object",
"description": "Configuration for a schedule that runs at a fixed interval",
"unevaluatedProperties": false,
"title": "Interval",
"properties": {
"type": {
"type": "string",
"const": "interval",
"description": "Type of schedule, must be 'interval' for a schedule that runs at a fixed interval"
},
"expression": {
"type": "string",
"description": "Interval between task runs, on the form 12s, 15m, 1h, etc.",
"pattern": "^([0-9]+)[ \t]*(s|m|h|d)$"
}
},
"required": [
"type",
"expression"
]
},
{
"type": "object",
"description": "Configuration for a schedule that runs according to a cron expression",
"unevaluatedProperties": false,
"title": "Cron",
"properties": {
"type": {
"type": "string",
"const": "cron",
"description": "Type of schedule, must be 'cron' for a schedule that runs according to a cron expression"
},
"expression": {
"type": "string",
"description": "Cron expression defining when to run the task, on the form '0 0 * * *' (every day at midnight), '*/15 * * * *' (every 15 minutes), etc."
}
},
"required": [
"type",
"expression"
]
}
]
}
8 changes: 6 additions & 2 deletions schema/unstable/state_store_config.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@
},
"upload-interval": {
"type": "string",
"description": "Interval for uploading state store data to CDF Raw, on the form 12s, 15m, 1h, etc."
"description": "Interval for uploading state store data to CDF Raw, on the form 12s, 15m, 1h, etc.",
"default": "30s",
"pattern": "^([0-9]+)[ \t]*(s|m|h|d)$"
}
},
"required": [
Expand All @@ -53,7 +55,9 @@
},
"save-interval": {
"type": "string",
"description": "Interval for saving state store data to the local JSON file, on the form 12s, 15m, 1h, etc."
"description": "Interval for saving state store data to the local JSON file, on the form 12s, 15m, 1h, etc.",
"default": "30s",
"pattern": "^([0-9]+)[ \t]*(s|m|h|d)$"
}
},
"required": [
Expand Down
18 changes: 18 additions & 0 deletions tests/test_schemas.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/bin/bash
# Smoke-test the extractor config schemas: bundle each one with the
# cognite-extractor-publisher CLI, then render docs from the bundle.
# Any failure (bad $ref, invalid schema, doc-gen error) aborts the run.

# -e: exit on first error, -u: error on unset variables,
# -x: trace commands (useful in CI logs), pipefail: fail on any pipe stage.
set -euxo pipefail

# Always clean up the temporary bundle, even if a bundle/docs step fails
# mid-loop (plain `rm` at the end of the loop would be skipped under -e).
trap 'rm -f bundled.schema.json' EXIT

target_schemas=(
    "schema/base_config.schema.json"
    "schema/unstable/connection_config.schema.json"
    "schema/unstable/schedule_config.schema.json"
    "schema/unstable/extractor_config.schema.json"
)

for schema in "${target_schemas[@]}"; do
    echo "Processing $schema"
    publish-extractor schema --schema "$schema" --output bundled.schema.json
    echo "Generating docs for $schema"
    publish-extractor docs --schema bundled.schema.json
    rm bundled.schema.json
done
Loading