Skip to content

Commit 777f75f

Browse files
Zashaarnq
andcommitted
bin: Validate config against a Schema
JSON Schema is a format for describing the structure of a JSON value, meant to support validation of data. This makes it usable for validating the structure of the cluster config files, since JSON and YAML have overlapping data models. Being an open standard, there are multiple tools and libraries available for doing validation, as well as for generating documentation or boilerplate code for type-aware languages. The initial schema only covers the top-level 'global' entry for easier review; additional entries will be added in subsequent PRs. Schema validation for secrets.yaml currently acts on the sops-encrypted file and thus only sees the basic structure. Includes a tool to generate basic JSON Schemas (written as YAML), although the output requires some post-processing, so it is mostly useful for getting a starting point. To validate, run `./bin/ck8s validate sc|wc`. If something is wrong, it should print the path and value that violate the schema to make it easy to fix. It would have been nice if yajsv could output the values that fail validation itself. Suggestions from code review. Co-authored-by: André Arnqvist <58822152+aarnq@users.noreply.github.com>
1 parent 36da0b1 commit 777f75f

File tree

13 files changed

+731
-8
lines changed

13 files changed

+731
-8
lines changed

.github/pull_request_template.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,3 +112,5 @@ Include screenshots if applicable to help explain these changes.
112112
- [ ] The change does not cause any alerts to be generated by Falco
113113
- Bug checks:
114114
- [ ] The bug fix is covered by regression tests
115+
- Config checks:
116+
- [ ] The schema was updated

.vscode/settings.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"yaml.schemas": {
3+
"config/schemas/config.yaml": "config/config/*-config.yaml"
4+
}
5+
}

bin/common.bash

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ check_tools() {
113113
warn=0
114114
err=0
115115

116-
for executable in jq yq4 s3cmd sops kubectl helm helmfile dig pwgen htpasswd; do
116+
for executable in jq yq4 s3cmd sops kubectl helm helmfile dig pwgen htpasswd yajsv; do
117117
if ! command -v "${executable}" > /dev/null; then
118118
log_error "Required dependency ${executable} missing"
119119
err=1
@@ -288,13 +288,13 @@ load_config() {
288288

289289
if [[ "${1}" == "sc" ]]; then
290290
check_config "${config[default_sc]}" "${config[override_sc]}"
291-
config[config_file_sc]=$(mktemp)
291+
config[config_file_sc]=$(mktemp --suffix="_sc-config.yaml")
292292
append_trap "rm ${config[config_file_sc]}" EXIT
293293
merge_config "${config[default_sc]}" "${config[override_sc]}" "${config[config_file_sc]}"
294294

295295
elif [[ "${1}" == "wc" ]]; then
296296
check_config "${config[default_wc]}" "${config[override_wc]}"
297-
config[config_file_wc]=$(mktemp)
297+
config[config_file_wc]=$(mktemp --suffix="_wc-config.yaml")
298298
append_trap "rm ${config[config_file_wc]}" EXIT
299299
merge_config "${config[default_wc]}" "${config[override_wc]}" "${config[config_file_wc]}"
300300

@@ -362,7 +362,30 @@ validate_config() {
362362
fi
363363
}
364364

365-
template_file=$(mktemp)
365+
# Validate a config file against a JSON Schema using yajsv.
#
# Arguments:
#   $1 - path of the (merged) config file to validate
#   $2 - path of the schema file passed to `yajsv -s`
# Globals:
#   CK8S_AUTO_APPROVE (read)  - when it evaluates true, no prompt is shown
#   maybe_exit        (write) - set to "true" when validation fails
# Outputs:
#   On failure: the yajsv messages (with the temp-file prefix trimmed),
#   followed by each offending path and its current value in the config.
schema_validate() {
    local merged_config="${1}"
    local schema_file="${2}"
    local schema_validation_result
    # Tracks the outcome of *this* run only. Gating the prompt on the shared
    # maybe_exit flag would re-prompt for a failure that an earlier check
    # already reported, even when the schema validation itself passed.
    local schema_failed="false"

    schema_validation_result="$(mktemp --suffix='.txt')"
    append_trap "rm ${schema_validation_result}" EXIT

    if ! yajsv -s "${schema_file}" "${merged_config}" > "${schema_validation_result}"; then
        log_warning "Failed schema validation:"
        # Shorten "<tmp prefix>_sc-config.yaml: fail: <msg>" to
        # "sc-config.yaml: <msg>" and stop after the "... failed validation"
        # summary line.
        sed -r 's/^.*_(..-config\.yaml): fail: (.*)/\1: \2/; / failed validation$/q' < "${schema_validation_result}"
        # Print the current value of every distinct path that was reported.
        grep -oP '(?<=fail: )[^:]+' "${schema_validation_result}" | sort -u |
            while read -r jpath; do
                printf '.%s = ' "${jpath}"
                yq4 -oj ".${jpath}" "${merged_config}"
            done
        schema_failed="true"
        # Still raise the shared flag, as before, for any code that reads it.
        maybe_exit="true"
    fi

    if ${schema_failed} && ! ${CK8S_AUTO_APPROVE}; then
        ask_abort
    fi
}
387+
388+
template_file=$(mktemp --suffix="-tpl.yaml")
366389
append_trap "rm ${template_file}" EXIT
367390

368391
if [[ $1 == "sc" ]]; then
@@ -373,7 +396,9 @@ validate_config() {
373396
"${config_template_path}/config/sc-config.yaml" \
374397
> "${template_file}"
375398
validate "${config[config_file_sc]}" "${template_file}"
399+
schema_validate "${config[config_file_sc]}" "${config_template_path}/schemas/config.yaml"
376400
validate "${secrets[secrets_file]}" "${config_template_path}/secrets/sc-secrets.yaml"
401+
schema_validate "${secrets[secrets_file]}" "${config_template_path}/schemas/secrets.yaml"
377402
elif [[ $1 == "wc" ]]; then
378403
check_config "${config_template_path}/config/common-config.yaml" \
379404
"${config_template_path}/config/wc-config.yaml" \
@@ -382,7 +407,9 @@ validate_config() {
382407
"${config_template_path}/config/wc-config.yaml" \
383408
> "${template_file}"
384409
validate "${config[config_file_wc]}" "${template_file}"
410+
schema_validate "${config[config_file_wc]}" "${config_template_path}/schemas/config.yaml"
385411
validate "${secrets[secrets_file]}" "${config_template_path}/secrets/wc-secrets.yaml"
412+
schema_validate "${secrets[secrets_file]}" "${config_template_path}/schemas/secrets.yaml"
386413
else
387414
log_error "ERROR: usage validate_config <sc|wc>"
388415
exit 1

config/schemas/README.md

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
# Configuration Schema
2+
3+
Despite the name, JSON Schema can be used to describe most kinds of data
4+
structures, especially those with a data model very close to JSON, such
5+
as YAML (if you stay away from the fancier features).
6+
7+
A JSON or YAML value that satisfies a schema is called an "instance".
8+
9+
- [JSON Schema Core](https://json-schema.org/draft/2020-12/json-schema-core) specifies the basic structure of a schema.
10+
- [JSON Schema Validation](https://json-schema.org/draft/2020-12/json-schema-validation) specifies additional ways to validate various values.
11+
12+
For example, given the following config snippet:
13+
14+
``` yaml
15+
some-service:
16+
enabled: true
17+
foo: hello
18+
bar: [ world ]
19+
```
20+
21+
A schema describing this might look like:
22+
23+
``` yaml
24+
properties:
25+
some-service:
26+
title: An Example
27+
description: Some words to describe this schema
28+
type: object
29+
required:
30+
- enabled
31+
properties:
32+
enabled:
33+
type: boolean
34+
foo:
35+
type: string
36+
examples:
37+
- hello
38+
default: baz
39+
bar:
40+
type: array
41+
items:
42+
type: string
43+
examples:
44+
- world
45+
additionalProperties: false
46+
```
47+
48+
Important things:
49+
50+
`type` declares the accepted type(s) of a value, using the JSON name for types:
51+
52+
- `object`
53+
- `array`
54+
- `string`
55+
- `number` / `integer`
56+
- `boolean`
57+
58+
`title`, `description` and `examples` serve as documentation, to describe the value.
59+
Default values can be provided in `default`.
60+
61+
`type: object` must have a `properties` map describing each value in the object.
62+
63+
Any value not covered by `properties` would be tried against the schema in `additionalProperties`.
64+
In most cases this should be `false`, which causes validation to fail in order to detect e.g. typos or that the schema is incomplete.
65+
Objects where all properties are of the same kind can instead have a schema object as `additionalProperties`.
66+
67+
Any object property that is **required** can be specified as a list in `required`.
68+
Other properties are allowed to be missing.
69+
70+
Lists (`type: array`) have the schema for their items in `items`.
71+
72+
Scalar types can have various constraints and validation hints, e.g. length and range constraints, `format: email` etc. <!-- how much of json-schema-validation to duplicate? -->
73+
74+
The tool `bin/genschema.py` can be used to generate a schema from a YAML snippet.
75+
76+
```bash
77+
cat > conf-snippet.yaml <<EOF
78+
service:
79+
enabled: true
80+
features:
81+
- nice
82+
EOF
83+
./bin/genschema.py ./conf-snippet.yaml | tee ./conf-snippet.schema.yaml
84+
```
85+
86+
The output can be tweaked and inserted into `config/schemas/config.yaml` under `.properties`.
87+
88+
## VSCode
89+
90+
The plugin `redhat.vscode-yaml` can provide auto completion, validation and help texts from the schema.
91+
This can be enabled in other repositories by editing the file `.vscode/settings.json`, adding the path or URL to the schema under the key `.["yaml.schemas"]` like below:
92+
93+
```json
94+
{
95+
"yaml.schemas": {
96+
".../path/to/ck8s-apps/config/schemas/config.yaml": "config/config/*-config.yaml"
97+
}
98+
}
99+
```

config/schemas/config.yaml

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
$schema: https://json-schema.org/draft/2020-12/schema
2+
# TODO point to main branch before merge
3+
$id: https://github.com/elastisys/compliantkubernetes-apps/raw/ka/jsonschema/config/schemas/config.yaml
4+
title: Compliant Kubernetes Apps settings
5+
description: |
6+
This describes the structure of the configuration for both the service
7+
cluster and the workload cluster, but keep in mind that each configuration
8+
file will contain different settings.
9+
$defs:
10+
$comment: |
11+
Location for common types of things for reuse with a reference like
12+
13+
```yaml
14+
thing:
15+
$ref: "#/$defs/thing"
16+
```
17+
type: object
18+
required:
19+
- global
20+
properties:
21+
global:
22+
title: Global options
23+
description: Some common options used in various helm charts.
24+
type: object
25+
required:
26+
- ck8sVersion
27+
- ck8sCloudProvider
28+
- ck8sEnvironmentName
29+
- ck8sFlavor
30+
- baseDomain
31+
- opsDomain
32+
properties:
33+
ck8sVersion:
34+
title: Compliant Kubernetes Apps version
35+
description: |-
36+
Use version number if you are exactly at a release tag.
37+
Otherwise use full commit hash of current commit.
38+
`any`, can be used to disable this validation.
39+
type: string
40+
examples:
41+
- v0.42.1
42+
- any
43+
- 424442541a567646c232d949bad1af2b5b7cb885
44+
ck8sCloudProvider:
45+
type: string
46+
enum:
47+
- aws
48+
- baremetal
49+
- citycloud
50+
- elastx
51+
- exoscale
52+
- none
53+
- safespring
54+
- upcloud
55+
ck8sEnvironmentName:
56+
title: Environment name
57+
type: string
58+
examples:
59+
- my-ck8s-cluster
60+
ck8sFlavor:
61+
type: string
62+
enum:
63+
- prod
64+
- dev
65+
- air-gapped
66+
baseDomain:
67+
title: Base Domain
68+
description: |-
69+
Domain intended for ingress usage in the workload cluster
70+
and to reach application developer facing services such as Grafana, Harbor and OpenSearch Dashboards.
71+
E.g. with 'prod.domain.com', OpenSearch Dashboards is reached via 'opensearch.prod.domain.com'.
72+
type: string
73+
format: hostname
74+
opsDomain:
75+
description: |-
76+
Domain intended for ingress usage in the service cluster and to reach
77+
non-user facing services such as Thanos and OpenSearch.
78+
E.g. with 'ops.prod.domain.com', OpenSearch is reached via 'opensearch.ops.prod.domain.com'.
79+
type: string
80+
format: hostname
81+
scDomain:
82+
description: If baseDomain for wc and sc are not the same, set the domain of the sc cluster.
83+
type: string
84+
oneOf: # Templates do not handle missing values so they must be empty strings to disable. Future FIXME?
85+
- const: ""
86+
- format: hostname
87+
scOpsDomain:
88+
description: If opsDomain for wc and sc are not the same, set the ops domain of the sc cluster.
89+
type: string
90+
oneOf:
91+
- const: ""
92+
- format: hostname
93+
issuer:
94+
description: |-
95+
Default cert-manager issuer to use for issuing certificates for ingresses.
96+
Normally one of `letsencrypt-staging` or `letsencrypt-prod`.
97+
type: string
98+
default: letsencrypt-staging
99+
enum:
100+
- letsencrypt-staging
101+
- letsencrypt-prod
102+
verifyTls:
103+
description: Verify ingress certificates
104+
type: boolean
105+
default: true
106+
clusterDns:
107+
description: IP of the cluster DNS in kubernetes
108+
type: string
109+
default: 10.233.0.3
110+
# "ipv4" is the format name defined by JSON Schema draft 2020-12; "ip-address" is not defined there.
format: ipv4
111+
clusterName:
112+
type: string
113+
clustersMonitoring:
114+
description: |-
115+
Names of the workload clusters that send metrics to this cluster.
116+
Mainly used for filtering of metrics.
117+
type: array
118+
items:
119+
type: string
120+
pattern: -[sw]c$
121+
containerRuntime:
122+
title: Container runtime
123+
default: containerd
124+
type: string
125+
enum:
126+
- containerd
127+
- docker
128+
additionalProperties: false
129+
additionalProperties:
130+
type: object
131+
properties: {}

0 commit comments

Comments
 (0)