Skip to content

Commit 7ca2117

Browse files
author
Alan Christie
committed
Now validates the job definition against the built-in schema
1 parent d189ebb commit 7ca2117

File tree

5 files changed

+226
-1
lines changed

5 files changed

+226
-1
lines changed

MANIFEST.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
include LICENSE
2+
include jote/schema.yaml

jote/jote.py

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import shutil
99
from typing import Any, Dict, List, Optional, Tuple
1010

11+
import jsonschema
1112
from munch import DefaultMunch
1213
import yaml
1314
from yamllint import linter
@@ -27,6 +28,18 @@
2728
# It must exist in the root of the repo we're running in.
2829
_YAMLLINT_FILE: str = '.yamllint'
2930

31+
# The (built-in) Job Definition schema...
32+
# from the same directory as ours.
33+
_SCHEMA_FILE: str = os.path.join(os.path.dirname(__file__), 'schema.yaml')
34+
35+
# Load the schema YAML file now.
36+
# This must work as the file is installed along with this module.
37+
_JOB_SCHEMA: Dict[str, Any] = {}
38+
assert os.path.isfile(_SCHEMA_FILE)
39+
with open(_SCHEMA_FILE, 'r', encoding='utf8') as schema_file:
40+
_JOB_SCHEMA = yaml.load(schema_file, Loader=yaml.FullLoader)
41+
assert _JOB_SCHEMA
42+
3043

3144
def _print_test_banner(collection: str,
3245
job_name: str,
@@ -64,6 +77,29 @@ def _lint(definition_filename: str) -> bool:
6477
return True
6578

6679

80+
def _validate_schema(definition_filename: str) -> bool:
    """Checks the Job Definition against the built-in schema.

    The named file is parsed as YAML and the resulting document is
    validated against the module-level _JOB_SCHEMA. If the document is
    empty, or it does not comply with the schema, a diagnostic message
    is printed and False is returned. True is returned if the document
    complies with the schema.

    Errors raised while opening or parsing the file are not handled here
    and will propagate to the caller.
    """

    with open(definition_filename, 'rt', encoding='UTF-8') as definition_file:
        # NOTE(review): yaml.FullLoader is not intended for untrusted input.
        # If definitions can come from an untrusted source consider
        # yaml.safe_load() instead.
        job_def: Optional[Dict[str, Any]] =\
            yaml.load(definition_file, Loader=yaml.FullLoader)

    # An empty (or comment-only) file loads as None. Report this explicitly
    # rather than relying on an 'assert', which is removed when Python
    # runs with optimisation (-O) and would otherwise crash the caller.
    if not job_def:
        print(f'! Job definition "{definition_filename}" is empty')
        return False

    # Validate the Job Definition against our schema
    try:
        jsonschema.validate(job_def, schema=_JOB_SCHEMA)
    except jsonschema.ValidationError as ex:
        print(f'! Job definition "{definition_filename}"'
              ' does not comply with schema')
        print(f'! Errors is "{ex.message}"')
        print('! Full response follows:')
        print(ex)
        return False

    return True
101+
102+
67103
def _check_cwd() -> bool:
68104
"""Checks the execution directory for sanity (cwd). Here we must find
69105
a .yamllint file and a data-manager directory?
@@ -102,9 +138,15 @@ def _load(skip_lint: bool = False) -> Tuple[List[DefaultMunch], int]:
102138
jd_filenames: List[str] = glob.glob(f'{_DEFINITION_DIRECTORY}/*.yaml')
103139
for jd_filename in jd_filenames:
104140

141+
# Does the definition comply with the schema,
142+
# no options here - it must.
143+
if not _validate_schema(jd_filename):
144+
return [], -1
145+
146+
# YAML-lint the definition?
105147
if not skip_lint:
106148
if not _lint(jd_filename):
107-
return [], -1
149+
return [], -2
108150

109151
with open(jd_filename, 'r', encoding='UTF-8') as jd_file:
110152
job_def: Dict[str, Any] = yaml.load(jd_file, Loader=yaml.FullLoader)

jote/schema.yaml

Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
---
2+
# The JSONSchema for the 'JobDefinition' (JD) YAML files.
3+
#
4+
# See https://json-schema.org/understanding-json-schema/index.html
5+
6+
$schema: http://json-schema.org/draft-07/schema#
7+
8+
title: Data Manager Job Definition
9+
description: >-
10+
Defines one or more jobs that can be executed
11+
by the Data manager Job Operator
12+
13+
type: object
14+
properties:
15+
kind:
16+
const: DataManagerJobDefinition
17+
kind-version:
18+
enum:
19+
- '2021.1'
20+
name:
21+
type: string
22+
maxLength: 80
23+
description:
24+
type: string
25+
collection:
26+
type: string
27+
minLength: 1
28+
maxLength: 80
29+
pattern: '^[a-z]{1}[a-z0-9-]*$'
30+
repository-url:
31+
type: string
32+
maxLength: 2048
33+
format: uri
34+
repository-tag:
35+
type: string
36+
minLength: 1
37+
maxLength: 24
38+
jobs:
39+
$ref: '#/definitions/job-identity'
40+
required:
41+
- kind
42+
- kind-version
43+
- collection
44+
- repository-url
45+
- repository-tag
46+
- jobs
47+
48+
# Sub-object definitions ------------------------------------------------------
49+
# Things like the Job structure, Image structure etc.
50+
51+
definitions:
52+
53+
# A Job.
54+
# Consists of an identity (i.e. 'filter-molecules')
55+
# followed by a Job object.
56+
job-identity:
57+
type: object
58+
patternProperties:
59+
'^[a-z]{1}[a-z0-9-]{0,79}$':
60+
$ref: '#/definitions/job'
61+
additionalProperties: false
62+
minProperties: 1
63+
64+
# An individual Job
65+
job:
66+
type: object
67+
properties:
68+
name:
69+
type: string
70+
minLength: 1
71+
maxLength: 80
72+
description:
73+
type: string
74+
version:
75+
type: string
76+
minLength: 1
77+
maxLength: 24
78+
category:
79+
type: string
80+
doc-url:
81+
type: string
82+
keywords:
83+
type: array
84+
items:
85+
type: string
86+
image:
87+
$ref: '#/definitions/image'
88+
command-encoding:
89+
const: JINJA2_3_0
90+
command:
91+
type: string
92+
minLength: 1
93+
maxLength: 4096
94+
required:
95+
- version
96+
- image
97+
- command
98+
- name
99+
100+
# A Job container image
101+
# The 'type' is optional and is used to indicate
102+
# a single job _image_ (the default) or a workflow image like _nextflow_
103+
image:
104+
type: object
105+
additionalProperties: false
106+
properties:
107+
name:
108+
type: string
109+
minLength: 1
110+
maxLength: 120
111+
tag:
112+
type: string
113+
minLength: 1
114+
maxLength: 24
115+
project-directory:
116+
type: string
117+
minLength: 1
118+
maxLength: 255
119+
pattern: '^(/[a-zA-Z0-9_-]+)+$'
120+
working-directory:
121+
type: string
122+
minLength: 1
123+
maxLength: 255
124+
pattern: '^(/[a-zA-Z0-9_-]+)+$'
125+
type:
126+
type: string
127+
enum:
128+
- simple
129+
- nextflow
130+
default: simple
131+
environment:
132+
$ref: '#/definitions/environment'
133+
required:
134+
- name
135+
- tag
136+
- project-directory
137+
138+
# Image environment definitions.
139+
environment:
140+
type: array
141+
items:
142+
anyOf:
143+
- $ref: '#/definitions/environment-value-from-api-token'
144+
145+
# An Image environment from an 'api-token'.
146+
# Roles is an optional list of API token realm roles where, for now,
147+
# we limit the number in the list to a maximum of 1.
148+
environment-value-from-api-token:
149+
type: object
150+
additionalProperties: false
151+
properties:
152+
name:
153+
$ref: '#/definitions/environment-name'
154+
value-from:
155+
type: object
156+
properties:
157+
api-token:
158+
type: object
159+
properties:
160+
roles:
161+
type: array
162+
items:
163+
type: string
164+
pattern: '^[a-z]{1,}[a-z-_]{0,}$'
165+
minItems: 0
166+
maxItems: 1
167+
uniqueItems: true
168+
required:
169+
- api-token
170+
required:
171+
- name
172+
- value-from
173+
174+
# The pattern for Image environment names.
175+
# Classic linux/shell,
176+
# i.e. letters, digits and '_', and must begin with a letter or '_'
177+
environment-name:
178+
type: string
179+
minLength: 1
180+
pattern: '^[a-zA-Z_]{1,}[a-zA-Z0-9_]{0,}$'

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
docker-compose == 1.29.2
22
im-data-manager-job-decoder == 1.0.0
3+
jsonschema == 3.2.0
34
munch == 2.5.0
45
pyyaml == 5.4.1
56
yamllint == 1.26.3

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ def get_long_description():
3535
platforms=['any'],
3636
# Our modules to package
3737
packages=find_packages(exclude=["*.test", "*.test.*", "test.*", "test"]),
38+
include_package_data=True,
3839

3940
# Project classification:
4041
# https://pypi.python.org/pypi?%3Aaction=list_classifiers

0 commit comments

Comments
 (0)