Skip to content

Commit 44d8461

Browse files
authored
Merge pull request #2077 from candleindark/jsonschema-validator
[FIX] Enable format validation in validating BIDS schemas
2 parents 057baef + fdf4534 commit 44d8461

File tree

6 files changed

+257
-13
lines changed

6 files changed

+257
-13
lines changed

src/schema/objects/formats.yaml

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,7 @@ hed_version:
4747
display_name: HED Version
4848
description: |
4949
The version string of the used HED schema.
50-
pattern: '^(?:[a-zA-Z]+:)?(?:[a-zA-Z]+_)?(?:0|[1-9]\d*)\.(?:0|[1-9]\d*)\.(?:0|[1-9]\d*)\
51-
(?:-(?:(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?\
52-
(?:\+(?:[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$'
50+
pattern: "(?:[a-zA-Z]+:)?(?:[a-zA-Z]+_)?(?:0|[1-9][0-9]*)\\.(?:0|[1-9][0-9]*)\\.(?:0|[1-9][0-9]*)"
5351
bids_uri:
5452
display_name: BIDS uniform resource indicator
5553
description: |
@@ -58,7 +56,7 @@ bids_uri:
5856
The validation for this format is minimal.
5957
It simply ensures that the value is a string with any characters that may appear in a valid URI,
6058
starting with "bids:".
61-
pattern: 'bids:[0-9a-zA-Z/#:\?\_\-\.]+'
59+
pattern: 'bids:[0-9a-zA-Z/#:?_\-.]+'
6260
dataset_relative:
6361
display_name: Path relative to the BIDS dataset directory
6462
description: |
@@ -67,7 +65,7 @@ dataset_relative:
6765
The validation for this format is minimal.
6866
It simply ensures that the value is a string with any characters that may appear in a valid path,
6967
without starting with "/" (an absolute path).
70-
pattern: '(?!/)[0-9a-zA-Z+/\_\-\.]+'
68+
pattern: '(?!/)[0-9a-zA-Z+/_\-.]+'
7169
date:
7270
display_name: Date
7371
description: |
@@ -98,7 +96,7 @@ file_relative:
9896
The validation for this format is minimal.
9997
It simply ensures that the value is a string with any characters that may appear in a valid path,
10098
without starting with "/" (an absolute path).
101-
pattern: '(?!/)[0-9a-zA-Z+/\_\-\.]+'
99+
pattern: '(?!/)[0-9a-zA-Z+/_\-.]+'
102100
participant_relative:
103101
display_name: Path relative to the participant directory
104102
description: |
@@ -108,7 +106,7 @@ participant_relative:
108106
It simply ensures that the value is a string with any characters that may appear in a valid path,
109107
without starting with "/" (an absolute path) or "sub-"
110108
(a relative path starting with the participant directory, rather than relative to that directory).
111-
pattern: '(?!/)(?!sub-)[0-9a-zA-Z+/\_\-\.]+'
109+
pattern: '(?!/)(?!sub-)[0-9a-zA-Z+/_\-.]+'
112110
rrid:
113111
display_name: Research resource identifier
114112
description: |
@@ -123,7 +121,7 @@ stimuli_relative:
123121
It simply ensures that the value is a string with any characters that may appear in a valid path,
124122
without starting with "/" (an absolute path) or "stimuli/"
125123
(a relative path starting with the stimuli directory, rather than relative to that directory).
126-
pattern: '(?!/)(?!stimuli/)[0-9a-zA-Z+/\_\-\.]+'
124+
pattern: '(?!/)(?!stimuli/)[0-9a-zA-Z+/_\-.]+'
127125
time:
128126
display_name: Time
129127
description: |

tools/schemacode/pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ requires-python = ">=3.9"
1313
dependencies = [
1414
"click",
1515
"pyyaml",
16-
"jsonschema"
16+
"jsonschema[format]"
1717
]
1818
classifiers = [
1919
"Development Status :: 4 - Beta",
@@ -37,6 +37,7 @@ render = [
3737
]
3838
tests = [
3939
"bidsschematools[expressions,render]",
40+
"check-jsonschema",
4041
"codecov",
4142
"coverage[toml]",
4243
"flake8",

tools/schemacode/src/bidsschematools/schema.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,11 @@
66
import tempfile
77
from collections.abc import Iterable, Mapping
88
from copy import deepcopy
9-
from functools import lru_cache
9+
from functools import cache, lru_cache
1010
from importlib.resources import files
1111

12-
from jsonschema import ValidationError, validate
12+
from jsonschema import ValidationError
13+
from jsonschema.protocols import Validator as JsonschemaValidator
1314

1415
from . import __bids_version__, __version__, utils
1516
from .types import Namespace
@@ -100,6 +101,13 @@ def _dereference(namespace, base_schema):
100101
struct.update({**target, **struct})
101102

102103

104+
@cache
def get_schema_validator() -> JsonschemaValidator:
    """Return the jsonschema validator used to validate BIDS schemas.

    The metaschema is read from the ``metaschema.json`` resource of the
    ``bidsschematools.data`` package, and the validator is created with format
    checking enabled.  Because the function is wrapped in ``functools.cache``,
    the resource is read and parsed only on the first call; later calls return
    the same validator object.
    """
    metaschema_resource = files("bidsschematools.data").joinpath("metaschema.json")
    metaschema = json.loads(metaschema_resource.read_text())
    return utils.jsonschema_validator(metaschema, check_format=True)
109+
110+
103111
def dereference(namespace, inplace=True):
104112
"""Replace references in namespace with the contents of the referred object.
105113
@@ -293,12 +301,11 @@ def filter_schema(schema, **kwargs):
293301

294302
def validate_schema(schema: Namespace):
295303
"""Validate a schema against the BIDS metaschema."""
296-
metaschema = json.loads(files("bidsschematools.data").joinpath("metaschema.json").read_text())
297304

298305
# validate is put in this try/except clause because the error is sometimes too long to
299306
# print in the terminal
300307
try:
301-
validate(instance=schema.to_dict(), schema=metaschema)
308+
get_schema_validator().validate(instance=schema.to_dict())
302309
except ValidationError as e:
303310
with tempfile.NamedTemporaryFile(
304311
prefix="schema_error_", suffix=".txt", delete=False, mode="w+"

tools/schemacode/src/bidsschematools/tests/test_schema.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
"""Tests for the bidsschematools package."""
22

3+
import json
34
import os
5+
import subprocess
46
from collections.abc import Mapping
7+
from importlib.resources import files
58

69
import pytest
710
from jsonschema.exceptions import ValidationError
@@ -365,6 +368,41 @@ def test_valid_schema():
365368
schema.validate_schema(namespace)
366369

367370

371+
@pytest.mark.parametrize("regex_variant", ["default", "nonunicode", "python"])
def test_valid_schema_with_check_jsonschema(tmp_path, regex_variant):
    """
    Test that the BIDS schema is valid against the metaschema when the validation is
    delegated to the `check-jsonschema` CLI, once per parametrized `--regex-variant`
    """
    metaschema_path = str(files("bidsschematools.data").joinpath("metaschema.json"))

    # Dump the loaded BIDS schema to a temporary JSON file that the CLI can read
    bids_schema_path = tmp_path / "bids_schema.json"
    bids_schema_path.write_text(json.dumps(schema.load_schema().to_dict()))

    command = [
        "check-jsonschema",
        "--regex-variant",
        regex_variant,
        "--schemafile",
        metaschema_path,
        str(bids_schema_path),
    ]

    # The return code is inspected explicitly below, so a non-zero exit does not raise
    completed = subprocess.run(
        command,
        stdout=subprocess.PIPE,  # Capture stdout
        stderr=subprocess.STDOUT,  # Merge stderr into the captured stdout
        text=True,
        check=False,
    )

    if completed.returncode != 0:
        # Report only the CLI output; a Python traceback adds nothing here
        pytest.fail(
            f"check-jsonschema failed with code {completed.returncode}:\n{completed.stdout}",
            pytrace=False,
        )
404+
405+
368406
def test_add_legal_field():
369407
"""Test that adding a legal field does not raise an error."""
370408
namespace = schema.load_schema()
tools/schemacode/src/bidsschematools/tests/test_utils.py

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
from contextlib import nullcontext
2+
from typing import Any, cast
3+
4+
import pytest
5+
from jsonschema.exceptions import SchemaError, ValidationError
6+
from jsonschema.protocols import Validator as JsonschemaValidator
7+
from jsonschema.validators import Draft7Validator, Draft202012Validator
8+
9+
from bidsschematools.utils import jsonschema_validator
10+
11+
# A minimal valid Draft 7 schema requiring a 'name' property of type 'string'.
DRAFT_7_SCHEMA = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "object",
    "properties": {
        "name": {"type": "string"},
    },
    "required": ["name"],
}

# A minimal valid Draft 2020-12 schema requiring a 'title' property of type 'string'.
DRAFT_202012_SCHEMA = {
    "$schema": "https://json-schema.org/draft/2020-12/schema",
    "type": "object",
    "properties": {
        "title": {"type": "string"},
    },
    "required": ["title"],
}

# Draft 2020-12 schema that includes a 'format' requirement (e.g., 'email').
# Used to test the 'check_format' parameter.
DRAFT_202012_FORMAT_SCHEMA = {
    "$schema": "https://json-schema.org/draft/2020-12/schema",
    "type": "object",
    "properties": {
        "email": {"type": "string", "format": "email"},
    },
    "required": ["email"],
}

# Schema that lacks the '$schema' property altogether.
# Used to test that 'default_cls' is applied.
SCHEMA_NO_DOLLAR_SCHEMA = {
    "type": "object",
    "properties": {
        "foo": {"type": "string"},
    },
    "required": ["foo"],
}
53+
54+
55+
class TestJsonschemaValidator:
    """Tests for `bidsschematools.utils.jsonschema_validator`."""

    @pytest.mark.parametrize(
        ("schema", "expected_validator_cls"),
        [
            pytest.param(DRAFT_202012_FORMAT_SCHEMA, Draft202012Validator, id="Draft202012"),
            pytest.param(DRAFT_7_SCHEMA, Draft7Validator, id="Draft7"),
        ],
    )
    @pytest.mark.parametrize("check_format", [True, False])
    def test_set_by_dollar_schema(
        self,
        schema: dict[str, Any],
        expected_validator_cls: type,
        check_format: bool,
    ) -> None:
        """
        Test that the correct validator class is returned for different '$schema' values,
        with format checking both enabled and disabled
        """
        validator = jsonschema_validator(schema, check_format=check_format)

        assert isinstance(validator, expected_validator_cls)

    @pytest.mark.parametrize(
        ("check_format", "instance", "expect_raises"),
        [
            # A syntactically valid address must pass when formats are checked
            (True, {"email": "test@example.com"}, False),
            (True, {"email": "not-an-email"}, True),
            # With format checking disabled, the malformed address is accepted
            (False, {"email": "not-an-email"}, False),
        ],
        ids=[
            "check_format=True, valid email",
            "check_format=True, invalid email",
            "check_format=False, invalid email",
        ],
    )
    def test_check_format_email_scenarios(
        self,
        check_format: bool,
        instance: dict,
        expect_raises: bool,
    ) -> None:
        """
        Parametrized test for check_format usage on valid/invalid email addresses under
        Draft202012Validator.
        """
        validator = jsonschema_validator(DRAFT_202012_FORMAT_SCHEMA, check_format=check_format)

        # Expect a ValidationError only for the cases flagged as raising;
        # otherwise use a no-op context so the same `with` block serves both
        ctx = pytest.raises(ValidationError) if expect_raises else nullcontext()

        with ctx:
            validator.validate(instance)  # Should raise or not raise as parametrized

    @pytest.mark.parametrize(
        ("schema", "expected_validator_cls"),
        [
            # Scenario 1: no $schema => we expect the default_cls=Draft7Validator is used
            pytest.param(SCHEMA_NO_DOLLAR_SCHEMA, Draft7Validator, id="no-$schema"),
            # Scenario 2: has $schema => draft 2020-12 overrides the default_cls
            pytest.param(DRAFT_202012_SCHEMA, Draft202012Validator, id="with-$schema"),
        ],
    )
    def test_default_cls(
        self,
        schema: dict[str, Any],
        expected_validator_cls: type,
    ) -> None:
        """
        If the schema has no '$schema' property, and we provide a 'default_cls',
        the returned validator should be an instance of that class.

        If the schema *does* have '$schema', then the default_cls is ignored, and
        the validator class is inferred from the schema's '$schema' field.
        """
        # Provide default_cls=Draft7Validator
        validator = jsonschema_validator(
            schema,
            check_format=False,
            default_cls=cast(type[JsonschemaValidator], Draft7Validator),
        )
        assert isinstance(validator, expected_validator_cls)

    def test_invalid_schema_raises_schema_error(self) -> None:
        """
        Provide an invalid schema, ensuring that 'SchemaError' is raised.
        """
        invalid_schema = {
            "$schema": "https://json-schema.org/draft/2020-12/schema",
            "type": 123,  # 'type' must be string/array, so this is invalid
        }
        with pytest.raises(SchemaError):
            jsonschema_validator(invalid_schema, check_format=False)

tools/schemacode/src/bidsschematools/utils.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33
import logging
44
import os
55
import sys
6+
from typing import Any, Optional
7+
8+
from jsonschema.protocols import Validator as JsonschemaValidator
9+
from jsonschema.validators import validator_for
610

711
from . import data
812

@@ -82,3 +86,52 @@ def set_logger_level(lgr, level):
8286
lgr.warning("Do not know how to treat loglevel %s" % level)
8387
return
8488
lgr.setLevel(level)
89+
90+
91+
def jsonschema_validator(
    schema: dict[str, Any],
    *,
    check_format: bool,
    default_cls: Optional[type[JsonschemaValidator]] = None,
) -> JsonschemaValidator:
    """
    Create a jsonschema validator appropriate for validating instances against a given
    JSON schema

    Parameters
    ----------
    schema : dict[str, Any]
        The JSON schema to validate against
    check_format : bool
        Whether the returned validator checks instances against the format
        specifications in the schema. When `True`, the validator is created with the
        `FORMAT_CHECKER` of the selected validator class
    default_cls : type[JsonschemaValidator] or None, optional
        The validator class to fall back on when the appropriate validator class
        cannot be determined from the schema (by assessing the `$schema` property).
        If `None`, the choice of fallback is left to
        `jsonschema.validators.validator_for`, i.e. the class representing the latest
        JSON schema draft supported by the `jsonschema` package

    Returns
    -------
    JsonschemaValidator
        The JSON schema validator

    Raises
    ------
    jsonschema.exceptions.SchemaError
        If the JSON schema is invalid
    """
    # Forward `default_cls` only when the caller supplied one, so that
    # `validator_for` otherwise applies its own default
    fallback_args = () if default_cls is None else (default_cls,)
    validator_cls: type[JsonschemaValidator] = validator_for(schema, *fallback_args)

    # Reject an invalid schema before building a validator from it
    validator_cls.check_schema(schema)

    if not check_format:
        # Validator with format checking disabled
        return validator_cls(schema)

    # Validator with format checking enabled
    return validator_cls(schema, format_checker=validator_cls.FORMAT_CHECKER)

0 commit comments

Comments
 (0)