Skip to content
Merged
22 changes: 14 additions & 8 deletions src/schema/objects/formats.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,15 @@ hed_version:
display_name: HED Version
description: |
The version string of the used HED schema.
pattern: '^(?:[a-zA-Z]+:)?(?:[a-zA-Z]+_)?(?:0|[1-9]\d*)\.(?:0|[1-9]\d*)\.(?:0|[1-9]\d*)\
(?:-(?:(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?\
(?:\+(?:[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$'
pattern: "\
(?:[a-zA-Z]+:)?\
(?:[a-zA-Z]+_)?\
(?:0|[1-9][0-9]*)\\.(?:0|[1-9][0-9]*)\\.(?:0|[1-9][0-9]*)\
(?:-(?:\
(?:0|[1-9][0-9]*|[0-9]*[a-zA-Z-][0-9a-zA-Z\\-]*)\
(?:\\.(?:0|[1-9][0-9]*|[0-9]*[a-zA-Z\\-][0-9a-zA-Z\\-]*))*\
))?\
(?:\\+(?:[0-9a-zA-Z\\-]+(?:\\.[0-9a-zA-Z\\-]+)*))?"
bids_uri:
display_name: BIDS uniform resource indicator
description: |
Expand All @@ -58,7 +64,7 @@ bids_uri:
The validation for this format is minimal.
It simply ensures that the value is a string with any characters that may appear in a valid URI,
starting with "bids:".
pattern: 'bids:[0-9a-zA-Z/#:\?\_\-\.]+'
pattern: 'bids:[0-9a-zA-Z/#:?_\-.]+'
dataset_relative:
display_name: Path relative to the BIDS dataset directory
description: |
Expand All @@ -67,7 +73,7 @@ dataset_relative:
The validation for this format is minimal.
It simply ensures that the value is a string with any characters that may appear in a valid path,
without starting with "/" (an absolute path).
pattern: '(?!/)[0-9a-zA-Z+/\_\-\.]+'
pattern: '(?!/)[0-9a-zA-Z+/_\-.]+'
date:
display_name: Date
description: |
Expand Down Expand Up @@ -98,7 +104,7 @@ file_relative:
The validation for this format is minimal.
It simply ensures that the value is a string with any characters that may appear in a valid path,
without starting with "/" (an absolute path).
pattern: '(?!/)[0-9a-zA-Z+/\_\-\.]+'
pattern: '(?!/)[0-9a-zA-Z+/_\-.]+'
participant_relative:
display_name: Path relative to the participant directory
description: |
Expand All @@ -108,7 +114,7 @@ participant_relative:
It simply ensures that the value is a string with any characters that may appear in a valid path,
without starting with "/" (an absolute path) or "sub-"
(a relative path starting with the participant directory, rather than relative to that directory).
pattern: '(?!/)(?!sub-)[0-9a-zA-Z+/\_\-\.]+'
pattern: '(?!/)(?!sub-)[0-9a-zA-Z+/_\-.]+'
rrid:
display_name: Research resource identifier
description: |
Expand All @@ -123,7 +129,7 @@ stimuli_relative:
It simply ensures that the value is a string with any characters that may appear in a valid path,
without starting with "/" (an absolute path) or "stimuli/"
(a relative path starting with the stimuli directory, rather than relative to that directory).
pattern: '(?!/)(?!stimuli/)[0-9a-zA-Z+/\_\-\.]+'
pattern: '(?!/)(?!stimuli/)[0-9a-zA-Z+/_\-.]+'
time:
display_name: Time
description: |
Expand Down
3 changes: 2 additions & 1 deletion tools/schemacode/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ requires-python = ">=3.9"
dependencies = [
"click",
"pyyaml",
"jsonschema"
"jsonschema[format]"
]
classifiers = [
"Development Status :: 4 - Beta",
Expand All @@ -37,6 +37,7 @@ render = [
]
tests = [
"bidsschematools[expressions,render]",
"check-jsonschema",
"codecov",
"coverage[toml]",
"flake8",
Expand Down
15 changes: 11 additions & 4 deletions tools/schemacode/src/bidsschematools/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@
import tempfile
from collections.abc import Iterable, Mapping
from copy import deepcopy
from functools import lru_cache
from functools import cache, lru_cache
from importlib.resources import files

from jsonschema import ValidationError, validate
from jsonschema import ValidationError
from jsonschema.protocols import Validator as JsonschemaValidator

from . import __bids_version__, __version__, utils
from .types import Namespace
Expand Down Expand Up @@ -100,6 +101,13 @@ def _dereference(namespace, base_schema):
struct.update({**target, **struct})


@cache
def get_schema_validator() -> JsonschemaValidator:
    """Get the jsonschema validator for validating BIDS schemas.

    The metaschema is read from the ``metaschema.json`` resource of the
    ``bidsschematools.data`` package and handed to
    ``utils.jsonschema_validator`` with format checking enabled.  The result
    is memoized with ``functools.cache``, so the metaschema is read and the
    validator is constructed only on the first call.

    Returns
    -------
    JsonschemaValidator
        The validator built from the BIDS metaschema.
    """
    metaschema_resource = files("bidsschematools.data").joinpath("metaschema.json")
    # Decode explicitly as UTF-8 so that loading the metaschema does not depend
    # on the locale's preferred encoding.
    metaschema = json.loads(metaschema_resource.read_text(encoding="utf-8"))
    return utils.jsonschema_validator(metaschema, check_format=True)


def dereference(namespace, inplace=True):
"""Replace references in namespace with the contents of the referred object.

Expand Down Expand Up @@ -293,12 +301,11 @@ def filter_schema(schema, **kwargs):

def validate_schema(schema: Namespace):
"""Validate a schema against the BIDS metaschema."""
metaschema = json.loads(files("bidsschematools.data").joinpath("metaschema.json").read_text())

# validate is put in this try/except clause because the error is sometimes too long to
# print in the terminal
try:
validate(instance=schema.to_dict(), schema=metaschema)
get_schema_validator().validate(instance=schema.to_dict())
except ValidationError as e:
with tempfile.NamedTemporaryFile(
prefix="schema_error_", suffix=".txt", delete=False, mode="w+"
Expand Down
38 changes: 38 additions & 0 deletions tools/schemacode/src/bidsschematools/tests/test_schema.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
"""Tests for the bidsschematools package."""

import json
import os
import subprocess
from collections.abc import Mapping
from importlib.resources import files

import pytest
from jsonschema.exceptions import ValidationError
Expand Down Expand Up @@ -365,6 +368,41 @@ def test_valid_schema():
schema.validate_schema(namespace)


@pytest.mark.parametrize("regex_variant", ["default", "nonunicode", "python"])
def test_valid_schema_with_check_jsonschema(tmp_path, regex_variant):
    """
    Validate the BIDS schema against the metaschema through the
    `check-jsonschema` command line tool, once per supported regex variant.
    """
    metaschema_path = str(files("bidsschematools.data").joinpath("metaschema.json"))

    # The CLI validates files, so dump the loaded BIDS schema to a temporary JSON file
    bids_schema_path = tmp_path / "bids_schema.json"
    bids_schema_path.write_text(json.dumps(schema.load_schema().to_dict()))

    command = [
        "check-jsonschema",
        "--regex-variant",
        regex_variant,
        "--schemafile",
        metaschema_path,
        str(bids_schema_path),
    ]

    # Redirect stderr into stdout so a single captured stream holds the full report
    completed = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        check=False,
    )

    # Any non-zero exit status means the validation did not succeed
    if completed.returncode != 0:
        pytest.fail(
            f"check-jsonschema failed with code {completed.returncode}:\n{completed.stdout}",
            pytrace=False,
        )


def test_add_legal_field():
"""Test that adding a legal field does not raise an error."""
namespace = schema.load_schema()
Expand Down
147 changes: 147 additions & 0 deletions tools/schemacode/src/bidsschematools/tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
from contextlib import nullcontext
from typing import Any, cast

import pytest
from jsonschema.exceptions import SchemaError, ValidationError
from jsonschema.protocols import Validator as JsonschemaValidator
from jsonschema.validators import Draft7Validator, Draft202012Validator

from bidsschematools.utils import jsonschema_validator

# Schema fixtures used by the tests in TestJsonschemaValidator.

# Draft 7 fixture: its "$schema" URI is what should select Draft7Validator.
DRAFT_7_SCHEMA = {
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"properties": {"name": {"type": "string"}},
"required": ["name"],
}
"""
A minimal valid Draft 7 schema requiring a 'name' property of type 'string'.
"""


# Draft 2020-12 fixture without any "format" keyword.
DRAFT_202012_SCHEMA = {
"$schema": "https://json-schema.org/draft/2020-12/schema",
"type": "object",
"properties": {"title": {"type": "string"}},
"required": ["title"],
}
"""
A minimal valid Draft 2020-12 schema requiring a 'title' property of type 'string'.
"""

# Draft 2020-12 fixture whose 'email' property carries a "format" keyword,
# so validation results differ depending on whether format checking is enabled.
DRAFT_202012_FORMAT_SCHEMA = {
"$schema": "https://json-schema.org/draft/2020-12/schema",
"type": "object",
"properties": {"email": {"type": "string", "format": "email"}},
"required": ["email"],
}
"""
Draft 2020-12 schema that includes a 'format' requirement (e.g., 'email').
Used to test the 'check_format' parameter.
"""


# Fixture with no "$schema" key, so the validator class cannot be inferred from it.
SCHEMA_NO_DOLLAR_SCHEMA = {
"type": "object",
"properties": {"foo": {"type": "string"}},
"required": ["foo"],
}
"""
Schema that lacks the '$schema' property altogether.
Used to test that 'default_cls' is applied.
"""


class TestJsonschemaValidator:
    """Tests for `bidsschematools.utils.jsonschema_validator`."""

    @pytest.mark.parametrize(
        ("schema", "expected_validator_cls"),
        [
            pytest.param(DRAFT_202012_FORMAT_SCHEMA, Draft202012Validator, id="Draft202012"),
            pytest.param(DRAFT_7_SCHEMA, Draft7Validator, id="Draft7"),
        ],
    )
    @pytest.mark.parametrize("check_format", [True, False])
    def test_set_by_dollar_schema(
        self,
        schema: dict[str, Any],
        expected_validator_cls: type,
        check_format: bool,
    ) -> None:
        """
        Test that the correct validator class is returned for different '$schema' values
        """
        validator = jsonschema_validator(schema, check_format=check_format)

        assert isinstance(validator, expected_validator_cls)

    @pytest.mark.parametrize(
        ("check_format", "instance", "expect_raises"),
        [
            # The "valid" case must be a real address containing "@"; otherwise the
            # format checker rejects it and the no-error expectation cannot hold.
            (True, {"email": "test@example.com"}, False),
            (True, {"email": "not-an-email"}, True),
            (False, {"email": "not-an-email"}, False),
        ],
        ids=[
            "check_format=True, valid email",
            "check_format=True, invalid email",
            "check_format=False, invalid email",
        ],
    )
    def test_check_format_email_scenarios(
        self,
        check_format: bool,
        instance: dict,
        expect_raises: bool,
    ) -> None:
        """
        Parametrized test for check_format usage on valid/invalid email addresses under
        Draft202012Validator.
        """
        validator = jsonschema_validator(DRAFT_202012_FORMAT_SCHEMA, check_format=check_format)

        # If expect_raises is True, we use pytest.raises(ValidationError)
        # Otherwise, we enter a no-op context
        ctx = pytest.raises(ValidationError) if expect_raises else nullcontext()

        with ctx:
            validator.validate(instance)  # Should raise or not raise as parametrized

    @pytest.mark.parametrize(
        ("schema", "expected_validator_cls"),
        [
            # Scenario 1: no $schema => we expect the default_cls=Draft7Validator is used
            pytest.param(SCHEMA_NO_DOLLAR_SCHEMA, Draft7Validator, id="no-$schema"),
            # Scenario 2: has $schema => draft 2020-12 overrides the default_cls
            pytest.param(DRAFT_202012_SCHEMA, Draft202012Validator, id="with-$schema"),
        ],
    )
    def test_default_cls(
        self,
        schema: dict[str, Any],
        expected_validator_cls: type,
    ) -> None:
        """
        If the schema has no '$schema' property, and we provide a 'default_cls',
        the returned validator should be an instance of that class.

        If the schema *does* have '$schema', then the default_cls is ignored, and
        the validator class is inferred from the schema's '$schema' field.
        """
        # Provide default_cls=Draft7Validator
        validator = jsonschema_validator(
            schema,
            check_format=False,
            default_cls=cast(type[JsonschemaValidator], Draft7Validator),
        )
        assert isinstance(validator, expected_validator_cls)

    def test_invalid_schema_raises_schema_error(self) -> None:
        """
        Provide an invalid schema, ensuring that 'SchemaError' is raised.
        """
        invalid_schema = {
            "$schema": "https://json-schema.org/draft/2020-12/schema",
            "type": 123,  # 'type' must be string/array, so this is invalid
        }
        with pytest.raises(SchemaError):
            jsonschema_validator(invalid_schema, check_format=False)
53 changes: 53 additions & 0 deletions tools/schemacode/src/bidsschematools/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
import logging
import os
import sys
from typing import Any, Optional

from jsonschema.protocols import Validator as JsonschemaValidator
from jsonschema.validators import validator_for

from . import data

Expand Down Expand Up @@ -82,3 +86,52 @@ def set_logger_level(lgr, level):
lgr.warning("Do not know how to treat loglevel %s" % level)
return
lgr.setLevel(level)


def jsonschema_validator(
    schema: dict[str, Any],
    *,
    check_format: bool,
    default_cls: Optional[type[JsonschemaValidator]] = None,
) -> JsonschemaValidator:
    """
    Build a jsonschema validator suited to validating instances against a JSON schema

    Parameters
    ----------
    schema : dict[str, Any]
        The JSON schema to validate against
    check_format : bool
        Whether the returned validator checks values against the `format`
        specifications in the schema
    default_cls : type[JsonschemaValidator] or None, optional
        The validator class to fall back on when the appropriate class cannot be
        determined from the schema (by assessing the `$schema` property). If `None`,
        the fallback is left to `jsonschema.validators.validator_for`.

    Returns
    -------
    JsonschemaValidator
        The JSON schema validator

    Raises
    ------
    jsonschema.exceptions.SchemaError
        If the JSON schema is invalid
    """
    # Forward default_cls only when the caller supplied one, so that
    # validator_for applies its own fallback otherwise
    lookup_args = (schema,) if default_cls is None else (schema, default_cls)
    validator_cls: type[JsonschemaValidator] = validator_for(*lookup_args)

    # Reject an invalid schema before constructing a validator from it
    validator_cls.check_schema(schema)

    # Format checking is opt-in: attach the class's format checker only on request
    init_kwargs = {"format_checker": validator_cls.FORMAT_CHECKER} if check_format else {}
    return validator_cls(schema, **init_kwargs)