diff --git a/src/schema/objects/formats.yaml b/src/schema/objects/formats.yaml index 7e0530a1d6..15a43b7bce 100644 --- a/src/schema/objects/formats.yaml +++ b/src/schema/objects/formats.yaml @@ -47,9 +47,7 @@ hed_version: display_name: HED Version description: | The version string of the used HED schema. - pattern: '^(?:[a-zA-Z]+:)?(?:[a-zA-Z]+_)?(?:0|[1-9]\d*)\.(?:0|[1-9]\d*)\.(?:0|[1-9]\d*)\ - (?:-(?:(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?\ - (?:\+(?:[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$' + pattern: "(?:[a-zA-Z]+:)?(?:[a-zA-Z]+_)?(?:0|[1-9][0-9]*)\\.(?:0|[1-9][0-9]*)\\.(?:0|[1-9][0-9]*)" bids_uri: display_name: BIDS uniform resource indicator description: | @@ -58,7 +56,7 @@ bids_uri: The validation for this format is minimal. It simply ensures that the value is a string with any characters that may appear in a valid URI, starting with "bids:". - pattern: 'bids:[0-9a-zA-Z/#:\?\_\-\.]+' + pattern: 'bids:[0-9a-zA-Z/#:?_\-.]+' dataset_relative: display_name: Path relative to the BIDS dataset directory description: | @@ -67,7 +65,7 @@ dataset_relative: The validation for this format is minimal. It simply ensures that the value is a string with any characters that may appear in a valid path, without starting with "/" (an absolute path). - pattern: '(?!/)[0-9a-zA-Z+/\_\-\.]+' + pattern: '(?!/)[0-9a-zA-Z+/_\-.]+' date: display_name: Date description: | @@ -98,7 +96,7 @@ file_relative: The validation for this format is minimal. It simply ensures that the value is a string with any characters that may appear in a valid path, without starting with "/" (an absolute path). 
- pattern: '(?!/)[0-9a-zA-Z+/\_\-\.]+' + pattern: '(?!/)[0-9a-zA-Z+/_\-.]+' participant_relative: display_name: Path relative to the participant directory description: | @@ -108,7 +106,7 @@ participant_relative: It simply ensures that the value is a string with any characters that may appear in a valid path, without starting with "/" (an absolute path) or "sub/" (a relative path starting with the participant directory, rather than relative to that directory). - pattern: '(?!/)(?!sub-)[0-9a-zA-Z+/\_\-\.]+' + pattern: '(?!/)(?!sub-)[0-9a-zA-Z+/_\-.]+' rrid: display_name: Research resource identifier description: | @@ -123,7 +121,7 @@ stimuli_relative: It simply ensures that the value is a string with any characters that may appear in a valid path, without starting with "/" (an absolute path) or "stimuli/" (a relative path starting with the stimuli directory, rather than relative to that directory). - pattern: '(?!/)(?!stimuli/)[0-9a-zA-Z+/\_\-\.]+' + pattern: '(?!/)(?!stimuli/)[0-9a-zA-Z+/_\-.]+' time: display_name: Time description: | diff --git a/tools/schemacode/pyproject.toml b/tools/schemacode/pyproject.toml index 7e7b10fc52..c00637cf68 100644 --- a/tools/schemacode/pyproject.toml +++ b/tools/schemacode/pyproject.toml @@ -13,7 +13,7 @@ requires-python = ">=3.9" dependencies = [ "click", "pyyaml", - "jsonschema" + "jsonschema[format]" ] classifiers = [ "Development Status :: 4 - Beta", @@ -37,6 +37,7 @@ render = [ ] tests = [ "bidsschematools[expressions,render]", + "check-jsonschema", "codecov", "coverage[toml]", "flake8", diff --git a/tools/schemacode/src/bidsschematools/schema.py b/tools/schemacode/src/bidsschematools/schema.py index 53bcbfabf4..f86e059410 100644 --- a/tools/schemacode/src/bidsschematools/schema.py +++ b/tools/schemacode/src/bidsschematools/schema.py @@ -6,10 +6,11 @@ import tempfile from collections.abc import Iterable, Mapping from copy import deepcopy -from functools import lru_cache +from functools import cache, lru_cache from 
importlib.resources import files -from jsonschema import ValidationError, validate +from jsonschema import ValidationError +from jsonschema.protocols import Validator as JsonschemaValidator from . import __bids_version__, __version__, utils from .types import Namespace @@ -100,6 +101,13 @@ def _dereference(namespace, base_schema): struct.update({**target, **struct}) +@cache +def get_schema_validator() -> JsonschemaValidator: + """Get the jsonschema validator for validating BIDS schemas.""" + metaschema = json.loads(files("bidsschematools.data").joinpath("metaschema.json").read_text()) + return utils.jsonschema_validator(metaschema, check_format=True) + + def dereference(namespace, inplace=True): """Replace references in namespace with the contents of the referred object. @@ -293,12 +301,11 @@ def filter_schema(schema, **kwargs): def validate_schema(schema: Namespace): """Validate a schema against the BIDS metaschema.""" - metaschema = json.loads(files("bidsschematools.data").joinpath("metaschema.json").read_text()) # validate is put in this try/except clause because the error is sometimes too long to # print in the terminal try: - validate(instance=schema.to_dict(), schema=metaschema) + get_schema_validator().validate(instance=schema.to_dict()) except ValidationError as e: with tempfile.NamedTemporaryFile( prefix="schema_error_", suffix=".txt", delete=False, mode="w+" diff --git a/tools/schemacode/src/bidsschematools/tests/test_schema.py b/tools/schemacode/src/bidsschematools/tests/test_schema.py index 6b1449e13e..26f02293f0 100644 --- a/tools/schemacode/src/bidsschematools/tests/test_schema.py +++ b/tools/schemacode/src/bidsschematools/tests/test_schema.py @@ -1,7 +1,10 @@ """Tests for the bidsschematools package.""" +import json import os +import subprocess from collections.abc import Mapping +from importlib.resources import files import pytest from jsonschema.exceptions import ValidationError @@ -365,6 +368,41 @@ def test_valid_schema(): 
schema.validate_schema(namespace) +@pytest.mark.parametrize("regex_variant", ["default", "nonunicode", "python"]) +def test_valid_schema_with_check_jsonschema(tmp_path, regex_variant): + """ + Test that the BIDS schema is valid against the metaschema when validation is done + using the `check-jsonschema` CLI + """ + bids_schema = schema.load_schema().to_dict() + metaschema_path = str(files("bidsschematools.data").joinpath("metaschema.json")) + + # Save BIDS schema to a temporary file + bids_schema_path = tmp_path / "bids_schema.json" + bids_schema_path.write_text(json.dumps(bids_schema)) + + # Invoke the check-jsonschema CLI to validate the BIDS schema + try: + subprocess.run( + [ + "check-jsonschema", + "--regex-variant", + regex_variant, + "--schemafile", + metaschema_path, + str(bids_schema_path), + ], + stdout=subprocess.PIPE, # Capture stdout + stderr=subprocess.STDOUT, # Redirect stderr into stdout + text=True, + check=True, + ) + except subprocess.CalledProcessError as e: + pytest.fail( + f"check-jsonschema failed with code {e.returncode}:\n{e.stdout}", pytrace=False + ) + + def test_add_legal_field(): """Test that adding a legal field does not raise an error.""" namespace = schema.load_schema() diff --git a/tools/schemacode/src/bidsschematools/tests/test_utils.py b/tools/schemacode/src/bidsschematools/tests/test_utils.py new file mode 100644 index 0000000000..4ee87663e2 --- /dev/null +++ b/tools/schemacode/src/bidsschematools/tests/test_utils.py @@ -0,0 +1,147 @@ +from contextlib import nullcontext +from typing import Any, cast + +import pytest +from jsonschema.exceptions import SchemaError, ValidationError +from jsonschema.protocols import Validator as JsonschemaValidator +from jsonschema.validators import Draft7Validator, Draft202012Validator + +from bidsschematools.utils import jsonschema_validator + +DRAFT_7_SCHEMA = { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": {"name": {"type": "string"}}, + "required": 
["name"], +} +""" +A minimal valid Draft 7 schema requiring a 'name' property of type 'string'. +""" + + +DRAFT_202012_SCHEMA = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": {"title": {"type": "string"}}, + "required": ["title"], +} +""" +A minimal valid Draft 2020-12 schema requiring a 'title' property of type 'string'. +""" + +DRAFT_202012_FORMAT_SCHEMA = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": {"email": {"type": "string", "format": "email"}}, + "required": ["email"], +} +""" +Draft 2020-12 schema that includes a 'format' requirement (e.g., 'email'). +Used to test the 'check_format' parameter. +""" + + +SCHEMA_NO_DOLLAR_SCHEMA = { + "type": "object", + "properties": {"foo": {"type": "string"}}, + "required": ["foo"], +} +""" +Schema that lacks the '$schema' property altogether. +Used to test that 'default_cls' is applied. +""" + + +class TestJsonschemaValidator: + @pytest.mark.parametrize( + ("schema", "expected_validator_cls"), + [ + pytest.param(DRAFT_202012_FORMAT_SCHEMA, Draft202012Validator, id="Draft202012"), + pytest.param(DRAFT_7_SCHEMA, Draft7Validator, id="Draft7"), + ], + ) + @pytest.mark.parametrize("check_format", [True, False]) + def test_set_by_dollar_schema( + self, + schema: dict[str, Any], + expected_validator_cls: type, + check_format: bool, + ) -> None: + """ + Test that the correct validator class is returned for different '$schema' values + """ + validator = jsonschema_validator(schema, check_format=check_format) + + assert isinstance(validator, expected_validator_cls) + + @pytest.mark.parametrize( + ("check_format", "instance", "expect_raises"), + [ + (True, {"email": "test@example.com"}, False), + (True, {"email": "not-an-email"}, True), + (False, {"email": "not-an-email"}, False), + ], + ids=[ + "check_format=True, valid email", + "check_format=True, invalid email", + "check_format=False, invalid email", + ], + ) + def 
test_check_format_email_scenarios( + self, + check_format: bool, + instance: dict, + expect_raises: bool, + ) -> None: + """ + Parametrized test for check_format usage on valid/invalid email addresses under + Draft202012Validator. + """ + validator = jsonschema_validator(DRAFT_202012_FORMAT_SCHEMA, check_format=check_format) + + # If expect_raises is True, we use pytest.raises(ValidationError) + # Otherwise, we enter a no-op context + ctx = pytest.raises(ValidationError) if expect_raises else nullcontext() + + with ctx: + validator.validate(instance) # Should raise or not raise as parametrized + + @pytest.mark.parametrize( + ("schema", "expected_validator_cls"), + [ + # Scenario 1: no $schema => we expect the default_cls=Draft7Validator is used + pytest.param(SCHEMA_NO_DOLLAR_SCHEMA, Draft7Validator, id="no-$schema"), + # Scenario 2: has $schema => draft 2020-12 overrides the default_cls + pytest.param(DRAFT_202012_SCHEMA, Draft202012Validator, id="with-$schema"), + ], + ) + def test_default_cls( + self, + schema: dict[str, Any], + expected_validator_cls: type, + ) -> None: + """ + If the schema has no '$schema' property, and we provide a 'default_cls', + the returned validator should be an instance of that class. + + If the schema *does* have '$schema', then the default_cls is ignored, and + the validator class is inferred from the schema's '$schema' field. + """ + # Provide default_cls=Draft7Validator + validator = jsonschema_validator( + schema, + check_format=False, + default_cls=cast(type[JsonschemaValidator], Draft7Validator), + ) + assert isinstance(validator, expected_validator_cls) + + def test_invalid_schema_raises_schema_error(self) -> None: + """ + Provide an invalid schema, ensuring that 'SchemaError' is raised. 
+ """ + invalid_schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": 123, # 'type' must be string/array, so this is invalid + } + with pytest.raises(SchemaError): + jsonschema_validator(invalid_schema, check_format=False) diff --git a/tools/schemacode/src/bidsschematools/utils.py b/tools/schemacode/src/bidsschematools/utils.py index 8a47ad0e93..481eb2e7ed 100644 --- a/tools/schemacode/src/bidsschematools/utils.py +++ b/tools/schemacode/src/bidsschematools/utils.py @@ -3,6 +3,10 @@ import logging import os import sys +from typing import Any, Optional + +from jsonschema.protocols import Validator as JsonschemaValidator +from jsonschema.validators import validator_for from . import data @@ -82,3 +86,52 @@ def set_logger_level(lgr, level): lgr.warning("Do not know how to treat loglevel %s" % level) return lgr.setLevel(level) + + +def jsonschema_validator( + schema: dict[str, Any], + *, + check_format: bool, + default_cls: Optional[type[JsonschemaValidator]] = None, +) -> JsonschemaValidator: + """ + Create a jsonschema validator appropriate for validating instances against a given + JSON schema + + Parameters + ---------- + schema : dict[str, Any] + The JSON schema to validate against + check_format : bool + Indicates whether to check the format against format specifications in the + schema + default_cls : type[JsonschemaValidator] or None, optional + The default JSON schema validator class to use to create the + validator if the appropriate validator class cannot be determined based on + the schema (by assessing the `$schema` property). 
If `None`, the class + representing the latest JSON schema draft supported by the `jsonschema` package is used + + Returns + ------- + JsonschemaValidator + The JSON schema validator + + Raises + ------ + jsonschema.exceptions.SchemaError + If the JSON schema is invalid + """ + # Retrieve appropriate validator class for validating the given schema + validator_cls: type[JsonschemaValidator] = ( + validator_for(schema, default_cls) if default_cls is not None else validator_for(schema) + ) + + # Ensure the schema is valid + validator_cls.check_schema(schema) + + if check_format: + # Return a validator with format checking enabled + return validator_cls(schema, format_checker=validator_cls.FORMAT_CHECKER) + + # Return a validator with format checking disabled + return validator_cls(schema)