11from copy import deepcopy
22from enum import Enum
3- from functools import lru_cache
3+ from functools import cache
44from inspect import isclass
55import json
66from pathlib import Path
77from typing import Any , Dict , Iterable , Optional , TypeVar , Union , cast , get_args
88
9- import jsonschema
9+ from jsonschema . protocols import Validator as JsonschemaValidator
1010import pydantic
1111import requests
1212
2121from .utils import (
2222 TransitionalGenerateJsonSchema ,
2323 _ensure_newline ,
24+ dandi_jsonschema_validator ,
25+ json_object_adapter ,
2426 sanitize_value ,
2527 strip_top_level_optional ,
28+ validate_json ,
2629 version2tuple ,
2730)
2831
29- schema_map = {
32+ # A mapping of the schema keys of DANDI models to the names of their JSON schema files
33+ SCHEMA_MAP = {
3034 "Dandiset" : "dandiset.json" ,
3135 "PublishedDandiset" : "published-dandiset.json" ,
3236 "Asset" : "asset.json" ,
@@ -130,7 +134,7 @@ def publish_model_schemata(releasedir: Union[str, Path]) -> Path:
130134 version = models .get_schema_version ()
131135 vdir = Path (releasedir , version )
132136 vdir .mkdir (exist_ok = True , parents = True )
133- for class_ , filename in schema_map .items ():
137+ for class_ , filename in SCHEMA_MAP .items ():
134138 (vdir / filename ).write_text (
135139 _ensure_newline (
136140 json .dumps (
@@ -147,49 +151,122 @@ def publish_model_schemata(releasedir: Union[str, Path]) -> Path:
147151 return vdir
148152
149153
def _validate_obj_json(
    instance: Any, validator: JsonschemaValidator, *, missing_ok: bool = False
) -> None:
    """
    Validate a data instance using a jsonschema validator with an option to filter out
    errors related to missing required properties

    :param instance: The data instance to validate
    :param validator: The JSON schema validator to use
    :param missing_ok: Indicates whether to filter out errors related to missing
        required properties
    :raises JsonschemaValidationError: If the metadata instance is invalid, and there
        are errors detected in the validation, optionally discounting errors
        related to missing required properties. An instance of this exception contains
        a list of `jsonschema.exceptions.ValidationError` instances representing all
        the (remaining) errors detected in the validation
    """
    try:
        validate_json(instance, validator)
    except JsonschemaValidationError as e:
        if not missing_ok:
            # Bare `raise` re-raises the active exception without tacking an
            # extra frame onto its traceback (idiomatic vs. `raise e`)
            raise
        # Discard errors that only complain about missing required properties
        remaining_errs = [
            err for err in e.errors if "is a required property" not in err.message
        ]
        # Re-raise only if substantive errors remain after the filtering
        if remaining_errs:
            raise JsonschemaValidationError(remaining_errs) from e
171183
172184
def _validate_dandiset_json(data: dict, schema_dir: Union[str, Path]) -> None:
    """
    Validate Dandiset metadata against the ``dandiset.json`` schema in *schema_dir*

    :param data: The Dandiset metadata instance to validate
    :param schema_dir: Directory containing the published JSON schema files
    """
    schema_path = Path(schema_dir, "dandiset.json")
    schema = json.loads(schema_path.read_text())
    _validate_obj_json(data, dandi_jsonschema_validator(schema))
177189
178190
def _validate_asset_json(data: dict, schema_dir: Union[str, Path]) -> None:
    """
    Validate asset metadata against the ``asset.json`` schema in *schema_dir*

    :param data: The asset metadata instance to validate
    :param schema_dir: Directory containing the published JSON schema files
    """
    schema_path = Path(schema_dir, "asset.json")
    schema = json.loads(schema_path.read_text())
    _validate_obj_json(data, dandi_jsonschema_validator(schema))
195+
183196
@cache
def _get_jsonschema_validator(
    schema_version: str, schema_key: str
) -> JsonschemaValidator:
    """
    Get jsonschema validator for validating instances against a specific DANDI schema

    :param schema_version: The version of the specific DANDI schema
    :param schema_key: The schema key that identifies the specific DANDI schema
    :return: The jsonschema validator appropriate for validating instances against the
        specific DANDI schema
    :raises ValueError: If the provided schema version is not among the allowed
        versions, `ALLOWED_VALIDATION_SCHEMAS`
    :raises ValueError: If the provided schema key is not among the keys in `SCHEMA_MAP`
    :raises requests.HTTPError: If the schema cannot be fetched from the `dandi/schema`
        repository
    :raises RuntimeError: If the fetched schema is not a valid JSON object
    """
    if schema_version not in ALLOWED_VALIDATION_SCHEMAS:
        raise ValueError(
            f"DANDI schema version {schema_version} is not allowed. "
            f"Allowed are: {', '.join(ALLOWED_VALIDATION_SCHEMAS)}."
        )
    if schema_key not in SCHEMA_MAP:
        raise ValueError(
            f"Schema key must be one of {', '.join(map(repr, SCHEMA_MAP.keys()))}"
        )

    # Fetch the schema from the `dandi/schema` repository
    schema_url = (
        f"https://raw.githubusercontent.com/dandi/schema/"
        f"master/releases/{schema_version}/{SCHEMA_MAP[schema_key]}"
    )
    # A bounded timeout so a stalled connection cannot hang the caller indefinitely
    # (requests.get() without `timeout` waits forever by default)
    r = requests.get(schema_url, timeout=30)
    r.raise_for_status()
    schema = r.json()

    # Validate that the retrieved schema is a valid JSON object, i.e., a dictionary.
    # This step is needed because the `jsonschema` package requires the schema to be a
    # `Mapping[str, Any]` object
    try:
        json_object_adapter.validate_python(schema)
    except pydantic.ValidationError as e:
        msg = (
            f"The JSON schema at {schema_url} is not a valid JSON object. "
            f"Received: {schema}"
        )
        raise RuntimeError(msg) from e

    # Create a jsonschema validator for the schema
    return dandi_jsonschema_validator(schema)
248+
249+
@cache
def _get_jsonschema_validator_local(schema_key: str) -> JsonschemaValidator:
    """
    Get jsonschema validator for validating instances against a specific DANDI schema
    generated from the corresponding locally defined Pydantic model

    :param schema_key: The schema key that identifies the specific DANDI schema
    :raises ValueError: If the provided schema key is not among the keys in `SCHEMA_MAP`
    """
    if schema_key not in SCHEMA_MAP:
        valid_keys = ", ".join(repr(k) for k in SCHEMA_MAP)
        raise ValueError(f"Schema key must be one of {valid_keys}")

    # Locally defined Pydantic model identified by the schema key
    model_cls: type[pydantic.BaseModel] = getattr(models, schema_key)

    # Generate the JSON schema from the model and wrap it in a validator
    json_schema = model_cls.model_json_schema(
        schema_generator=TransitionalGenerateJsonSchema
    )
    return dandi_jsonschema_validator(json_schema)
193270
194271
195272def validate (
@@ -232,25 +309,22 @@ def validate(
232309 if schema_key is None :
233310 raise ValueError ("Provided object has no known schemaKey" )
234311 schema_version = schema_version or obj .get ("schemaVersion" )
235- if schema_version not in ALLOWED_VALIDATION_SCHEMAS and schema_key in schema_map :
312+ if schema_version not in ALLOWED_VALIDATION_SCHEMAS and schema_key in SCHEMA_MAP :
236313 raise ValueError (
237314 f"Metadata version { schema_version } is not allowed. "
238315 f"Allowed are: { ', ' .join (ALLOWED_VALIDATION_SCHEMAS )} ."
239316 )
240317 if json_validation :
241318 if schema_version == DANDI_SCHEMA_VERSION :
242- klass = getattr (models , schema_key )
243- schema = klass .model_json_schema (
244- schema_generator = TransitionalGenerateJsonSchema
245- )
319+ jvalidator = _get_jsonschema_validator_local (schema_key )
246320 else :
247- if schema_key not in schema_map :
321+ if schema_key not in SCHEMA_MAP :
248322 raise ValueError (
249323 "Only dandisets and assets can be validated "
250324 "using json schema for older versions"
251325 )
252- schema = _get_schema (schema_version , schema_map [ schema_key ] )
253- _validate_obj_json (obj , schema , missing_ok )
326+ jvalidator = _get_jsonschema_validator (schema_version , schema_key )
327+ _validate_obj_json (obj , jvalidator , missing_ok = missing_ok )
254328 klass = getattr (models , schema_key )
255329 try :
256330 klass (** obj )
@@ -358,8 +432,7 @@ def migrate(
358432 # Optionally validate the instance against the DANDI schema it specifies
359433 # before migration
360434 if not skip_validation :
361- schema = _get_schema (obj_ver , "dandiset.json" )
362- _validate_obj_json (obj , schema )
435+ _validate_obj_json (obj , _get_jsonschema_validator (obj_ver , "Dandiset" ))
363436
364437 obj_migrated = deepcopy (obj )
365438
0 commit comments