11"""
2- Schema APIs
2+ Schema APIs
33
4- Logical document structure in this module:
5- {
6- "url": <schema_url>,
7- "namespace": <schema_name>,
8- "source": { ... } // only in returned docs for now
9- }
4+ Logical document structure in this module:
5+ {
6+ "url": <schema_url>,
7+ "namespace": <schema_name>,
8+ "source": { ... } // only in returned docs for now
9+ }
1010
11- Add read-only protection to cores schemas.
12- Add authentication and permission control.
13- Add convenience features to assist frontend rendering.
11+ Add read-only protection to core schemas.
12+ Add authentication and permission control.
13+ Add convenience features to assist frontend rendering.
1414
1515"""
1616
1717import json
18- import re
1918import logging
19+ import re
2020from datetime import date , datetime
2121
2222import certifi
2626from discovery .model .schema import Schema
2727from discovery .notify import SchemaNotifier
2828from discovery .registry import schemas
29- from discovery .utils .adapters import SchemaAdapter
3029from discovery .registry .common import NoEntityError
30+ from discovery .utils .adapters import SchemaAdapter
3131
3232from .base import APIBaseHandler , authenticated , registryOperation
3333
@@ -88,7 +88,9 @@ def trace_root(klass):
8888 while index < len (queue ):
8989 for parent_line_string in klass .get ("parent_classes" , []):
9090 parents = parent_line_string .split (", " )
91- ids = [(parent .split (":" )[0 ], parent ) for parent in parents if ":" in parent ][::- 1 ]
91+ ids = [
92+ (parent .split (":" )[0 ], parent ) for parent in parents if ":" in parent
93+ ][::- 1 ]
9294 for _id in ids :
9395 klass = schemas .get_class (_id [0 ], _id [1 ])
9496 if klass and klass not in queue :
@@ -128,7 +130,10 @@ class SchemaRegistryHandler(APIBaseHandler):
128130 "verbose" : {"type" : bool , "default" : False , "alias" : ["v" ]},
129131 "start" : {"type" : int , "default" : 0 , "alias" : ["from" , "skip" ]},
130132 "size" : {"type" : int , "default" : 10 , "max" : 100 , "alias" : "skip" },
131- "context" : {"type" : bool , "default" : True }, # consider not default in future
133+ "context" : {
134+ "type" : bool ,
135+ "default" : True ,
136+ }, # consider not default in future
132137 "source" : {"type" : bool , "default" : True },
133138 },
134139 }
@@ -225,7 +230,7 @@ def get(self, namespace=None, curie=None):
225230 _fields = [x .strip () for x in self .args .field .split ("," )]
226231 if not ("_meta" in _fields or "_meta.url" in _fields ):
227232 _fields .append ("_meta" ) # always include _meta.url in the response
228- if not ( "_status" in _fields ) :
233+ if "_status" not in _fields :
229234 _fields .append ("_status" )
230235 hits = [
231236 to_api_doc_repr (schema )
@@ -364,7 +369,7 @@ class SchemaViewHandler(APIBaseHandler):
364369 }, # indicates the special target namespace of the schema, e.g. schema.org or bioschemas.
365370 "validation_merge" : {
366371 "type" : bool ,
367- "default" : False
372+ "default" : False ,
368373 }, # whether to merge validation schemas from parent classes
369374 }
370375 }
@@ -392,31 +397,39 @@ async def get(self):
392397 doc = None
393398 if self .args .url :
394399 # load doc from url
395- response = await AsyncHTTPClient ().fetch (self .args .url , ca_certs = certifi .where ())
400+ response = await AsyncHTTPClient ().fetch (
401+ self .args .url , ca_certs = certifi .where ()
402+ )
396403 doc = response .body
397404 elif self .request .body :
398405 # load doc from request body
399406 doc = self .request .body
400407 if doc :
401408 doc = json .loads (doc )
402409 # Use the validation_merge parameter from query args, defaults to False
403- validation_merge = getattr (self .args , 'validation_merge' , False )
404- validator_options = {"validation_merge" : validation_merge , "raise_on_validation_error" : False }
410+ validation_merge = getattr (self .args , "validation_merge" , False )
411+ validator_options = {
412+ "validation_merge" : validation_merge ,
413+ "raise_on_validation_error" : False ,
414+ }
415+ schema_org_version = schemas .get_schema_org_version ()
416+ _kwargs = {
417+ "validator_options" : validator_options ,
418+ "schema_org_version" : schema_org_version ,
419+ }
405420 if self .args .ns :
406421 if self .args .ns == "schema.org" :
407422 # do not load any base schemas
408- schema = SchemaAdapter (
409- doc , base_schema = [], validator_options = validator_options
410- )
423+ schema = SchemaAdapter (doc , base_schema = [], ** _kwargs )
411424 # elif self.args.ns == "bioschemas":
412425 # # do not load bioschemas, only schema.org
413426 # schema = SchemaAdapter(
414- # doc, base_schema=["schema.org"], validator_options=validator_options
427+ # doc, base_schema=["schema.org"], **_kwargs
415428 # )
416429 else :
417- schema = SchemaAdapter (doc , validator_options = validator_options )
430+ schema = SchemaAdapter (doc , ** _kwargs )
418431 else :
419- schema = SchemaAdapter (doc , validator_options = validator_options )
432+ schema = SchemaAdapter (doc , ** _kwargs )
420433 else :
421434 self .finish ({})
422435 return
@@ -459,7 +472,7 @@ class SchemaHandler(APIBaseHandler):
459472 "default" : "json" ,
460473 "enum" : ("json" , "yaml" , "html" , "msgpack" ),
461474 }
462- }
475+ },
463476 }
464477
465478 def class_property_filter (self , metadata , class_id ):
@@ -482,7 +495,9 @@ def class_property_filter(self, metadata, class_id):
482495 property_list .append (data_dict )
483496 break
484497 elif "schema:domainIncludes" not in data_dict :
485- raise HTTPError (400 , reason = "No key 'schema:domainIncludes' found." )
498+ raise HTTPError (
499+ 400 , reason = "No key 'schema:domainIncludes' found."
500+ )
486501 else :
487502 # odd case -- error exception case
488503 raise HTTPError (
@@ -493,7 +508,9 @@ def class_property_filter(self, metadata, class_id):
493508
494509 def get_context_matches (self , metadata , context_dict ):
495510 matches = []
496- pattern = re .compile (r"^([a-zA-Z0-9_-]+):([a-zA-Z0-9_-]+)$" ) # Regex to match STRINGA:STRINGB
511+ pattern = re .compile (
512+ r"^([a-zA-Z0-9_-]+):([a-zA-Z0-9_-]+)$"
513+ ) # Regex to match STRINGA:STRINGB
497514
498515 def recursive_search (data ):
499516 if isinstance (data , dict ):
@@ -509,6 +526,7 @@ def recursive_search(data):
509526 prefix = match .group (1 )
510527 if prefix in context_dict :
511528 matches .append (prefix )
529+
512530 recursive_search (metadata )
513531 return set (matches )
514532
@@ -541,7 +559,7 @@ def build_schema_org_context_dict(self, metadata):
541559 "vann" : "http://purl.org/vocab/vann/" ,
542560 "void" : "http://rdfs.org/ns/void#" ,
543561 "xsd" : "http://www.w3.org/2001/XMLSchema#" ,
544- "cvisb" : "https://data.cvisb.org/schema"
562+ "cvisb" : "https://data.cvisb.org/schema" ,
545563 }
546564
547565 matches = self .get_context_matches (metadata , context_dict )
@@ -550,17 +568,17 @@ def build_schema_org_context_dict(self, metadata):
550568
def add_schema_org_property_to_list(self, data_dict, property_list):
    """
    Append the JSON-LD ``rdf:Property`` form of one property record to
    ``property_list``.

    ``data_dict`` must provide the keys ``curie``, ``description``, ``label``,
    ``domain`` and ``range``; ``domain`` and ``range`` are iterated and each
    value is wrapped as ``{"@id": value}``.

    ``property_list`` is mutated in place; nothing is returned.
    Raises ``KeyError`` when any of the required keys is missing.
    """
    temp_dict = {
        "@id": data_dict["curie"],
        "@type": "rdf:Property",
        "rdfs:comment": data_dict["description"],
        "rdfs:label": data_dict["label"],
        "schema:domainIncludes": [{"@id": value} for value in data_dict["domain"]],
        "schema:rangeIncludes": [{"@id": value} for value in data_dict["range"]],
    }
    property_list.append(temp_dict)
561579
562580 def filter_schema_org_class_with_properties (self , metadata , property_list ):
563- class_dict = {
581+ class_dict = {
564582 "@id" : metadata ["_id" ].replace ("schema::" , "" , 1 ),
565583 "@type" : "rdfs:Class" ,
566584 "rdfs:comment" : metadata ["description" ],
@@ -569,7 +587,7 @@ def filter_schema_org_class_with_properties(self, metadata, property_list):
569587 }
570588
571589 property_list .append (class_dict )
572- for data_dict in metadata [' properties' ]:
590+ for data_dict in metadata [" properties" ]:
573591 self .add_schema_org_property_to_list (data_dict , property_list )
574592 return property_list
575593
@@ -609,10 +627,16 @@ def graph_data_filter(self, metadata, curie, property_list):
609627 return property_list
610628
def raise_404_not_found_error(self, curie):
    """
    Raise an ``HTTPError`` with status 404 whose reason names ``curie`` as a
    namespace or class that does not exist in the registry.

    Always raises; never returns.
    """
    raise HTTPError(
        404,
        reason=f"The requested namespace or class, {curie}, does not exist in registry.",
    )
613634
def raise_404_no_validation_error(self, curie):
    """
    Raise an ``HTTPError`` with status 404 whose reason states that no
    validation schema is provided for the class or property ``curie``.

    Always raises; never returns.
    """
    raise HTTPError(
        404,
        reason=f"The validation schema is not provided for this class or property: {curie}",
    )
616640
617641 def get_curie (self , metadata , curie , ns ):
618642 """
@@ -635,23 +659,37 @@ def get_curie(self, metadata, curie, ns):
635659 if ns == "schema" :
636660 try :
637661 klass = schemas .get_class ("schema" , curie_str )
638- property_list = self .filter_schema_org_class_with_properties (klass , property_list )
662+ property_list = self .filter_schema_org_class_with_properties (
663+ klass , property_list
664+ )
639665 except NoEntityError as no_class_error :
640666 try :
641- logger .info (f"Error retrieving schema class: { no_class_error } , attempting to retrieve property instead..." )
667+ logger .info (
668+ f"Error retrieving schema class: { no_class_error } , attempting to retrieve property instead..."
669+ )
642670 property_label = curie_str .split (":" )[1 ]
643- klass = schemas .get_schema_org_property (property_label )
644- property_list = self .filter_schema_org_property (klass , property_list )
671+ klass = schemas .get_schema_org_property (property_label )
672+ property_list = self .filter_schema_org_property (
673+ klass , property_list
674+ )
645675 except NoEntityError as no_property_error :
646- logger .info (f"Error retrieving schema class: { no_property_error } , attempting to retrieve property instead..." )
676+ logger .info (
677+ f"Error retrieving schema class: { no_property_error } , attempting to retrieve property instead..."
678+ )
647679 self .raise_404_not_found_error (curie )
648680 # set the context property for schema.org
649- metadata ["@context" ] = self .build_schema_org_context_dict (property_list )
681+ metadata ["@context" ] = self .build_schema_org_context_dict (
682+ property_list
683+ )
650684 else :
651- property_list = self .graph_data_filter (metadata , curie_str , property_list )
685+ property_list = self .graph_data_filter (
686+ metadata , curie_str , property_list
687+ )
652688 elif isinstance (curie , list ):
653689 for curie_str in curie :
654- property_list = self .graph_data_filter (metadata , curie_str , property_list )
690+ property_list = self .graph_data_filter (
691+ metadata , curie_str , property_list
692+ )
655693 else :
656694 raise HTTPError (400 , reason = "Unidentified curie input request" )
657695
@@ -772,12 +810,18 @@ def get(self, curie=None, validation=None):
772810 # if no curie is given, throw error
773811 if curie is None :
774812 raise HTTPError (
775- 400 , reason = "A curie with a namespace prefix is required, i.e 'n3c:Dataset'"
813+ 400 ,
814+ reason = "A curie with a namespace prefix is required, i.e 'n3c:Dataset'" ,
776815 )
777816
778817 # curie: /{ns}
779818 if ":" not in curie and validation :
780- raise (HTTPError (400 , reason = "A validation request must be for a class or property, not a namespace." ))
819+ raise (
820+ HTTPError (
821+ 400 ,
822+ reason = "A validation request must be for a class or property, not a namespace." ,
823+ )
824+ )
781825
782826 elif ":" not in curie :
783827 self .handle_namespace_request (curie )
@@ -790,7 +834,9 @@ def get(self, curie=None, validation=None):
790834 # check if request has too many ns fields
791835 ns_list = list (set ([x .split (":" )[0 ] for x in curie .split ("," )]))
792836 if len (ns_list ) > 1 :
793- raise HTTPError (400 , reason = "Too many schemas(namespaces) requested" )
837+ raise HTTPError (
838+ 400 , reason = "Too many schemas(namespaces) requested"
839+ )
794840 else :
795841 ns = curie .split (":" )[0 ]
796842
@@ -810,6 +856,7 @@ def get(self, curie=None, validation=None):
810856 else :
811857 self .handle_class_request (curie , schema_metadata )
812858
859+
813860class CoverageHandler (APIBaseHandler ):
814861 """
815862 Fetch - GET ./api/coverage
@@ -836,5 +883,7 @@ def get(self, curie=None):
836883 except (ValueError , KeyError ) as error :
837884 raise HTTPError (400 , reason = f"No coverage found because: { error } " )
838885 except Exception as error :
839- raise HTTPError (400 , reason = f"Error retrieving coverage with exception { error } " )
886+ raise HTTPError (
887+ 400 , reason = f"Error retrieving coverage with exception { error } "
888+ )
840889 self .finish (coverage )
0 commit comments