@@ -134,7 +134,7 @@ def rdm_creators_contributors(person_list, peopleroles):
134134
135135def customize_schema_rdm (json_record ):
136136 # Get vocabularies used in InvenioRDM
137-
137+
138138 vocabularies = get_vocabularies ()
139139 validate_metadata (json_record )
140140 peopleroles = vocabularies ["crr" ]
@@ -386,6 +386,7 @@ def customize_schema_rdm(json_record):
386386
387387 return final
388388
389+
def validate_metadata(json_record):
    """
    Validates the presence and structure of required fields in a CaltechDATA JSON record.

    All checks are run and every problem is collected, so the caller sees the
    complete list of issues in a single exception instead of one at a time.

    Required fields:
        * 'types': dict containing 'resourceTypeGeneral' and a string
          'resourceType'.
        * 'titles': non-empty list of dicts, each with a 'title' key.
        * 'creators': non-empty list of dicts, each with a 'name' key.
        * At least one of 'publicationYear' or 'dates'.

    Optional fields, checked only when present: 'dates', 'contributors',
    'identifiers', 'subjects', 'relatedIdentifiers', 'rightsList',
    'geoLocations', and 'fundingReferences'. Each must be a list of dicts
    carrying the keys named in the corresponding error message.

    Args:
        json_record: The metadata record (a dict) to validate.

    Returns:
        None when the record passes every check.

    Raises:
        ValueError: If one or more checks fail; the message lists all of the
            failures, separated by ", ".
    """
    errors = []

    # Check for 'types' and 'resourceTypeGeneral'
    if "types" not in json_record:
        errors.append("'types' field is missing.")
    elif not isinstance(json_record["types"], dict):
        errors.append("'types' field should be a dictionary.")
    elif "resourceTypeGeneral" not in json_record["types"]:
        errors.append("'resourceTypeGeneral' field is missing in 'types'.")

    # Check for 'title'
    if "titles" not in json_record:
        errors.append("'titles' field is missing.")
    elif not isinstance(json_record["titles"], list) or not json_record["titles"]:
        errors.append("'titles' should be a non-empty list.")
    else:
        # Ensure each title is a dictionary with 'title' field
        for title in json_record["titles"]:
            if not isinstance(title, dict) or "title" not in title:
                errors.append(
                    "Each entry in 'titles' must be a dictionary with a 'title' key."
                )

    # Check for 'publication_date'
    if "publicationYear" not in json_record and "dates" not in json_record:
        errors.append(
            "A publication date is required ('publicationYear' or 'dates' field is missing)."
        )
    if "dates" in json_record:
        if not isinstance(json_record["dates"], list):
            errors.append("'dates' should be a list.")
        else:
            for date_entry in json_record["dates"]:
                if (
                    not isinstance(date_entry, dict)
                    or "dateType" not in date_entry
                    or "date" not in date_entry
                ):
                    errors.append(
                        "Each entry in 'dates' must be a dictionary with 'dateType' and 'date' keys."
                    )

    # Check for 'creators'
    if "creators" not in json_record:
        errors.append("'creators' field is missing.")
    elif (
        not isinstance(json_record["creators"], list)
        or not json_record["creators"]
    ):
        errors.append("'creators' should be a non-empty list.")
    else:
        for creator in json_record["creators"]:
            if not isinstance(creator, dict) or "name" not in creator:
                errors.append(
                    "Each creator in 'creators' must be a dictionary with a 'name' key."
                )

    # Check for 'contributors'
    if "contributors" in json_record:
        if not isinstance(json_record["contributors"], list):
            errors.append("'contributors' should be a list.")
        else:
            for contributor in json_record["contributors"]:
                if not isinstance(contributor, dict) or "name" not in contributor:
                    errors.append(
                        "Each contributor must be a dictionary with a 'name' key."
                    )

    # Check for 'resourceType'
    # Only inspect 'types' when it exists and is a dict. A missing or
    # malformed 'types' was already reported above; indexing it here would
    # raise KeyError/TypeError and hide the collected errors from the caller.
    types = json_record.get("types") if "types" in json_record else None
    if isinstance(types, dict):
        if "resourceType" not in types:
            errors.append("'resourceType' field is missing in 'types'.")
        elif not isinstance(types["resourceType"], str):
            errors.append("'resourceType' should be a string.")

    # Check for 'identifiers'
    if "identifiers" in json_record:
        if not isinstance(json_record["identifiers"], list):
            errors.append("'identifiers' should be a list.")
        else:
            for identifier in json_record["identifiers"]:
                if (
                    not isinstance(identifier, dict)
                    or "identifier" not in identifier
                    or "identifierType" not in identifier
                ):
                    errors.append(
                        "Each identifier must be a dictionary with 'identifier' and 'identifierType' keys."
                    )

    # Check for 'subjects'
    if "subjects" in json_record:
        if not isinstance(json_record["subjects"], list):
            errors.append("'subjects' should be a list.")
        else:
            for subject in json_record["subjects"]:
                if not isinstance(subject, dict) or "subject" not in subject:
                    errors.append(
                        "Each subject must be a dictionary with a 'subject' key."
                    )

    # Check for 'relatedIdentifiers'
    if "relatedIdentifiers" in json_record:
        if not isinstance(json_record["relatedIdentifiers"], list):
            errors.append("'relatedIdentifiers' should be a list.")
        else:
            for related_id in json_record["relatedIdentifiers"]:
                if (
                    not isinstance(related_id, dict)
                    or "relatedIdentifier" not in related_id
                ):
                    errors.append(
                        "Each relatedIdentifier must be a dictionary with a 'relatedIdentifier' key."
                    )

    # Check for 'rightsList'
    if "rightsList" in json_record:
        if not isinstance(json_record["rightsList"], list):
            errors.append("'rightsList' should be a list.")
        else:
            for rights in json_record["rightsList"]:
                if not isinstance(rights, dict) or "rights" not in rights:
                    errors.append(
                        "Each entry in 'rightsList' must be a dictionary with a 'rights' key."
                    )

    # Check for 'geoLocations'
    if "geoLocations" in json_record:
        if not isinstance(json_record["geoLocations"], list):
            errors.append("'geoLocations' should be a list.")
        else:
            for location in json_record["geoLocations"]:
                if not isinstance(location, dict):
                    errors.append("Each entry in 'geoLocations' must be a dictionary.")
                elif (
                    "geoLocationPoint" not in location
                    and "geoLocationBox" not in location
                    and "geoLocationPlace" not in location
                ):
                    errors.append(
                        "Each geoLocation entry must contain at least one of 'geoLocationPoint', 'geoLocationBox', or 'geoLocationPlace'."
                    )

    # Check for 'fundingReferences'
    if "fundingReferences" in json_record:
        if not isinstance(json_record["fundingReferences"], list):
            errors.append("'fundingReferences' should be a list.")
        else:
            for funding in json_record["fundingReferences"]:
                if not isinstance(funding, dict):
                    errors.append("Each funding reference must be a dictionary.")
                # elif: the key lookup is only meaningful (and safe) on a
                # dict; a non-dict entry has already been reported above.
                elif "funderName" not in funding:
                    errors.append("Each funding reference must contain 'funderName'.")

    # Return errors if any are found
    if errors:
        raise ValueError(f"Validation errors in metadata: {', '.join(errors)}")
512551
552+
513553if __name__ == "__main__" :
514554 # Read in from file for demo purposes
515555
0 commit comments