@@ -394,73 +394,103 @@ def validate_metadata(json_record):
394394 """
395395 errors = []
396396
397+
397398 if "titles" not in json_record :
398399 errors .append ("'titles' field is missing." )
399400 elif not isinstance (json_record ["titles" ], list ) or len (json_record ["titles" ]) == 0 :
400401 errors .append ("'titles' should be a non-empty list." )
401402 else :
403+
404+ # Ensure each title is a dictionary with 'title' field
402405 for title in json_record ["titles" ]:
403406 if not isinstance (title , dict ) or "title" not in title :
404- errors .append ("Each entry in 'titles' must be a dictionary with a 'title' key." )
405-
407+ errors .append (
408+ "Each entry in 'titles' must be a dictionary with a 'title' key."
409+ )
410+
411+ # Publication date is handled by customize function
412+
413+ # Check for 'creators'
414+ if "creators" not in json_record :
415+ errors .append ("'creators' field is missing." )
416+ elif (
417+ not isinstance (json_record ["creators" ], list )
418+ or len (json_record ["creators" ]) == 0
419+ ):
420+ errors .append ("'creators' should be a non-empty list." )
421+ else :
422+ for creator in json_record ["creators" ]:
423+ if not isinstance (creator , dict ) or "name" not in creator :
424+ errors .append (
425+ "Each creator in 'creators' must be a dictionary with a 'name' key."
426+ )
427+
406428 # Check for 'contributors'
407429 if "contributors" in json_record :
408- if not isinstance (json_record ["contributors" ], list ) or len ( json_record [ "contributors" ]) == 0 :
409- errors .append ("'contributors' should be a non-empty list." )
430+ if not isinstance (json_record ["contributors" ], list ):
431+ errors .append ("'contributors' should be a list." )
410432 else :
411433 for contributor in json_record ["contributors" ]:
412- if not isinstance (contributor , dict ) or "name" not in contributor or "contributorType" not in contributor :
413- errors .append ("Each 'contributor' must have 'name' and 'contributorType'." )
414- if "nameIdentifiers" in contributor :
415- if not isinstance (contributor ["nameIdentifiers" ], list ):
416- errors .append ("'nameIdentifiers' should be a list." )
417- for name_id in contributor ["nameIdentifiers" ]:
418- if not isinstance (name_id , dict ) or "nameIdentifier" not in name_id or "nameIdentifierScheme" not in name_id :
419- errors .append ("Each 'nameIdentifier' should have 'nameIdentifier' and 'nameIdentifierScheme'." )
420- if "affiliation" in contributor :
421- if not isinstance (contributor ["affiliation" ], list ):
422- errors .append ("'affiliation' should be a list." )
423- for affiliation in contributor ["affiliation" ]:
424- if not isinstance (affiliation , dict ) or "name" not in affiliation :
425- errors .append ("Each 'affiliation' should have a 'name' key." )
426-
427- # Check for 'descriptions'
428- if "descriptions" not in json_record :
429- errors .append ("'descriptions' field is missing." )
430- elif not isinstance (json_record ["descriptions" ], list ) or len (json_record ["descriptions" ]) == 0 :
431- errors .append ("'descriptions' should be a non-empty list." )
432- else :
433- for description in json_record ["descriptions" ]:
434- if not isinstance (description , dict ) or "description" not in description or "descriptionType" not in description :
435- errors .append ("Each 'description' must have 'description' and 'descriptionType'." )
434+ if not isinstance (contributor , dict ) or "name" not in contributor :
435+ errors .append (
436+ "Each contributor must be a dictionary with a 'name' key."
437+ )
436438
437- # Check for 'fundingReferences'
438- if "fundingReferences" in json_record :
439- if not isinstance (json_record ["fundingReferences" ], list ):
440- errors .append ("'fundingReferences' should be a list." )
439+ # Check for 'resourceType'
440+ if "resourceType" not in json_record ["types" ]:
441+ errors .append ("'resourceType' field is missing in 'types'." )
442+ elif not isinstance (json_record ["types" ]["resourceType" ], str ):
443+ errors .append ("'resourceType' should be a string." )
444+
445+ # Check for 'identifiers'
446+ if "identifiers" in json_record :
447+ if not isinstance (json_record ["identifiers" ], list ):
448+ errors .append ("'identifiers' should be a list." )
441449 else :
442- for fund_ref in json_record ["fundingReferences" ]:
443- if not isinstance (fund_ref , dict ) or "funderName" not in fund_ref :
444- errors .append ("Each 'fundingReference' must have 'funderName'." )
445- if "funderIdentifier" in fund_ref and "funderIdentifierType" not in fund_ref :
446- errors .append ("'funderIdentifier' should have an associated 'funderIdentifierType'." )
450+ for identifier in json_record ["identifiers" ]:
451+ if (
452+ not isinstance (identifier , dict )
453+ or "identifier" not in identifier
454+ or "identifierType" not in identifier
455+ ):
456+ errors .append (
457+ "Each identifier must be a dictionary with 'identifier' and 'identifierType' keys."
458+ )
459+
460+ # Check for 'subjects'
461+ if "subjects" in json_record :
462+ if not isinstance (json_record ["subjects" ], list ):
463+ errors .append ("'subjects' should be a list." )
464+ else :
465+ for subject in json_record ["subjects" ]:
466+ if not isinstance (subject , dict ) or "subject" not in subject :
467+ errors .append (
468+ "Each subject must be a dictionary with a 'subject' key."
469+ )
470+
447471
448472 # Check for 'relatedIdentifiers'
449473 if "relatedIdentifiers" in json_record :
450474 if not isinstance (json_record ["relatedIdentifiers" ], list ):
451475 errors .append ("'relatedIdentifiers' should be a list." )
452476 else :
453477 for related_id in json_record ["relatedIdentifiers" ]:
454- if not isinstance (related_id , dict ) or "relatedIdentifier" not in related_id or "relationType" not in related_id :
455- errors .append ("Each 'relatedIdentifier' must have 'relatedIdentifier' and 'relationType'." )
456- if "relatedIdentifierType" not in related_id :
457- errors .append ("Each 'relatedIdentifier' must have 'relatedIdentifierType'." )
478+
479+ if (
480+ not isinstance (related_id , dict )
481+ or "relatedIdentifier" not in related_id
482+ ):
483+ errors .append (
484+ "Each relatedIdentifier must be a dictionary with a 'relatedIdentifier' key."
485+ )
486+
458487
459488 # Check for 'rightsList'
460489 if "rightsList" in json_record :
461490 if not isinstance (json_record ["rightsList" ], list ):
462491 errors .append ("'rightsList' should be a list." )
463492 else :
493+
464494 for right in json_record ["rightsList" ]:
465495 if not isinstance (right , dict ) or "rights" not in right :
466496 errors .append ("Each 'rightsList' entry must have 'rights'." )
@@ -512,11 +542,18 @@ def validate_metadata(json_record):
512542 if not isinstance (affiliation , dict ) or "name" not in affiliation :
513543 errors .append ("Each 'affiliation' in 'creators' must have a 'name'." )
514544
545+ for rights in json_record ["rightsList" ]:
546+ if not isinstance (rights , dict ) or "rights" not in rights :
547+ errors .append (
548+ "Each entry in 'rightsList' must be a dictionary with a 'rights' key."
549+ )
550+
515551 # Check for 'geoLocations'
516552 if "geoLocations" in json_record :
517553 if not isinstance (json_record ["geoLocations" ], list ):
518554 errors .append ("'geoLocations' should be a list." )
519555 else :
556+
520557 for geo_loc in json_record ["geoLocations" ]:
521558 if not isinstance (geo_loc , dict ) or "geoLocationPlace" not in geo_loc :
522559 errors .append ("Each 'geoLocation' must have 'geoLocationPlace'." )
@@ -560,6 +597,29 @@ def validate_metadata(json_record):
560597 errors .append ("'types' must have 'resourceTypeGeneral'." )
561598 if "resourceType" in json_record ["types" ] and not isinstance (json_record ["types" ]["resourceType" ], str ):
562599 errors .append ("'resourceType' should be a string if provided." )
600+ == == == =
601+ for location in json_record ["geoLocations" ]:
602+ if not isinstance (location , dict ):
603+ errors .append ("Each entry in 'geoLocations' must be a dictionary." )
604+ elif (
605+ "geoLocationPoint" not in location
606+ and "geoLocationBox" not in location
607+ and "geoLocationPlace" not in location
608+ ):
609+ errors .append (
610+ "Each geoLocation entry must contain at least one of 'geoLocationPoint', 'geoLocationBox', or 'geoLocationPlace'."
611+ )
612+
613+ # Check for 'fundingReferences'
614+ if "fundingReferences" in json_record :
615+ if not isinstance (json_record ["fundingReferences" ], list ):
616+ errors .append ("'fundingReferences' should be a list." )
617+ else :
618+ for funding in json_record ["fundingReferences" ]:
619+ if not isinstance (funding , dict ):
620+ errors .append ("Each funding reference must be a dictionary." )
621+ if "funderName" not in funding :
622+ errors .append ("Each funding reference must contain 'funderName'." )
563623
564624 # Return errors if any are found
565625 if errors :
0 commit comments