Skip to content

Commit f46700a

Browse files
Update customize_schema.py
1 parent d248c29 commit f46700a

File tree

1 file changed

+97
-57
lines changed

1 file changed

+97
-57
lines changed

caltechdata_api/customize_schema.py

Lines changed: 97 additions & 57 deletions
Original file line number | Diff line number | Diff line change
@@ -134,7 +134,7 @@ def rdm_creators_contributors(person_list, peopleroles):
134134

135135
def customize_schema_rdm(json_record):
136136
# Get vocabularies used in InvenioRDM
137-
137+
138138
vocabularies = get_vocabularies()
139139
validate_metadata(json_record)
140140
peopleroles = vocabularies["crr"]
@@ -386,6 +386,7 @@ def customize_schema_rdm(json_record):
386386

387387
return final
388388

389+
389390
def validate_metadata(json_record):
    """
    Validates the presence and structure of required fields in a CaltechDATA JSON record.

    All checks run before anything is reported, so the caller receives the
    full list of problems in one exception instead of one problem per call.

    Args:
        json_record: Dictionary holding the record metadata to check.

    Returns:
        None when every check passes.

    Raises:
        ValueError: If any check fails. The message is the prefix
            "Validation errors in metadata: " followed by every collected
            error joined with ", ".
    """
    errors = []

    # Check for 'types' and 'resourceTypeGeneral'
    # `types` stays None unless the field is a usable dictionary, so the later
    # 'resourceType' check never indexes into a missing or malformed value.
    types = None
    if "types" not in json_record:
        errors.append("'types' field is missing.")
    elif not isinstance(json_record["types"], dict):
        errors.append("'types' field should be a dictionary.")
    else:
        types = json_record["types"]
        if "resourceTypeGeneral" not in types:
            errors.append("'resourceTypeGeneral' field is missing in 'types'.")

    # Check for 'title'
    _collect_required_list_errors(
        json_record,
        "titles",
        ("title",),
        "Each entry in 'titles' must be a dictionary with a 'title' key.",
        errors,
    )

    # Check for 'publication_date'
    if "publicationYear" not in json_record and "dates" not in json_record:
        errors.append(
            "A publication date is required ('publicationYear' or 'dates' field is missing)."
        )
    _collect_optional_list_errors(
        json_record,
        "dates",
        ("dateType", "date"),
        "Each entry in 'dates' must be a dictionary with 'dateType' and 'date' keys.",
        errors,
    )

    # Check for 'creators'
    _collect_required_list_errors(
        json_record,
        "creators",
        ("name",),
        "Each creator in 'creators' must be a dictionary with a 'name' key.",
        errors,
    )

    # Check for 'contributors'
    _collect_optional_list_errors(
        json_record,
        "contributors",
        ("name",),
        "Each contributor must be a dictionary with a 'name' key.",
        errors,
    )

    # Check for 'resourceType'
    # Skipped when 'types' is absent or not a dictionary: that problem has
    # already been recorded above, and indexing it here would raise
    # KeyError/TypeError instead of the aggregated ValueError.
    if types is not None:
        if "resourceType" not in types:
            errors.append("'resourceType' field is missing in 'types'.")
        elif not isinstance(types["resourceType"], str):
            errors.append("'resourceType' should be a string.")

    # Check for 'identifiers'
    _collect_optional_list_errors(
        json_record,
        "identifiers",
        ("identifier", "identifierType"),
        "Each identifier must be a dictionary with 'identifier' and 'identifierType' keys.",
        errors,
    )

    # Check for 'subjects'
    _collect_optional_list_errors(
        json_record,
        "subjects",
        ("subject",),
        "Each subject must be a dictionary with a 'subject' key.",
        errors,
    )

    # Check for 'relatedIdentifiers'
    _collect_optional_list_errors(
        json_record,
        "relatedIdentifiers",
        ("relatedIdentifier",),
        "Each relatedIdentifier must be a dictionary with a 'relatedIdentifier' key.",
        errors,
    )

    # Check for 'rightsList'
    _collect_optional_list_errors(
        json_record,
        "rightsList",
        ("rights",),
        "Each entry in 'rightsList' must be a dictionary with a 'rights' key.",
        errors,
    )

    # Check for 'geoLocations'
    if "geoLocations" in json_record:
        if not isinstance(json_record["geoLocations"], list):
            errors.append("'geoLocations' should be a list.")
        else:
            for location in json_record["geoLocations"]:
                if not isinstance(location, dict):
                    errors.append("Each entry in 'geoLocations' must be a dictionary.")
                elif (
                    "geoLocationPoint" not in location
                    and "geoLocationBox" not in location
                    and "geoLocationPlace" not in location
                ):
                    errors.append(
                        "Each geoLocation entry must contain at least one of 'geoLocationPoint', 'geoLocationBox', or 'geoLocationPlace'."
                    )

    # Check for 'fundingReferences'
    if "fundingReferences" in json_record:
        if not isinstance(json_record["fundingReferences"], list):
            errors.append("'fundingReferences' should be a list.")
        else:
            for funding in json_record["fundingReferences"]:
                if not isinstance(funding, dict):
                    errors.append("Each funding reference must be a dictionary.")
                # `elif`, not `if`: a membership test on a non-dict entry is
                # either meaningless (str) or raises TypeError (int, None).
                elif "funderName" not in funding:
                    errors.append("Each funding reference must contain 'funderName'.")

    # Return errors if any are found
    if errors:
        raise ValueError(f"Validation errors in metadata: {', '.join(errors)}")


def _collect_entry_errors(entries, required_keys, entry_message, errors):
    """Append entry_message once per entry that is not a dict with all required_keys."""
    for entry in entries:
        if not isinstance(entry, dict) or any(
            key not in entry for key in required_keys
        ):
            errors.append(entry_message)


def _collect_optional_list_errors(
    json_record, field, required_keys, entry_message, errors
):
    """Validate a field that may be absent but, when present, must be a list of dicts."""
    if field not in json_record:
        return
    if not isinstance(json_record[field], list):
        errors.append(f"'{field}' should be a list.")
        return
    _collect_entry_errors(json_record[field], required_keys, entry_message, errors)


def _collect_required_list_errors(
    json_record, field, required_keys, entry_message, errors
):
    """Validate a field that must be present as a non-empty list of dicts."""
    if field not in json_record:
        errors.append(f"'{field}' field is missing.")
        return
    value = json_record[field]
    if not isinstance(value, list) or not value:
        errors.append(f"'{field}' should be a non-empty list.")
        return
    _collect_entry_errors(value, required_keys, entry_message, errors)
512551

552+
513553
if __name__ == "__main__":
514554
# Read in from file for demo purposes
515555

0 commit comments

Comments
 (0)