Skip to content

Commit 08d9201

Browse files
Add tests and improve CLI
This commit adds tests for both the validator and the CLI. It fixes a bug in the CLI and provides a better error message for incorrect tokens.

Co-authored-by: RohanBhattaraiNP <[email protected]>
1 parent faa360a commit 08d9201

21 files changed

+1853
-12
lines changed

CITATION.cff

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,12 @@ authors:
1111
abstract: Python wrapper for CaltechDATA API.
1212
repository-code: "https://github.com/caltechlibrary/caltechdata_api"
1313
type: software
14+
doi: 10.22002/wfjr5-kw507
1415
version: 1.8.2
1516
license-url: "https://data.caltech.edu/license"
1617
keywords:
1718
- GitHub
1819
- metadata
1920
- software
2021
- InvenioRDM
21-
date-released: 2024-11-06
22+
date-released: 2024-11-08

caltechdata_api/caltechdata_write.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,10 @@ def caltechdata_write(
248248
# Make draft and publish
249249
result = requests.post(url + "/api/records", headers=headers, json=data)
250250
if result.status_code != 201:
251-
raise Exception(result.text)
251+
if result.status_code == 400 and "Referer checking failed" in result.text:
252+
raise Exception("Token is incorrect or missing referer.")
253+
else:
254+
raise Exception(result.text)
252255
idv = result.json()["id"]
253256
publish_link = result.json()["links"]["publish"]
254257

caltechdata_api/cli.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import requests
33
import s3fs
44
from caltechdata_api import caltechdata_write, caltechdata_edit
5-
from .md_to_json import parse_readme_to_json
5+
from md_to_json import parse_readme_to_json
66
import json
77
import os
88
from cryptography.fernet import Fernet
@@ -469,6 +469,7 @@ def create_record(production):
469469
"descriptions": [
470470
{"description": args["description"], "descriptionType": "Abstract"}
471471
],
472+
"publisher": "CaltechDATA",
472473
"creators": [
473474
{
474475
"affiliation": [

caltechdata_api/customize_schema.py

Lines changed: 126 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -393,20 +393,12 @@ def validate_metadata(json_record):
393393
"""
394394
errors = []
395395

396-
# Check for 'types' and 'resourceTypeGeneral'
397-
if "types" not in json_record:
398-
errors.append("'types' field is missing.")
399-
elif not isinstance(json_record["types"], dict):
400-
errors.append("'types' field should be a dictionary.")
401-
elif "resourceTypeGeneral" not in json_record["types"]:
402-
errors.append("'resourceTypeGeneral' field is missing in 'types'.")
403-
404-
# Check for 'title'
405396
if "titles" not in json_record:
406397
errors.append("'titles' field is missing.")
407398
elif not isinstance(json_record["titles"], list) or len(json_record["titles"]) == 0:
408399
errors.append("'titles' should be a non-empty list.")
409400
else:
401+
410402
# Ensure each title is a dictionary with 'title' field
411403
for title in json_record["titles"]:
412404
if not isinstance(title, dict) or "title" not in title:
@@ -480,6 +472,7 @@ def validate_metadata(json_record):
480472
errors.append("'relatedIdentifiers' should be a list.")
481473
else:
482474
for related_id in json_record["relatedIdentifiers"]:
475+
483476
if (
484477
not isinstance(related_id, dict)
485478
or "relatedIdentifier" not in related_id
@@ -493,6 +486,76 @@ def validate_metadata(json_record):
493486
if not isinstance(json_record["rightsList"], list):
494487
errors.append("'rightsList' should be a list.")
495488
else:
489+
490+
for right in json_record["rightsList"]:
491+
if not isinstance(right, dict) or "rights" not in right:
492+
errors.append("Each 'rightsList' entry must have 'rights'.")
493+
if "rightsURI" in right and not isinstance(right["rightsURI"], str):
494+
errors.append("'rightsURI' should be a string.")
495+
496+
# Check for 'subjects'
497+
if "subjects" in json_record:
498+
if not isinstance(json_record["subjects"], list):
499+
errors.append("'subjects' should be a list.")
500+
else:
501+
for subject in json_record["subjects"]:
502+
if not isinstance(subject, dict) or "subject" not in subject:
503+
errors.append("Each 'subject' must have a 'subject' key.")
504+
505+
# Check for 'dates'
506+
if "dates" not in json_record:
507+
errors.append("'dates' field is missing.")
508+
elif not isinstance(json_record["dates"], list) or len(json_record["dates"]) == 0:
509+
errors.append("'dates' should be a non-empty list.")
510+
else:
511+
for date in json_record["dates"]:
512+
if (
513+
not isinstance(date, dict)
514+
or "date" not in date
515+
or "dateType" not in date
516+
):
517+
errors.append("Each 'date' must have 'date' and 'dateType'.")
518+
519+
# Check for 'identifiers'
520+
if "identifiers" not in json_record:
521+
errors.append("'identifiers' field is missing.")
522+
elif (
523+
not isinstance(json_record["identifiers"], list)
524+
or len(json_record["identifiers"]) == 0
525+
):
526+
errors.append("'identifiers' should be a non-empty list.")
527+
else:
528+
for identifier in json_record["identifiers"]:
529+
if (
530+
not isinstance(identifier, dict)
531+
or "identifier" not in identifier
532+
or "identifierType" not in identifier
533+
):
534+
errors.append(
535+
"Each 'identifier' must have 'identifier' and 'identifierType'."
536+
)
537+
538+
# Check for 'creators'
539+
if "creators" not in json_record:
540+
errors.append("'creators' field is missing.")
541+
elif (
542+
not isinstance(json_record["creators"], list)
543+
or len(json_record["creators"]) == 0
544+
):
545+
errors.append("'creators' should be a non-empty list.")
546+
else:
547+
for creator in json_record["creators"]:
548+
if not isinstance(creator, dict) or "name" not in creator:
549+
errors.append("Each 'creator' must have 'name'.")
550+
if "affiliation" in creator:
551+
if not isinstance(creator["affiliation"], list):
552+
errors.append("'affiliation' in 'creators' should be a list.")
553+
for affiliation in creator["affiliation"]:
554+
if not isinstance(affiliation, dict) or "name" not in affiliation:
555+
errors.append(
556+
"Each 'affiliation' in 'creators' must have a 'name'."
557+
)
558+
496559
for rights in json_record["rightsList"]:
497560
if not isinstance(rights, dict) or "rights" not in rights:
498561
errors.append(
@@ -504,6 +567,60 @@ def validate_metadata(json_record):
504567
if not isinstance(json_record["geoLocations"], list):
505568
errors.append("'geoLocations' should be a list.")
506569
else:
570+
571+
for geo_loc in json_record["geoLocations"]:
572+
if not isinstance(geo_loc, dict) or "geoLocationPlace" not in geo_loc:
573+
errors.append("Each 'geoLocation' must have 'geoLocationPlace'.")
574+
if "geoLocationPoint" in geo_loc:
575+
point = geo_loc["geoLocationPoint"]
576+
if (
577+
not isinstance(point, dict)
578+
or "pointLatitude" not in point
579+
or "pointLongitude" not in point
580+
):
581+
errors.append(
582+
"'geoLocationPoint' must have 'pointLatitude' and 'pointLongitude'."
583+
)
584+
585+
# Check for 'formats'
586+
if "formats" in json_record and (
587+
not isinstance(json_record["formats"], list) or len(json_record["formats"]) == 0
588+
):
589+
errors.append("'formats' should be a non-empty list.")
590+
591+
# Check for 'language'
592+
if "language" in json_record:
593+
if not isinstance(json_record["language"], str):
594+
errors.append("'language' should be a string.")
595+
596+
# Check for 'version'
597+
if "version" in json_record and not isinstance(json_record["version"], str):
598+
errors.append("'version' should be a string.")
599+
600+
# Check for 'publisher'
601+
if "publisher" not in json_record:
602+
errors.append("'publisher' field is missing.")
603+
elif not isinstance(json_record["publisher"], str):
604+
errors.append("'publisher' should be a string.")
605+
606+
# Check for 'publicationYear'
607+
if "publicationYear" not in json_record:
608+
errors.append("'publicationYear' field is missing.")
609+
elif not isinstance(json_record["publicationYear"], str):
610+
errors.append("'publicationYear' should be a string.")
611+
612+
# Check for 'types'
613+
if "types" not in json_record:
614+
errors.append("'types' field is missing.")
615+
elif not isinstance(json_record["types"], dict):
616+
errors.append("'types' should be a dictionary.")
617+
else:
618+
if "resourceTypeGeneral" not in json_record["types"]:
619+
errors.append("'types' must have 'resourceTypeGeneral'.")
620+
if "resourceType" in json_record["types"] and not isinstance(
621+
json_record["types"]["resourceType"], str
622+
):
623+
errors.append("'resourceType' should be a string if provided.")
507624
for location in json_record["geoLocations"]:
508625
if not isinstance(location, dict):
509626
errors.append("Each entry in 'geoLocations' must be a dictionary.")

0 commit comments

Comments
 (0)