Skip to content

Commit cf879fe

Browse files
committed
All functions complete and test framework in place
1 parent 59bf835 commit cf879fe

File tree

11 files changed

+327
-137
lines changed

11 files changed

+327
-137
lines changed

caltechdata_api/__init__.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
1-
from .caltechdata_write import caltechdata_write
2-
from .caltechdata_write import send_s3
3-
from .caltechdata_edit import caltechdata_add
4-
from .caltechdata_edit import caltechdata_edit
5-
from .caltechdata_edit import caltechdata_unembargo
1+
from .caltechdata_edit import (caltechdata_add, caltechdata_edit,
2+
caltechdata_unembargo)
3+
from .caltechdata_write import caltechdata_write, send_s3
64
from .customize_schema import customize_schema
75
from .decustomize_schema import decustomize_schema
86
from .get_metadata import get_metadata

caltechdata_api/caltechdata_edit.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1+
import copy
2+
import json
3+
14
from requests import session
2-
import json, copy
3-
from caltechdata_api import customize_schema
4-
from caltechdata_api import send_s3
5+
6+
from caltechdata_api import customize_schema, send_s3
57

68

79
def caltechdata_unembargo(token, ids, production=False):

caltechdata_api/caltechdata_write.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
1+
import copy
2+
import json
3+
import os
4+
15
from requests import session
6+
27
from caltechdata_api import customize_schema
3-
import json, copy
4-
import os
58

69

710
def send_s3(filepath, token, production=False):

caltechdata_api/customize_schema.py

Lines changed: 122 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,19 @@
1-
# Convert a DataCite 4 standard schema json record to the customized internal
1+
# Convert a DataCite 4 or 4.3 standard schema json record to the customized internal
22
# schema used by TIND in CaltechDATA
3-
import json
43
import argparse
4+
import json
55

66

7-
def customize_schema(json_record):
7+
def customize_schema(json_record, schema='4'):
88

9-
# Extract subjects to single string
10-
if "subjects" in json_record:
11-
subjects = json_record["subjects"]
12-
subs = []
13-
for s in subjects:
14-
subs.append(s["subject"])
15-
json_record["subjects"] = subs
9+
if schema == '4':
10+
return customize_schema_4(json_record)
11+
elif schema == '43':
12+
return customize_schema_43(json_record)
13+
else:
14+
raise ValueError(f'Error: schema {schema} not defined')
1615

16+
def customize_schema_4(json_record):
1717
# Extract identifier and label as DOI
1818
if "identifier" in json_record:
1919
identifier = json_record["identifier"]["identifier"]
@@ -22,31 +22,6 @@ def customize_schema(json_record):
2222
del json_record["identifier"]
2323
# will delete other identifiers in file
2424

25-
# Extract description
26-
if "descriptions" in json_record:
27-
for d in json_record["descriptions"]:
28-
d["descriptionValue"] = d["description"]
29-
del d["description"]
30-
31-
# Extract title
32-
if "titles" in json_record:
33-
titles = json_record["titles"]
34-
for t in titles:
35-
if "titleType" not in t:
36-
json_record["title"] = t["title"]
37-
del json_record["titles"]
38-
39-
# Language - only translating english
40-
if "language" in json_record:
41-
if json_record["language"] == "en":
42-
json_record["language"] = "eng"
43-
44-
# Change related identifier labels
45-
if "relatedIdentifiers" in json_record:
46-
for listing in json_record["relatedIdentifiers"]:
47-
listing["relatedIdentifierRelation"] = listing.pop("relationType")
48-
listing["relatedIdentifierScheme"] = listing.pop("relatedIdentifierType")
49-
5025
# change author formatting
5126
# We're dropping URIs
5227
if "creators" in json_record:
@@ -102,6 +77,117 @@ def customize_schema(json_record):
10277
newc.append(new)
10378
json_record["contributors"] = newc
10479

80+
81+
def customize_schema_43(json_record):
    """Convert the DataCite 4.3 specific fields of a record, in place.

    Rewrites three top-level entries of ``json_record``:

    * ``identifiers``  -> ``doi`` (the entry whose ``identifierType`` is
      ``"DOI"``) and ``alternativeIdentifiers`` (every other entry, only set
      when at least one exists); the ``identifiers`` key is then removed.
    * ``creators``     -> ``authors`` with ``authorName``,
      ``authorAffiliation`` and ``authorIdentifiers``; the ``creators`` key is
      then removed.
    * ``contributors`` -> rewritten under the same key with
      ``contributorName``, ``contributorAffiliation``,
      ``contributorIdentifiers``, ``contributorType`` and
      ``contributorEmail``.

    Any other key on a creator/contributor (e.g. URIs) is dropped.

    Args:
        json_record: dict holding the record; it is mutated.

    Returns:
        The same ``json_record`` object, so the result can be returned
        directly by a caller that dispatches on schema version.

    Raises:
        KeyError: if an identifier lacks ``identifierType``, a creator or
            contributor lacks ``name``, or a name identifier lacks
            ``nameIdentifier`` / ``nameIdentifierScheme``.
    """
    # Extract identifiers and label as DOI or alternativeIdentifiers
    if "identifiers" in json_record:
        alt = []
        for identifier in json_record["identifiers"]:
            if identifier["identifierType"] == "DOI":
                # NOTE(review): the whole identifier entry is stored under
                # "doi", not just its value - confirm this is what the
                # downstream schema expects.
                json_record["doi"] = identifier
            else:
                alt.append(identifier)
        if alt:
            json_record["alternativeIdentifiers"] = alt
        # Remove the key that was actually read above.
        del json_record["identifiers"]

    # change author formatting
    # We're dropping URIs
    if "creators" in json_record:
        newa = []
        for a in json_record["creators"]:
            new = {}
            if "affiliations" in a:
                new["authorAffiliation"] = _customize_affiliations_43(
                    a["affiliations"]
                )
            new["authorName"] = a["name"]
            if "nameIdentifiers" in a:
                new["authorIdentifiers"] = [
                    {
                        "authorIdentifier": n["nameIdentifier"],
                        "authorIdentifierScheme": n["nameIdentifierScheme"],
                    }
                    for n in a["nameIdentifiers"]
                ]
            newa.append(new)
        json_record["authors"] = newa
        del json_record["creators"]

    # change contributor formatting; URIs are dropped here as well
    if "contributors" in json_record:
        newc = []
        for c in json_record["contributors"]:
            new = {}
            if "nameIdentifiers" in c:
                new["contributorIdentifiers"] = [
                    {
                        "contributorIdentifier": n["nameIdentifier"],
                        "contributorIdentifierScheme": n["nameIdentifierScheme"],
                    }
                    for n in c["nameIdentifiers"]
                ]
            # Read the affiliations of the contributor being processed, not
            # of a variable left over from the creators loop.
            if "affiliations" in c:
                new["contributorAffiliation"] = _customize_affiliations_43(
                    c["affiliations"]
                )
            new["contributorName"] = c["name"]
            if "contributorType" in c:
                new["contributorType"] = c["contributorType"]
            if "contributorEmail" in c:
                new["contributorEmail"] = c["contributorEmail"]
            newc.append(new)
        json_record["contributors"] = newc

    return json_record


def _customize_affiliations_43(affiliations):
    """Map a list of affiliation entries to the customized affiliation list.

    Each output item carries ``affiliation`` (taken from the entry's
    ``name``) and, when the entry has one, ``ROR``.
    """
    # assumes every entry is a dict with a "name" key - TODO confirm against
    # the records actually submitted
    converted = []
    for aff in affiliations:
        entry = {"affiliation": aff["name"]}
        if "ROR" in aff:
            entry["ROR"] = aff["ROR"]
        converted.append(entry)
    return converted
154+
155+
156+
def customize_standard(json_record)
157+
158+
# Extract subjects to single string
159+
if "subjects" in json_record:
160+
subjects = json_record["subjects"]
161+
subs = []
162+
for s in subjects:
163+
subs.append(s["subject"])
164+
json_record["subjects"] = subs
165+
166+
# Extract description
167+
if "descriptions" in json_record:
168+
for d in json_record["descriptions"]:
169+
d["descriptionValue"] = d["description"]
170+
del d["description"]
171+
172+
# Extract title
173+
if "titles" in json_record:
174+
titles = json_record["titles"]
175+
for t in titles:
176+
if "titleType" not in t:
177+
json_record["title"] = t["title"]
178+
del json_record["titles"]
179+
180+
# Language - only translating english
181+
if "language" in json_record:
182+
if json_record["language"] == "en":
183+
json_record["language"] = "eng"
184+
185+
# Change related identifier labels
186+
if "relatedIdentifiers" in json_record:
187+
for listing in json_record["relatedIdentifiers"]:
188+
listing["relatedIdentifierRelation"] = listing.pop("relationType")
189+
listing["relatedIdentifierScheme"] = listing.pop("relatedIdentifierType")
190+
105191
# format
106192
if "formats" in json_record:
107193
json_record["format"] = json_record.pop("formats")
@@ -177,7 +263,7 @@ def customize_schema(json_record):
177263
# Read in from file for demo purposes
178264

179265
parser = argparse.ArgumentParser(
180-
description="customize_schema converts a DataCite 4 standard json record\
266+
description="customize_schema converts a DataCite 4 or 4.3 standard json record\
181267
to TIND customized internal schema in CaltechDATA"
182268
)
183269
parser.add_argument("json_files", nargs="+", help="json file name")

0 commit comments

Comments
 (0)