1- # Convert a DataCite 4 standard schema json record to the customized internal
1+ # Convert a DataCite 4 or 4.3 standard schema json record to the customized internal
22# schema used by TIND in CaltechDATA
3- import json
43import argparse
4+ import json
55
66
def customize_schema(json_record, schema='4'):
    """Convert a DataCite json record to the customized internal schema.

    ``schema`` selects the converter: ``'4'`` (the default) hands the
    record to ``customize_schema_4`` and ``'43'`` hands it to
    ``customize_schema_43``. Whatever the selected converter returns is
    passed back to the caller unchanged.

    Raises:
        ValueError: if ``schema`` is neither ``'4'`` nor ``'43'``.
    """
    if schema == '4':
        return customize_schema_4(json_record)
    if schema == '43':
        return customize_schema_43(json_record)
    # Any other value has no converter defined.
    raise ValueError(f'Error: schema {schema} not defined')
1615
16+ def customize_schema_4 (json_record ):
1717 # Extract identifier and label as DOI
1818 if "identifier" in json_record :
1919 identifier = json_record ["identifier" ]["identifier" ]
@@ -22,31 +22,6 @@ def customize_schema(json_record):
2222 del json_record ["identifier" ]
2323 # will delete other identifiers in file
2424
25- # Extract description
26- if "descriptions" in json_record :
27- for d in json_record ["descriptions" ]:
28- d ["descriptionValue" ] = d ["description" ]
29- del d ["description" ]
30-
31- # Extract title
32- if "titles" in json_record :
33- titles = json_record ["titles" ]
34- for t in titles :
35- if "titleType" not in t :
36- json_record ["title" ] = t ["title" ]
37- del json_record ["titles" ]
38-
39- # Language - only translating english
40- if "language" in json_record :
41- if json_record ["language" ] == "en" :
42- json_record ["language" ] = "eng"
43-
44- # Change related identifier labels
45- if "relatedIdentifiers" in json_record :
46- for listing in json_record ["relatedIdentifiers" ]:
47- listing ["relatedIdentifierRelation" ] = listing .pop ("relationType" )
48- listing ["relatedIdentifierScheme" ] = listing .pop ("relatedIdentifierType" )
49-
5025 # change author formatting
5126 # We're dropping URIs
5227 if "creators" in json_record :
@@ -102,6 +77,117 @@ def customize_schema(json_record):
10277 newc .append (new )
10378 json_record ["contributors" ] = newc
10479
80+
def _affiliations_43(person):
    """Convert the ``"affiliations"`` list of one creator/contributor.

    Each entry becomes a dict holding the entry's ``"name"`` under
    ``"affiliation"`` and, when the entry carries one, its ``"ROR"`` value.
    """
    converted = []
    for aff in person["affiliations"]:
        entry = {"affiliation": aff["name"]}
        if "ROR" in aff:
            entry["ROR"] = aff["ROR"]
        converted.append(entry)
    return converted


def _name_identifiers_43(person, prefix):
    """Convert the ``"nameIdentifiers"`` list of one creator/contributor.

    ``prefix`` is ``"author"`` or ``"contributor"`` and is used to build the
    output keys ``<prefix>Identifier`` and ``<prefix>IdentifierScheme``.
    Only the identifier and its scheme are copied; any other fields of the
    entry (such as URIs) are dropped.
    """
    return [
        {
            f"{prefix}Identifier": n["nameIdentifier"],
            f"{prefix}IdentifierScheme": n["nameIdentifierScheme"],
        }
        for n in person["nameIdentifiers"]
    ]


def customize_schema_43(json_record):
    """Convert a DataCite 4.3 json record to the customized internal schema.

    The record is modified in place and also returned, so the result can be
    passed straight back by ``customize_schema``.

    Conversions performed:
      * ``identifiers``: the identifier whose ``identifierType`` is ``"DOI"``
        is stored as the string ``doi``; every other identifier is kept
        under ``alternativeIdentifiers``. ``identifiers`` is then removed.
      * ``creators``: rewritten as ``authors`` (``authorName``,
        ``authorAffiliation``, ``authorIdentifiers``) and removed.
      * ``contributors``: rewritten in place with ``contributor*`` keys.

    NOTE(review): the shared field conversions in ``customize_standard`` are
    not applied here - confirm whether callers are expected to run them.

    Raises:
        KeyError: if an entry lacks a field that is read unconditionally
            (``identifierType``, ``name``, ``nameIdentifier``, ...).
    """
    # Extract identifiers and label as DOI or alternativeIdentifiers
    if "identifiers" in json_record:
        alt = []
        for identifier in json_record["identifiers"]:
            if identifier["identifierType"] == "DOI":
                # Store the DOI string itself, not the wrapping dict.
                json_record["doi"] = identifier["identifier"]
            else:
                alt.append(identifier)
        if alt:
            json_record["alternativeIdentifiers"] = alt
        del json_record["identifiers"]

    # change author formatting
    # We're dropping URIs
    if "creators" in json_record:
        authors = []
        for creator in json_record["creators"]:
            author = {}
            if "affiliations" in creator:
                author["authorAffiliation"] = _affiliations_43(creator)
            author["authorName"] = creator["name"]
            if "nameIdentifiers" in creator:
                author["authorIdentifiers"] = _name_identifiers_43(
                    creator, "author"
                )
            authors.append(author)
        json_record["authors"] = authors
        del json_record["creators"]

    # strip contributor URI
    if "contributors" in json_record:
        contributors = []
        for contributor in json_record["contributors"]:
            new = {}
            if "nameIdentifiers" in contributor:
                new["contributorIdentifiers"] = _name_identifiers_43(
                    contributor, "contributor"
                )
            if "affiliations" in contributor:
                new["contributorAffiliation"] = _affiliations_43(contributor)
            new["contributorName"] = contributor["name"]
            # Optional fields are copied through unchanged when present.
            for key in ("contributorType", "contributorEmail"):
                if key in contributor:
                    new[key] = contributor[key]
            contributors.append(new)
        json_record["contributors"] = contributors

    return json_record
154+
155+
156+ def customize_standard (json_record )
157+
158+ # Extract subjects to single string
159+ if "subjects" in json_record :
160+ subjects = json_record ["subjects" ]
161+ subs = []
162+ for s in subjects :
163+ subs .append (s ["subject" ])
164+ json_record ["subjects" ] = subs
165+
166+ # Extract description
167+ if "descriptions" in json_record :
168+ for d in json_record ["descriptions" ]:
169+ d ["descriptionValue" ] = d ["description" ]
170+ del d ["description" ]
171+
172+ # Extract title
173+ if "titles" in json_record :
174+ titles = json_record ["titles" ]
175+ for t in titles :
176+ if "titleType" not in t :
177+ json_record ["title" ] = t ["title" ]
178+ del json_record ["titles" ]
179+
180+ # Language - only translating english
181+ if "language" in json_record :
182+ if json_record ["language" ] == "en" :
183+ json_record ["language" ] = "eng"
184+
185+ # Change related identifier labels
186+ if "relatedIdentifiers" in json_record :
187+ for listing in json_record ["relatedIdentifiers" ]:
188+ listing ["relatedIdentifierRelation" ] = listing .pop ("relationType" )
189+ listing ["relatedIdentifierScheme" ] = listing .pop ("relatedIdentifierType" )
190+
105191 # format
106192 if "formats" in json_record :
107193 json_record ["format" ] = json_record .pop ("formats" )
@@ -177,7 +263,7 @@ def customize_schema(json_record):
177263 # Read in from file for demo purposes
178264
179265 parser = argparse .ArgumentParser (
180- description = "customize_schema converts a DataCite 4 standard json record\
266+ description = "customize_schema converts a DataCite 4 or 4.3 standard json record\
181267 to TIND customized internal schema in CaltechDATA"
182268 )
183269 parser .add_argument ("json_files" , nargs = "+" , help = "json file name" )
0 commit comments