@@ -17,6 +17,12 @@ def decustomize_schema(
1717 raise ValueError (f'Error: schema { schema } not defined' )
1818
1919def decustomize_standard (json_record , pass_emails , pass_media , pass_owner ):
20+
21+ # If passed a direct API JSON blob, extract its metadata section
22+ if 'metadata' in json_record :
23+ json_record = json_record ["metadata" ]
24+
25+
2026 # Extract subjects to single string
2127 if "subjects" in json_record :
2228 if isinstance (json_record ["subjects" ], str ):
@@ -166,6 +172,11 @@ def decustomize_standard(json_record, pass_emails, pass_media, pass_owner):
166172 if "descriptionValue" in d :
167173 d ["description" ] = d .pop ("descriptionValue" )
168174
175+ # Wrap the existing rightsList value in a single-element list
176+ print (json_record )
177+ if "rightsList" in json_record :
178+ json_record ["rightsList" ] = [json_record ["rightsList" ]]
179+
169180 # Handle file info
170181 if pass_media == False :
171182 if "electronic_location_and_access" in json_record :
@@ -174,7 +185,6 @@ def decustomize_standard(json_record, pass_emails, pass_media, pass_owner):
174185 others = [
175186 "files" ,
176187 "id" ,
177- "pid_value" ,
178188 "control_number" ,
179189 "_oai" ,
180190 "_form_uuid" ,
@@ -210,6 +220,14 @@ def decustomize_schema_43(json_record, pass_emails, pass_media, pass_owner):
210220 })
211221 del json_record ["doi" ]
212222
223+ # Move resourceType to types; if types lacks a resourceType key, fill it from resourceTypeGeneral
224+ if 'resourceType' in json_record :
225+ json_record ['types' ] = json_record ['resourceType' ]
226+ if 'resourceType' not in json_record ['types' ]:
227+ json_record ['types' ]['resourceType' ] = \
228+ json_record ['resourceType' ]['resourceTypeGeneral' ]
229+ del json_record ["resourceType" ]
230+
213231 # Save CaltechDATA ID in all records
214232 identifiers .append ({
215233 "alternateIdentifier" : json_record ["pid_value" ],
@@ -223,6 +241,7 @@ def decustomize_schema_43(json_record, pass_emails, pass_media, pass_owner):
223241 "identifierType" : altid ["alternateIdentifierType" ],
224242 })
225243 del json_record ["alternateIdentifiers" ]
244+ del json_record ["pid_value" ]
226245 json_record ['identifiers' ] = identifiers
227246
228247 # change author formatting
@@ -236,11 +255,11 @@ def decustomize_schema_43(json_record, pass_emails, pass_media, pass_owner):
236255 a ["authorAffiliation" ] = [a ["authorAffiliation" ]]
237256 affiliation = []
238257 for aff in a ["authorAffiliation" ]:
239- name = {}
240- name [ 'name' ] = a [ " affiliation" ]
241- if 'ROR' in a :
242- name [ 'ROR' ] = a [ 'ROR' ]
243- new ["affiliation" ] = affiliation
258+ if isinstance ( aff , dict ):
259+ affiliation . append ( aff )
260+ else :
261+ affiliation . append ({ ' name' : aff })
262+ new ["affiliation" ] = affiliation
244263 if "authorIdentifiers" in a :
245264 idv = []
246265 if isinstance (a ["authorIdentifiers" ], list ):
@@ -268,12 +287,12 @@ def decustomize_schema_43(json_record, pass_emails, pass_media, pass_owner):
268287 if isinstance (c ["contributorAffiliation" ], list ) == False :
269288 c ["contributorAffiliation" ] = [c ["contributorAffiliation" ]]
270289 affiliation = []
271- for aff in a ["contributorAffiliation" ]:
272- name = {}
273- name [ 'name' ] = a [ " affiliation" ]
274- if 'ROR' in a :
275- name [ 'ROR' ] = a [ 'ROR' ]
276- new ['affiliation' ] = affiliation
290+ for aff in c ["contributorAffiliation" ]:
291+ if isinstance ( aff , dict ):
292+ affiliation . append ( aff )
293+ else :
294+ affiliation . append ({ ' name' : aff })
295+ new ['affiliation' ] = affiliation
277296 if "contributorIdentifiers" in c :
278297 if isinstance (c ["contributorIdentifiers" ], list ):
279298 newa = []
@@ -289,7 +308,7 @@ def decustomize_schema_43(json_record, pass_emails, pass_media, pass_owner):
289308 else :
290309 print ("Contributor identifier not an array - please check" , doi )
291310 del c ["contributorIdentifiers" ]
292- new ["name" ] = c ["creatorName " ]
311+ new ["name" ] = c ["contributorName " ]
293312 if pass_emails == True :
294313 if "contributorEmail" in c :
295314 new ["contributorEmail" ] = c ["contributorEmail" ]
@@ -382,6 +401,7 @@ def decustomize_schema_4(json_record, pass_emails, pass_media, pass_owner):
382401 json_record ["alternateIdentifiers" ].append (idv )
383402 else :
384403 json_record ["alternateIdentifiers" ] = [idv ]
404+ del json_record ["pid_value" ]
385405
386406 return json_record
387407
0 commit comments