22import json
33import argparse
44
5- def decustomize_schema (json_record ):
5+ def decustomize_schema (json_record , pass_emails = False ):
66
77 #Extract subjects to single string
88 if "subjects" in json_record :
@@ -62,14 +62,17 @@ def decustomize_schema(json_record):
6262 new ['affiliations' ] = [a ['authorAffiliation' ]]
6363 if 'authorIdentifiers' in a :
6464 idv = []
65- for cid in a ['authorIdentifiers' ]:
66- nid = {}
67- nid ['nameIdentifier' ] = \
68- cid .pop ('authorIdentifier' )
69- nid ['nameIdentifierScheme' ] = \
70- cid .pop ('authorIdentifierScheme' )
71- idv .append (nid )
72- new ['nameIdentifiers' ]= idv
65+ if isinstance (a ['authorIdentifiers' ],list ):
66+ for cid in a ['authorIdentifiers' ]:
67+ nid = {}
68+ nid ['nameIdentifier' ] = \
69+ cid .pop ('authorIdentifier' )
70+ nid ['nameIdentifierScheme' ] = \
71+ cid .pop ('authorIdentifierScheme' )
72+ idv .append (nid )
73+ new ['nameIdentifiers' ]= idv
74+ else :
75+ print ("Author identifiers not an array - please check" )
7376 del a ['authorIdentifiers' ]
7477 new ['creatorName' ] = a ['authorName' ]
7578 newa .append (new )
@@ -85,26 +88,23 @@ def decustomize_schema(json_record):
8588 else :
8689 c ['affiliations' ] = [c .pop ('contributorAffiliation' )]
8790 if 'contributorIdentifiers' in c :
88- # if isinstance(c['contributorIdentifiers'],list):
89- newa = []
90- for cid in c ['contributorIdentifiers' ]:
91- new = {}
92- new ['nameIdentifier' ] = \
91+ if isinstance (c ['contributorIdentifiers' ],list ):
92+ newa = []
93+ for cid in c ['contributorIdentifiers' ]:
94+ new = {}
95+ new ['nameIdentifier' ] = \
9396 cid .pop ('contributorIdentifier' )
94- if 'contributorIdentifierScheme' in cid :
95- new ['nameIdentifierScheme' ] = \
97+ if 'contributorIdentifierScheme' in cid :
98+ new ['nameIdentifierScheme' ] = \
9699 cid .pop ('contributorIdentifierScheme' )
97- newa .append (new )
98- c ['nameIdentifiers' ]= newa
100+ newa .append (new )
101+ c ['nameIdentifiers' ]= newa
102+ else :
103+ print ("Contributor identifier not an array - please check" )
99104 del c ['contributorIdentifiers' ]
100- #else:
101- # c['contributorIdentifiers']['nameIdentifier'] =\
102- # c['contributorIdentifiers'].pop('contributorIdentifier')
103- # c['contributorIdentifiers']['nameIdentifierScheme'] =\
104- # c['contributorIdentifiers'].pop('contributorIdentifierScheme')
105- # c['nameIdentifiers'] = [c.pop('contributorIdentifiers')]
106- if 'contributorEmail' in c :
107- del c ['contributorEmail' ]
105+ if pass_emails == False :
106+ if 'contributorEmail' in c :
107+ del c ['contributorEmail' ]
108108 #format
109109 if "format" in json_record :
110110 if isinstance (json_record ['format' ],list ):
@@ -124,21 +124,26 @@ def decustomize_schema(json_record):
124124 json_record ['dates' ]= json_record .pop ('relevantDates' )
125125
126126 #set publicationYear
127- year = json_record ['publicationDate' ].split ('-' )[0 ]
128- json_record ['publicationYear' ] = year
129- #If "Submitted' date type was not manually set in metadata
130- #Or 'Issued was not manually set
131- #We want to save the entire publicationDate
132- if 'Submitted' in datetypes or 'Issued' in datetypes :
133- print ("Custom Dates Present-Dropping TIND Publication Date" )
134- else :
135- if 'dates' in json_record :
136- json_record ['dates' ].append ({"date" :json_record ['publicationDate' ],\
137- "dateType" : "Submitted" })
127+ if 'publicationDate' in json_record :
128+ year = json_record ['publicationDate' ].split ('-' )[0 ]
129+ json_record ['publicationYear' ] = year
130+
131+ #If 'Submitted' date type was not manually set in metadata
132+ #Or 'Issued' was not manually set
133+ #We want to save the entire publicationDate
134+ if 'Submitted' in datetypes or 'Issued' in datetypes :
135+ print ("Custom Dates Present-Dropping TIND Publication Date" )
138136 else :
139- json_record ['dates' ]= [{"date" :json_record ['publicationDate' ],\
137+ if 'dates' in json_record :
138+ json_record ['dates' ].append ({"date" :json_record ['publicationDate' ],\
139+ "dateType" : "Submitted" })
140+ else :
141+ json_record ['dates' ]= [{"date" :json_record ['publicationDate' ],\
140142 "dateType" : "Submitted" }]
141- del json_record ['publicationDate' ]
143+ del json_record ['publicationDate' ]
144+
145+ else :
146+ print ("No publication date set - something is odd with the record" )
142147
143148 #license - no url available
144149 if 'rightsList' not in json_record :
@@ -152,7 +157,7 @@ def decustomize_schema(json_record):
152157 if 'fundings' in json_record :
153158 #Metadata changes and all should all be DataCite standard
154159 #Clean out any residual issues
155- print ("Check funding information" )
160+ print ("Legacy funding information (fundings) not transferred " )
156161 del json_record ['fundings' ]
157162
158163 #Geo
0 commit comments