Skip to content

Commit 5d01f3a

Browse files
committed
Round trip metadata for TCCON
1 parent 5db975b commit 5d01f3a

File tree

3 files changed

+54
-44
lines changed

3 files changed

+54
-44
lines changed

caltechdata_api/customize_schema.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@ def customize_schema(json_record):
4747
listing['relatedIdentifierScheme'] = listing.pop('relatedIdentifierType')
4848

4949
#change author formatting
50-
#We're only supporting ORCIDS, and losing all URIs
50+
#We're dropping URIs
5151
if "creators" in json_record:
5252
authors = json_record['creators']
5353
newa = []

caltechdata_api/decustomize_schema.py

Lines changed: 52 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -56,7 +56,7 @@ def decustomize_schema(json_record):
5656
for a in authors:
5757
new = {}
5858
if 'authorAffiliation' in a:
59-
new['affiliations'] = [a['authorAffiliation']]
59+
new['affiliations'] = a['authorAffiliation']
6060
if 'authorIdentifiers' in a:
6161
idv = []
6262
for cid in a['authorIdentifiers']:
@@ -77,25 +77,25 @@ def decustomize_schema(json_record):
7777
if "contributors" in json_record:
7878
for c in json_record['contributors']:
7979
if 'contributorAffiliation' in c:
80-
c['affiliations'] = [c.pop('contributorAffiliation')]
80+
c['affiliations'] = c.pop('contributorAffiliation')
8181
if 'contributorIdentifiers' in c:
82-
if isinstance(c['contributorIdentifiers'],list):
83-
newa = []
84-
for cid in c['contributorIdentifiers']:
85-
new = {}
86-
new['nameIdentifier'] =\
82+
#if isinstance(c['contributorIdentifiers'],list):
83+
newa = []
84+
for cid in c['contributorIdentifiers']:
85+
new = {}
86+
new['nameIdentifier'] =\
8787
cid.pop('contributorIdentifier')
88-
new['nameIdentifierScheme'] =\
88+
new['nameIdentifierScheme'] =\
8989
cid.pop('contributorIdentifierScheme')
90-
newa.append(new)
91-
c['nameIdentifiers']=newa
92-
del c['contributorIdentifiers']
93-
else:
94-
c['contributorIdentifiers']['nameIdentifier'] =\
95-
c['contributorIdentifiers'].pop('contributorIdentifier')
96-
c['contributorIdentifiers']['nameIdentifierScheme'] =\
97-
c['contributorIdentifiers'].pop('contributorIdentifierScheme')
98-
c['nameIdentifiers'] = [c.pop('contributorIdentifiers')]
90+
newa.append(new)
91+
c['nameIdentifiers']=newa
92+
del c['contributorIdentifiers']
93+
#else:
94+
# c['contributorIdentifiers']['nameIdentifier'] =\
95+
# c['contributorIdentifiers'].pop('contributorIdentifier')
96+
# c['contributorIdentifiers']['nameIdentifierScheme'] =\
97+
# c['contributorIdentifiers'].pop('contributorIdentifierScheme')
98+
# c['nameIdentifiers'] = [c.pop('contributorIdentifiers')]
9999
if 'contributorEmail' in c:
100100
del c['contributorEmail']
101101
#format
@@ -131,38 +131,48 @@ def decustomize_schema(json_record):
131131
del json_record['publicationDate']
132132

133133
#license - no url available
134-
if 'license' in json_record:
135-
json_record['rightsList']=[{"rights":json_record.pop('license')}]
134+
if 'rightsList' not in json_record:
135+
if 'license' in json_record:
136+
json_record['rightsList']=[{"rights":json_record.pop('license')}]
136137

137138
#Funding
138139
if 'fundings' in json_record:
139-
funding = json_record['fundings']
140-
newf = []
141-
for f in funding:
142-
frec = {}
143-
if 'fundingName' in f:
144-
frec['funderName'] = f['fundingName']
145-
#f['fundingName']=f.pop('funderName')
146-
if 'fundingAwardNumber' in f:
147-
frec['awardNumber']={'awardNumber':f['fundingAwardNumber']}
148-
newf.append(frec)
149-
json_record['fundingReferences']=newf
140+
#funding = json_record['fundings']
141+
#newf = []
142+
#for f in funding:
143+
# frec = {}
144+
# if 'fundingName' in f:
145+
# frec['funderName'] = f['fundingName']
146+
# #f['fundingName']=f.pop('funderName')
147+
# if 'fundingAwardNumber' in f:
148+
# frec['awardNumber']={'awardNumber':f['fundingAwardNumber']}
149+
# newf.append(frec)
150+
#json_record['fundingReferences']=newf
150151
del json_record['fundings']
151152

152153
#Geo
153154
if 'geographicCoverage' in json_record:
154155
geo = json_record['geographicCoverage']
155-
newgeo = {}
156-
if 'geoLocationPlace' in geo:
157-
newgeo['geoLocationPlace'] = geo['geoLocationPlace']
158-
if 'geoLocationPoint' in geo:
159-
pt = geo['geoLocationPoint'][0]
160-
newpt = {}
161-
newpt['pointLatitude'] = float(pt['pointLatitude'])
162-
newpt['pointLongitude'] = float(pt['pointLongitude'])
163-
newgeo['geoLocationPoint'] = newpt
164-
json_record['geoLocations'] = [newgeo]
165-
del json_record['geographicCoverage']
156+
if isinstance(geo,list):
157+
#We have the correct formatting
158+
for g in geo:
159+
if 'geoLocationPoint' in g:
160+
pt = g['geoLocationPoint']
161+
pt['pointLatitude'] = float(pt['pointLatitude'])
162+
pt['pointLongitude'] = float(pt['pointLongitude'])
163+
json_record['geoLocations']=json_record.pop('geographicCoverage')
164+
else:
165+
newgeo = {}
166+
if 'geoLocationPlace' in geo:
167+
newgeo['geoLocationPlace'] = geo['geoLocationPlace']
168+
if 'geoLocationPoint' in geo:
169+
pt = geo['geoLocationPoint'][0]
170+
newpt = {}
171+
newpt['pointLatitude'] = float(pt['pointLatitude'])
172+
newpt['pointLongitude'] = float(pt['pointLongitude'])
173+
newgeo['geoLocationPoint'] = newpt
174+
json_record['geoLocations'] = [newgeo]
175+
del json_record['geographicCoverage']
166176

167177
#Publisher
168178
if "publishers" in json_record:
@@ -180,7 +190,7 @@ def decustomize_schema(json_record):
180190

181191
others = ['files', 'id', 'owners', 'pid_value', 'control_number', '_oai',
182192
'_form_uuid', 'electronic_location_and_access', 'access_right',
183-
'embargo_date']
193+
'embargo_date','license']
184194
for v in others:
185195
if v in json_record:
186196
del json_record[v]

caltechdata_api/get_metadata.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ def get_metadata(idv,production=True):
1313
r = requests.get(api_url+str(idv))
1414
metadata = r.json()['metadata']
1515
metadata = decustomize_schema(metadata)
16-
16+
1717
try:
1818
assert schema40.validate(metadata)
1919
except AssertionError:

0 commit comments

Comments
 (0)