Skip to content

Commit 99b79f6

Browse files
authored
some improvements on schema parsing (#304)
1 parent 3ab19ca commit 99b79f6

File tree

1 file changed

+68
-37
lines changed

1 file changed

+68
-37
lines changed

pygeometa/schemas/schema_org/__init__.py

Lines changed: 68 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -125,46 +125,66 @@ def import_(self, metadata: str) -> dict:
125125
id_ = md.get('identifier', md.get('@id'))
126126
mcf['metadata']['identifier'] = id_
127127
mcf['metadata']['charset'] = 'utf-8'
128-
mcf['metadata']['type'] = TYPES[md.get('type', 'Dataset')]
128+
mcf['metadata']['type'] = TYPES.get(
129+
md.get('type', 'Dataset'), 'dataset')
129130
mcf['metadata']['language'] = md.get('inLanguage', 'en')
130131

131132
if 'spatialCoverage' in md or 'spatial' in md:
132-
sc = _get_list_or_dict(md['spatialCoverage'])
133-
crs = 4326
134-
135-
geo = _get_list_or_dict(sc['geo'])
136-
137-
if geo['@type'] == 'GeoCoordinates':
138-
mcf['spatial']['datatype'] = 'vector'
139-
mcf['spatial']['geomtype'] = 'point'
140-
bbox = [geo['longitude'], geo['latitude'],
141-
geo['longitude'], geo['latitude']]
142-
elif geo['@type'] == 'GeoShape':
143-
mcf['spatial']['datatype'] = 'vector'
144-
mcf['spatial']['geomtype'] = 'polygon'
145-
bt = geo['box'].split()
146-
bbox = bt[1], bt[0], bt[3], bt[2]
133+
if 'spatialCoverage' in md:
134+
sc = _get_list_or_dict(md['spatialCoverage'])
147135
else:
148-
bbox = [-180, -90, 180, 90]
136+
sc = _get_list_or_dict(md['spatial'])
149137

150-
mcf['identification']['extents']['spatial'].append({
151-
'bbox': bbox,
152-
'crs': crs
153-
})
138+
crs = 4326
154139

155-
if 'temporalCoverage' in md:
156-
begin, end = md['temporalCoverage'][0].split('/')
140+
if isinstance(sc, str):
141+
# simple location name
142+
mcf['identification']['extents']['spatial'].append({
143+
'description': sc
144+
})
145+
elif 'geo' in sc:
146+
geo = _get_list_or_dict(sc['geo'])
147+
148+
if geo['@type'] == 'GeoCoordinates':
149+
mcf['spatial']['datatype'] = 'vector'
150+
mcf['spatial']['geomtype'] = 'point'
151+
bbox = [geo['longitude'], geo['latitude'],
152+
geo['longitude'], geo['latitude']]
153+
elif geo['@type'] == 'GeoShape':
154+
mcf['spatial']['datatype'] = 'vector'
155+
mcf['spatial']['geomtype'] = 'polygon'
156+
bt = geo['box'].split()
157+
bbox = bt[1], bt[0], bt[3], bt[2]
158+
else:
159+
bbox = [-180, -90, 180, 90]
160+
161+
mcf['identification']['extents']['spatial'].append({
162+
'bbox': bbox,
163+
'crs': crs
164+
})
165+
166+
if 'temporalCoverage' in md or 'temporal' in md:
167+
if 'temporalCoverage' in md:
168+
tc = _get_list_or_dict(md['temporalCoverage'])
169+
else:
170+
tc = _get_list_or_dict(md['temporal'])
171+
beg_end = tc.split('/')
172+
begin = beg_end[0]
173+
end = ''
174+
if len(beg_end) > 1:
175+
end = beg_end[1]
157176
mcf['identification']['extents']['temporal'] = [{
158177
'begin': begin,
159178
'end': end
160179
}]
161180

162181
mcf['identification']['language'] = mcf['metadata']['language']
163-
mcf['identification']['title'] = md['name']
164-
mcf['identification']['abstract'] = md['description']
182+
mcf['identification']['title'] = md.get('name', md.get('title', ''))
183+
mcf['identification']['abstract'] = md.get('description',
184+
md.get('abstract', ''))
165185

166186
if 'dateCreated' in md:
167-
mcf['identification']['creation'] = md['datePublished']
187+
mcf['identification']['creation'] = md['dateCreated']
168188
if 'datePublished' in md:
169189
mcf['identification']['publication'] = md['datePublished'] # noqa
170190
if 'dateModified' in md:
@@ -173,19 +193,30 @@ def import_(self, metadata: str) -> dict:
173193
if 'version' in md:
174194
mcf['identification']['edition'] = md['version']
175195

176-
mcf['identification']['keywords'] = {
177-
'default': {
178-
'keywords': md['keywords']
196+
if 'keywords' in md:
197+
mcf['identification']['keywords'] = {
198+
'default': {
199+
'keywords': md['keywords']
200+
}
179201
}
180-
}
181202

182-
for dist in md['distribution']:
183-
mcf['distribution'][dist['name']] = {
184-
'name': dist['name'],
185-
'type': dist['encodingFormat'],
186-
'url': dist['contentUrl'],
187-
'rel': 'download',
188-
'function': 'download'
203+
if 'distribution' in md:
204+
for dist in md['distribution']:
205+
mcf['distribution'][dist['name']] = {
206+
'name': dist['name'],
207+
'type': dist['encodingFormat'],
208+
'url': dist['contentUrl'],
209+
'rel': 'download',
210+
'function': 'download'
211+
}
212+
213+
if 'url' in md:
214+
mcf['distribution']['landingPage'] = {
215+
'name': 'landing page',
216+
'type': 'text/html',
217+
'url': md['url'],
218+
'rel': 'alternate',
219+
'function': 'information'
189220
}
190221

191222
for ct in ['author', 'publisher', 'creator', 'provider', 'funder']:

0 commit comments

Comments
 (0)