Skip to content

Commit acfd080

Browse files
committed
handling cases with contexts for v1 and v2
1 parent 22c9d6e commit acfd080

File tree

1 file changed

+50
-19
lines changed

1 file changed

+50
-19
lines changed

pyclowder/extractors.py

Lines changed: 50 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from pyclowder.utils import CheckMessage, setup_logging
2424
import pyclowder.files
2525
import pyclowder.datasets
26+
from functools import reduce
2627

2728
clowder_version = int(os.getenv('CLOWDER_VERSION', '1'))
2829

@@ -229,7 +230,7 @@ def _get_extractor_info_v2(self):
229230
return current_extractor_info
230231

231232

232-
def get_metadata(self, content, resource_type, resource_id, server=None):
233+
def get_metadata(self, content, resource_type, resource_id, server=None, contexts=None):
233234
"""Generate a metadata field.
234235
235236
This will return a metadata dict that is valid JSON-LD. This will use the results as well as the information
@@ -254,33 +255,63 @@ def get_metadata(self, content, resource_type, resource_id, server=None):
254255
if not self._check_key(k, self.extractor_info['contexts']):
255256
logger.debug("Simple check could not find %s in contexts" % k)
256257
# TODO generate clowder2.0 extractor info
257-
if clowder_version == 2:
258+
if clowder_version == 2.0:
258259
new_extractor_info = self._get_extractor_info_v2()
259260
md = dict()
260261
md["file_version"] = 1
261-
md["context"] = self.extractor_info["contexts"][0]
262+
if contexts is not None:
263+
md["context"] = contexts
264+
else:
265+
md["context"] = {}
266+
if type(self.extractor_info['contexts'] == list):
267+
if len(self.extractor_info['contexts']) > 0:
268+
if len(self.extractor_info == 1):
269+
md["context"] = self.extractor_info["contexts"]
270+
else:
271+
# TODO is this necessary? Should contexts always be a list with one dictionary?
272+
current_contexts = self.extractor_info["contexts"]
273+
reduce(lambda a, b: dict(a, **b), current_contexts)
262274
md["context_url"] = context_url
263275
md["content"] = content
264276
md["contents"] = content
265277
md["extractor_info"] = new_extractor_info
266278
return md
267279
else:
268-
return {
269-
'@context': [context_url] + self.extractor_info['contexts'],
270-
'attachedTo': {
271-
'resourceType': resource_type,
272-
'id': resource_id
273-
},
274-
'agent': {
275-
'@type': 'cat:extractor',
276-
'extractor_id': '%sextractors/%s/%s' %
277-
(server, self.extractor_info['name'], self.extractor_info['version']),
278-
'version': self.extractor_info['version'],
279-
'name': self.extractor_info['name']
280-
},
281-
'content': content
282-
}
283-
280+
# TODO handle cases where contexts are either not available or are dynamically generated
281+
if contexts is not None:
282+
md = {
283+
'@context': [context_url] + contexts,
284+
'attachedTo': {
285+
'resourceType': resource_type,
286+
'id': resource_id
287+
},
288+
'agent': {
289+
'@type': 'cat:extractor',
290+
'extractor_id': '%sextractors/%s/%s' %
291+
(server, self.extractor_info['name'], self.extractor_info['version']),
292+
'version': self.extractor_info['version'],
293+
'name': self.extractor_info['name']
294+
},
295+
'content': content
296+
}
297+
return md
298+
else:
299+
md = {
300+
'@context': [context_url] + self.extractor_info['contexts'],
301+
'attachedTo': {
302+
'resourceType': resource_type,
303+
'id': resource_id
304+
},
305+
'agent': {
306+
'@type': 'cat:extractor',
307+
'extractor_id': '%sextractors/%s/%s' %
308+
(server, self.extractor_info['name'], self.extractor_info['version']),
309+
'version': self.extractor_info['version'],
310+
'name': self.extractor_info['name']
311+
},
312+
'content': content
313+
}
314+
return md
284315
def _check_key(self, key, obj):
285316
if key in obj:
286317
return True

0 commit comments

Comments
 (0)