2323from pyclowder .utils import CheckMessage , setup_logging
2424import pyclowder .files
2525import pyclowder .datasets
26+ from functools import reduce
2627
2728clowder_version = int (os .getenv ('CLOWDER_VERSION' , '1' ))
2829
@@ -229,7 +230,7 @@ def _get_extractor_info_v2(self):
229230 return current_extractor_info
230231
231232
232- def get_metadata (self , content , resource_type , resource_id , server = None ):
233+ def get_metadata (self , content , resource_type , resource_id , server = None , contexts = None ):
233234 """Generate a metadata field.
234235
235236 This will return a metadata dict that is valid JSON-LD. This will use the results as well as the information
@@ -254,33 +255,63 @@ def get_metadata(self, content, resource_type, resource_id, server=None):
254255 if not self ._check_key (k , self .extractor_info ['contexts' ]):
255256 logger .debug ("Simple check could not find %s in contexts" % k )
256257 # TODO generate clowder2.0 extractor info
257- if clowder_version == 2 :
258+ if clowder_version == 2.0 :
258259 new_extractor_info = self ._get_extractor_info_v2 ()
259260 md = dict ()
260261 md ["file_version" ] = 1
261- md ["context" ] = self .extractor_info ["contexts" ][0 ]
262+ if contexts is not None :
263+ md ["context" ] = contexts
264+ else :
265+ md ["context" ] = {}
266+ if type (self .extractor_info ['contexts' ] == list ):
267+ if len (self .extractor_info ['contexts' ]) > 0 :
268+ if len (self .extractor_info == 1 ):
269+ md ["context" ] = self .extractor_info ["contexts" ]
270+ else :
271+ # TODO is this necessary? Should contexts always be a list with one dictionary?
272+ current_contexts = self .extractor_info ["contexts" ]
273+ reduce (lambda a , b : dict (a , ** b ), current_contexts )
262274 md ["context_url" ] = context_url
263275 md ["content" ] = content
264276 md ["contents" ] = content
265277 md ["extractor_info" ] = new_extractor_info
266278 return md
267279 else :
268- return {
269- '@context' : [context_url ] + self .extractor_info ['contexts' ],
270- 'attachedTo' : {
271- 'resourceType' : resource_type ,
272- 'id' : resource_id
273- },
274- 'agent' : {
275- '@type' : 'cat:extractor' ,
276- 'extractor_id' : '%sextractors/%s/%s' %
277- (server , self .extractor_info ['name' ], self .extractor_info ['version' ]),
278- 'version' : self .extractor_info ['version' ],
279- 'name' : self .extractor_info ['name' ]
280- },
281- 'content' : content
282- }
283-
280+ # TODO handle cases where contexts are either not available or are dynamically generated
281+ if contexts is not None :
282+ md = {
283+ '@context' : [context_url ] + contexts ,
284+ 'attachedTo' : {
285+ 'resourceType' : resource_type ,
286+ 'id' : resource_id
287+ },
288+ 'agent' : {
289+ '@type' : 'cat:extractor' ,
290+ 'extractor_id' : '%sextractors/%s/%s' %
291+ (server , self .extractor_info ['name' ], self .extractor_info ['version' ]),
292+ 'version' : self .extractor_info ['version' ],
293+ 'name' : self .extractor_info ['name' ]
294+ },
295+ 'content' : content
296+ }
297+ return md
298+ else :
299+ md = {
300+ '@context' : [context_url ] + self .extractor_info ['contexts' ],
301+ 'attachedTo' : {
302+ 'resourceType' : resource_type ,
303+ 'id' : resource_id
304+ },
305+ 'agent' : {
306+ '@type' : 'cat:extractor' ,
307+ 'extractor_id' : '%sextractors/%s/%s' %
308+ (server , self .extractor_info ['name' ], self .extractor_info ['version' ]),
309+ 'version' : self .extractor_info ['version' ],
310+ 'name' : self .extractor_info ['name' ]
311+ },
312+ 'content' : content
313+ }
314+ return md
284315 def _check_key (self , key , obj ):
285316 if key in obj :
286317 return True
0 commit comments