Skip to content

Commit 7190785

Browse files
authored
Merge pull request #5 from clowder-framework/feature/add-tags-simple-extractor
tags added to simple extractor
2 parents c629530 + ed0348e commit 7190785

File tree

4 files changed

+38
-9
lines changed

4 files changed

+38
-9
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
88

99
### Added
1010
- Simple extractors now support datasets, can also create new datasets.
11+
- Ability to add tags from simple extractor to files and datasets.
1112

1213
## 2.2.3 - 2019-10-14
1314

pyclowder/datasets.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,25 @@ def submit_extractions_by_collection(connector, host, key, collectionid, extract
257257
submit_extractions_by_collection(connector, host, key, coll['id'], extractorname, recursive)
258258

259259

260+
def upload_tags(connector, host, key, datasetid, tags):
    """Upload dataset tags to Clowder.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    datasetid -- the dataset that is currently being processed
    tags -- the tags to be uploaded (dict of the form {"tags": [...]})
    """

    connector.status_update(StatusMessage.processing, {"type": "dataset", "id": datasetid}, "Uploading dataset tags.")

    headers = {'Content-Type': 'application/json'}
    url = '%sapi/datasets/%s/tags?key=%s' % (host, datasetid, key)
    # POST the tags payload as JSON. NOTE(review): the `if connector else True`
    # guard is dead code — connector is already dereferenced unconditionally
    # above; it is kept only to match the idiom used elsewhere in this module.
    connector.post(url, headers=headers, data=json.dumps(tags),
                   verify=connector.ssl_verify if connector else True)
260279
def upload_metadata(connector, host, key, datasetid, metadata):
261280
"""Upload dataset JSON-LD metadata to Clowder.
262281

pyclowder/extractors.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from pyclowder.connectors import RabbitMQConnector, HPCConnector, LocalConnector
2323
from pyclowder.utils import CheckMessage, setup_logging
2424
import pyclowder.files
25+
import pyclowder.datasets
2526

2627

2728
class Extractor(object):
@@ -342,7 +343,7 @@ def process_message(self, connector, host, secret_key, resource, parameters):
342343
try:
343344
# upload metadata to the processed file or dataset
344345
if 'metadata' in result.keys():
345-
self.logger.info("upload metadata")
346+
self.logger.debug("upload metadata")
346347
if type == 'file':
347348
metadata = self.get_metadata(result.get('metadata'), 'file', file_id, host)
348349
self.logger.debug(metadata)
@@ -356,20 +357,27 @@ def process_message(self, connector, host, secret_key, resource, parameters):
356357

357358
# upload previews to the processed file
358359
if 'previews' in result.keys():
359-
self.logger.info("upload previews")
360360
if type == 'file':
361361
for preview in result['previews']:
362362
if os.path.exists(str(preview)):
363363
preview = {'file': preview}
364-
self.logger.info("upload preview")
364+
self.logger.debug("upload preview")
365365
pyclowder.files.upload_preview(connector, host, secret_key, file_id, str(preview))
366366
else:
367367
# TODO: Add Clowder endpoint (& pyclowder method) to attach previews to datasets
368368
self.logger.error("previews not currently supported for resource type: %s" % type)
369369

370+
if 'tags' in result.keys():
371+
self.logger.debug("upload tags")
372+
tags = {"tags": result["tags"]}
373+
if type == 'file':
374+
pyclowder.files.upload_tags(connector, host, secret_key, file_id, tags)
375+
else:
376+
pyclowder.datasets.upload_tags(connector, host, secret_key, dataset_id, tags)
377+
370378
# upload output files to the processed file's parent dataset or processed dataset
371379
if 'outputs' in result.keys():
372-
self.logger.info("upload output files")
380+
self.logger.debug("upload output files")
373381
if type == 'file' or type == 'dataset':
374382
for output in result['outputs']:
375383
if os.path.exists(str(output)):
@@ -386,16 +394,16 @@ def process_message(self, connector, host, secret_key, resource, parameters):
386394
description = nds['description'] if 'description' in nds.keys() else ""
387395
new_dataset_id = pyclowder.datasets.create_empty(connector, host, secret_key, nds['name'],
388396
description)
389-
self.logger.info("created new dataset: %s" % new_dataset_id)
397+
self.logger.debug("created new dataset: %s" % new_dataset_id)
390398

391399
if 'metadata' in nds.keys():
392-
self.logger.info("upload metadata to new dataset")
400+
self.logger.debug("upload metadata to new dataset")
393401
metadata = self.get_metadata(nds.get('metadata'), 'dataset', new_dataset_id, host)
394402
self.logger.debug(metadata)
395403
pyclowder.datasets.upload_metadata(connector, host, secret_key, new_dataset_id, metadata)
396404

397405
if 'outputs' in nds.keys():
398-
self.logger.info("upload output files to new dataset")
406+
self.logger.debug("upload output files to new dataset")
399407
for output in nds['outputs']:
400408
if os.path.exists(str(output)):
401409
pyclowder.files.upload_to_dataset(connector, host, secret_key, new_dataset_id,

sample-extractors/simple-extractor/README.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,13 @@ you have to write your extractor the normal way using [PyClowder](https://openso
1818

1919
To write an extractor using the Simple Extractor, you need to have your Python program available. The main function of
2020
this Python program is supposed to take an input file path as its parameter. It needs to return a Python dictionary that
21-
can contain either metadata information ("metadata"), details about file previews ("previews") or both. For example:
21+
can contain metadata information ("metadata"), details about file previews ("previews"), tags for the file ("tags"), or any combination of these. For example:
2222

2323
``` json
2424
{
2525
"metadata": dict(),
26-
"previews": array()
26+
"previews": array(),
27+
"tags": array()
2728
}
2829
```
2930

0 commit comments

Comments
 (0)