Skip to content

Commit 7190785

Browse files
authored
Merge pull request #5 from clowder-framework/feature/add-tags-simple-extractor
tags added to simple extractor
2 parents c629530 + ed0348e commit 7190785

File tree

4 files changed

+38
-9
lines changed

4 files changed

+38
-9
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
88

99
### Added
1010
- Simple extractors now support datasets, can also create new datasets.
11+
- Ability to add tags from simple extractor to files and datasets.
1112

1213
## 2.2.3 - 2019-10-14
1314

pyclowder/datasets.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,25 @@ def submit_extractions_by_collection(connector, host, key, collectionid, extract
257257
submit_extractions_by_collection(connector, host, key, coll['id'], extractorname, recursive)
258258

259259

260+
def upload_tags(connector, host, key, datasetid, tags):
    """Upload dataset tags to Clowder.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    datasetid -- the dataset that is currently being processed
    tags -- the tags to be uploaded (dict of the form {"tags": [...]})
    """

    connector.status_update(StatusMessage.processing, {"type": "dataset", "id": datasetid}, "Uploading dataset tags.")

    headers = {'Content-Type': 'application/json'}
    url = '%sapi/datasets/%s/tags?key=%s' % (host, datasetid, key)
    # POST the tags payload as JSON. NOTE(review): the `if connector else True`
    # guard is dead code — connector is already dereferenced unconditionally
    # above; it is kept only to match the idiom used elsewhere in this module.
    connector.post(url, headers=headers, data=json.dumps(tags),
                   verify=connector.ssl_verify if connector else True)
260279
def upload_metadata(connector, host, key, datasetid, metadata):
261280
"""Upload dataset JSON-LD metadata to Clowder.
262281

pyclowder/extractors.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from pyclowder.connectors import RabbitMQConnector, HPCConnector, LocalConnector
2323
from pyclowder.utils import CheckMessage, setup_logging
2424
import pyclowder.files
25+
import pyclowder.datasets
2526

2627

2728
class Extractor(object):
@@ -342,7 +343,7 @@ def process_message(self, connector, host, secret_key, resource, parameters):
342343
try:
343344
# upload metadata to the processed file or dataset
344345
if 'metadata' in result.keys():
345-
self.logger.info("upload metadata")
346+
self.logger.debug("upload metadata")
346347
if type == 'file':
347348
metadata = self.get_metadata(result.get('metadata'), 'file', file_id, host)
348349
self.logger.debug(metadata)
@@ -356,20 +357,27 @@ def process_message(self, connector, host, secret_key, resource, parameters):
356357

357358
# upload previews to the processed file
358359
if 'previews' in result.keys():
359-
self.logger.info("upload previews")
360360
if type == 'file':
361361
for preview in result['previews']:
362362
if os.path.exists(str(preview)):
363363
preview = {'file': preview}
364-
self.logger.info("upload preview")
364+
self.logger.debug("upload preview")
365365
pyclowder.files.upload_preview(connector, host, secret_key, file_id, str(preview))
366366
else:
367367
# TODO: Add Clowder endpoint (& pyclowder method) to attach previews to datasets
368368
self.logger.error("previews not currently supported for resource type: %s" % type)
369369

370+
if 'tags' in result.keys():
371+
self.logger.debug("upload tags")
372+
tags = {"tags": result["tags"]}
373+
if type == 'file':
374+
pyclowder.files.upload_tags(connector, host, secret_key, file_id, tags)
375+
else:
376+
pyclowder.datasets.upload_tags(connector, host, secret_key, dataset_id, tags)
377+
370378
# upload output files to the processed file's parent dataset or processed dataset
371379
if 'outputs' in result.keys():
372-
self.logger.info("upload output files")
380+
self.logger.debug("upload output files")
373381
if type == 'file' or type == 'dataset':
374382
for output in result['outputs']:
375383
if os.path.exists(str(output)):
@@ -386,16 +394,16 @@ def process_message(self, connector, host, secret_key, resource, parameters):
386394
description = nds['description'] if 'description' in nds.keys() else ""
387395
new_dataset_id = pyclowder.datasets.create_empty(connector, host, secret_key, nds['name'],
388396
description)
389-
self.logger.info("created new dataset: %s" % new_dataset_id)
397+
self.logger.debug("created new dataset: %s" % new_dataset_id)
390398

391399
if 'metadata' in nds.keys():
392-
self.logger.info("upload metadata to new dataset")
400+
self.logger.debug("upload metadata to new dataset")
393401
metadata = self.get_metadata(nds.get('metadata'), 'dataset', new_dataset_id, host)
394402
self.logger.debug(metadata)
395403
pyclowder.datasets.upload_metadata(connector, host, secret_key, new_dataset_id, metadata)
396404

397405
if 'outputs' in nds.keys():
398-
self.logger.info("upload output files to new dataset")
406+
self.logger.debug("upload output files to new dataset")
399407
for output in nds['outputs']:
400408
if os.path.exists(str(output)):
401409
pyclowder.files.upload_to_dataset(connector, host, secret_key, new_dataset_id,

sample-extractors/simple-extractor/README.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,13 @@ you have to write your extractor the normal way using [PyClowder](https://openso
1818

1919
To write an extractor using the Simple Extractor, you need to have your Python program available. The main function of
2020
this Python program is supposed to take an input file path as its parameter. It needs to return a Python dictionary that
21-
can contain either metadata information ("metadata"), details about file previews ("previews") or both. For example:
21+
can contain metadata information ("metadata"), details about file previews ("previews"), tags for the file ("tags"), or any combination of these. For example:
2222

2323
``` json
2424
{
2525
"metadata": dict(),
26-
"previews": array()
26+
"previews": array(),
27+
"tags": array()
2728
}
2829
```
2930

0 commit comments

Comments
 (0)