Merge remote-tracking branch 'origin/master' into pypi

robkooper · robkooper · commit e59122676940 · 2018-04-02T09:26:19.000-05:00
diff --git a/.zenodo.json b/.zenodo.json
@@ -0,0 +1,47 @@
+{
+    "title": "Python wrappers for use with the Clowder system", 
+    "description": "This library makes it easier to interact with Clowder and to create extractors and other code that interacts with Clowder. The library encapsulates the calls to the REST API. For extractors, the library provides the developer with an easy-to-extend framework for creating new extractors.",
+    "creators": [
+        {
+            "affiliation": "National Center for Supercomputing Applications",
+            "name": "Rob Kooper",
+            "orcid": "0000-0002-5781-7287"
+        },
+        {
+            "affiliation": "National Center for Supercomputing Applications",
+            "name": "Max Burnette"
+        },
+        {
+            "affiliation": "National Center for Supercomputing Applications",
+            "name": "Sandeep Satheesan"
+        },
+        {
+            "affiliation": "National Center for Supercomputing Applications",
+            "name": "Bing Zhang"
+        },
+        {
+            "affiliation": "University of Illinois at Urbana-Champaign",
+            "name": "Todd Nicholson"
+        },
+        {
+            "affiliation": "National Center for Supercomputing Applications",
+            "name": "Indira Gutierrez"
+        },
+        {
+            "affiliation": "National Center for Supercomputing Applications",
+            "name": "Kenton McHenry"
+        },
+        {
+            "name": "Ward Poelmans"
+        }
+    ],
+    "upload_type": "software",
+    "license": "NCSA",
+    "access_right": "open",
+    "keywords": [
+        "json",
+        "python",
+        "clowder"
+    ],
+    "notes": "Development is supported by Brown Dog (NSF #1261582)"
+}
diff --git a/Dockerfile b/Dockerfile
@@ -1,6 +1,14 @@
 FROM ubuntu:16.04
 MAINTAINER Rob Kooper <kooper@illinois.edu>
 
+# environment variables
+ENV RABBITMQ_URI="" \
+    RABBITMQ_EXCHANGE="clowder" \
+    RABBITMQ_QUEUE="" \
+    REGISTRATION_ENDPOINTS="https://clowder.ncsa.illinois.edu/extractors" \
+    MAIN_SCRIPT=""
+
+# install python
 RUN apt-get -q -q update && apt-get install -y --no-install-recommends \
         netcat \
         python \
@@ -9,9 +17,13 @@ RUN apt-get -q -q update && apt-get install -y --no-install-recommends \
     && rm -rf /var/lib/apt/lists/* \
     && adduser --system clowder
 
+# install pyclowder2
 COPY pyclowder /tmp/pyclowder/pyclowder
 COPY setup.py requirements.txt /tmp/pyclowder/
 
 RUN pip install --upgrade  -r /tmp/pyclowder/requirements.txt \
     && pip install --upgrade /tmp/pyclowder \
     && rm -rf /tmp/pyclowder
+
+# change folder
+WORKDIR /home/clowder/
diff --git a/Dockerfile.onbuild b/Dockerfile.onbuild
@@ -0,0 +1,26 @@
+FROM clowder/pyclowder:2
+
+# copy the whole build context of the derived image into /home/clowder/
+ONBUILD ADD . /home/clowder/
+
+# install any system packages listed in packages.apt (skipped when the file is absent)
+#ONBUILD COPY packages.apt /home/clowder/
+ONBUILD RUN if [ -e packages.apt ]; then \
+                apt-get -q -q update \
+                && xargs apt-get -y install --no-install-recommends < packages.apt \
+                && rm -rf /var/lib/apt/lists/*; \
+            fi
+
+# install any python packages listed in requirements.txt (skipped when the file is absent)
+#ONBUILD COPY requirements.txt /home/clowder/
+ONBUILD RUN if [ -e requirements.txt ]; then \
+                pip install --no-cache-dir -r requirements.txt; \
+            fi
+
+# switch to user clowder last, after the install steps above have run
+ONBUILD USER clowder
+
+# command to run when starting container; "extractor" is the default argument given to entrypoint.sh
+COPY entrypoint.sh /home/clowder/
+ENTRYPOINT ["/home/clowder/entrypoint.sh"]
+CMD ["extractor"]
diff --git a/README.md b/README.md
@@ -1,3 +1,5 @@
+[![DOI](https://zenodo.org/badge/126513159.svg)](https://zenodo.org/badge/latestdoi/126513159)
+
 This repository contains the next generation of pyClowder. This library makes it easier to interact with clowder and
 to create extractors.
 
@@ -8,6 +10,21 @@ created using extractors. To make it easy to create these extractors in python w
 Besides wrapping often used api calls in convenient python calls, we have also added some code to make it easy to
 create new extractors.
 
+## Setup
+
+Install pyClowder2 on your system by cloning this repo:
+
+```
+git clone https://opensource.ncsa.illinois.edu/bitbucket/scm/cats/pyclowder2.git
+cd pyclowder2
+pip install -r requirements.txt
+python setup.py install
+```
+or directly from Bitbucket:
+```
+pip install -r https://opensource.ncsa.illinois.edu/bitbucket/projects/CATS/repos/pyclowder2/raw/requirements.txt git+https://opensource.ncsa.illinois.edu/bitbucket/scm/cats/pyclowder2.git
+```
+
 ## Example Extractor
 
 Following is an example of the WordCount extractor. This example will allow the user to specify from the command line
diff --git a/docker.sh b/docker.sh
@@ -1,89 +1,17 @@
 #!/bin/sh
 
-# variables that can be set
-# DEBUG   : set to echo to print command and not execute
-# PUSH    : set to push to push, anthing else not to push. If not set
-#           the program will push if master or develop.
-# PROJECT : the project to add to the image, default is NCSA
-# VERSION : the list of tags to use, if not set this will be 2
-
 #DEBUG=echo
 
-# set default for clowder
-PROJECT=${PROJECT:-"clowder"}
-
-RM=${RM:-"rm"}
-
-# find out version and if we should push
-BRANCH="$(git rev-parse --abbrev-ref HEAD)"
-VERSION=${VERSION:-"2"}
-if [ "$BRANCH" = "master" ]; then
-  PUSH=${PUSH:-"push"}
-else
-  PUSH=${PUSH:-""}
-fi
-
-# keep track of which latest amde
-LATEST=""
-
-# helper to create the docker container
-# $1 - folder that contains the Dockerfile
-# $2 - name of docker image
-# $3 - name of Dockerfile
-create() {
-  if [ -z "$1" ]; then echo "Missing repo/Dockerfile name."; exit -1; fi
-  if [ -z "$2" ]; then echo "Missing name for $1."; exit -1; fi
-
-  DOCKERFILE=${3:-"$1/Dockerfile"}
-
-  # create image using temp id
-  local ID=$(uuidgen)
-  ${DEBUG} docker build  --tag $$ --file ${DOCKERFILE} $1
-  if [ $? -ne 0 ]; then
-    echo "FAILED build of $1/${DOCKERFILE}"
-    exit -1
-  fi
-
-  # tag all versions
-  for v in $VERSION; do
-    if [ "$PROJECT" = "" ]; then
-      ${DEBUG} docker tag $$ ${2}:${v}
-    else
-      for p in ${PROJECT}; do
-        NAME=$2
-        ${DEBUG} docker tag $$ ${p}/${NAME}:${v}
-        if [ "$PUSH" = "push" ]; then
-          ${DEBUG} docker push ${p}/${NAME}:${v}
-        fi
-      done
-    fi
-  done
-
-  # tag version as latest, but don't push
-  if [ ! "$BRANCH" = "master" ]; then
-    if [ "$PROJECT" = "" ]; then
-      ${DEBUG} docker tag $$ ${2}:latest
-      LATEST="$LATEST ${2}:latest"
-    else
-      for p in ${PROJECT}; do
-        NAME=$2
-        ${DEBUG} docker tag $$ ${p}/${NAME}:latest
-        LATEST="$LATEST ${p}/${NAME}:latest"
-      done
-    fi
-  fi
+# build docker container
+${DEBUG} docker build  --tag clowder/pyclowder:2 .
+${DEBUG} docker build  --tag clowder/pyclowder:onbuild --file Dockerfile.onbuild .
 
-  # delete image with temp id
-  ${DEBUG} docker rmi $$
-}
+# build sample extractors
+${DEBUG} docker build  --tag clowder/extractors-wordcount:2 sample-extractors/wordcount
 
-# Create the docker containers
-create "."                           "pyclowder"
-create "sample-extractors/wordcount" "extractors-wordcount"
 
-# remove latest tags
-if [ "$RM" = "rm" ]; then
-    for r in $LATEST; do
-      ${DEBUG} docker rmi ${r}
-    done
+if [ "$(git rev-parse --abbrev-ref HEAD)" = "master" ]; then
+  ${DEBUG} docker push clowder/pyclowder:2
+  ${DEBUG} docker push clowder/pyclowder:onbuild
+  ${DEBUG} docker push clowder/extractors-wordcount:2
 fi
diff --git a/entrypoint.sh b/entrypoint.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+set -e
+
+# rabbitmq: derive RABBITMQ_URI from the docker-compose / --link variables when it is not set
+if [ -z "${RABBITMQ_URI}" ] && [ -n "${RABBITMQ_PORT_5672_TCP_ADDR}" ]; then
+    export RABBITMQ_URI="amqp://${RABBITMQ_PORT_5672_TCP_ADDR}:${RABBITMQ_PORT_5672_TCP_PORT}/%2F"
+fi
+
+# start server if asked
+if [ "$1" = 'extractor' ]; then
+    # make sure main script exists
+    if [ -z "${MAIN_SCRIPT}" ]; then
+        echo "No main script specified, can not run code."
+        exit -1
+    fi
+    if [ ! -e "${MAIN_SCRIPT}" ]; then
+        echo "Main script specified does not exist."
+        exit -1
+    fi
+    chmod 755 "${MAIN_SCRIPT}"
+
+    # check to make sure rabbitmq is up
+    if [ -n "${RABBITMQ_PORT_5672_TCP_ADDR}" ]; then
+        # try up to 10 times, one second apart, then start the extractor regardless
+        for i in $(seq 1 10); do
+            if nc -z "${RABBITMQ_PORT_5672_TCP_ADDR}" "${RABBITMQ_PORT_5672_TCP_PORT}"; then
+                break
+            fi
+            sleep 1
+        done
+    fi
+
+    # launch extractor; only prefix "./" for relative paths so that an absolute
+    # MAIN_SCRIPT (which passes the -e check above) is not turned into ".//..."
+    case "${MAIN_SCRIPT}" in
+        /*) exec "${MAIN_SCRIPT}" ;;
+        *)  exec "./${MAIN_SCRIPT}" ;;
+    esac
+fi
+
+exec "$@"
diff --git a/pyclowder/collections.py b/pyclowder/collections.py
@@ -7,6 +7,7 @@
 import logging
 import requests
 
+from pyclowder.client import ClowderClient
 from pyclowder.utils import StatusMessage
 
 
@@ -141,3 +142,73 @@ def upload_preview(connector, host, key, collectionid, previewfile, previewmetad
     result.raise_for_status()
 
     return previewid
+
+
+class CollectionsApi(object):
+    """
+        API to manage the REST CRUD endpoints for collections
+    """
+    def __init__(self, client=None, host=None, key=None, username=None, password=None):
+        """Set client if provided otherwise create new one"""
+        if client:
+            self.api_client = client
+        else:
+            self.client = ClowderClient(host=host, key=key, username=username, password=password)
+
+    def create(self, name, description, parent_id, space_id):
+        """Create a new collection in Clowder.
+
+        Keyword arguments:
+        connector -- connector information, used to get missing parameters and send status updates
+        host -- the clowder host, including http and port, should end with a /
+        key -- the secret key to login to clowder
+        collectionname -- name of new dataset to create
+        description -- description of new dataset
+        parentid -- id of parent collection
+        spaceid -- id of the space to add dataset to
+        """
+
+        if parent_id:
+            if space_id:
+                body = {
+                    "name": name,
+                    "description": description,
+                    "parentId": [parent_id],
+                    "space": space_id
+                }
+                result = self.client.post("/collections/newCollectionWithParents", body)
+            else:
+                body = {
+                    "name": name,
+                    "description": description,
+                    "parentId": [parent_id],
+                }
+                result = self.client.post("/collections/newCollectionWithParent", body)
+        else:
+            if space_id:
+                body = {
+                    "name": name,
+                    "description": description,
+                    "space": space_id
+                }
+                result = self.client.post("/collections", body)
+            else:
+                body = {
+                    "name": name,
+                    "description": description,
+                }
+                result = self.client.post("/collections", body)
+        result.raise_for_status()
+
+        collection_id = result.json()['id']
+        logging.debug("collection id = [%s]", collection_id)
+
+        return collection_id
+
+    def get_all_collections(self):
+        """
+        Get All Collections in Clowder
+
+        :return: List of collections in Clowder
+        """
+        return self.client.get("/collections")
diff --git a/pyclowder/files.py b/pyclowder/files.py
@@ -188,7 +188,7 @@ def upload_metadata(connector, host, key, fileid, metadata):
 
 
 # pylint: disable=too-many-arguments
-def upload_preview(connector, host, key, fileid, previewfile, previewmetadata):
+def upload_preview(connector, host, key, fileid, previewfile, previewmetadata, preview_mimetype=None):
     """Upload preview to Clowder.
 
     Keyword arguments:
@@ -199,6 +199,8 @@ def upload_preview(connector, host, key, fileid, previewfile, previewmetadata):
     previewfile -- the file containing the preview
     previewmetadata -- any metadata to be associated with preview, can contain a section_id
                     to indicate the section this preview should be associated with.
+    preview_mimetype -- (optional) MIME type of the preview file. By default, this is obtained from the
+                    file itself and this parameter can be ignored. E.g. 'application/vnd.clowder+custom+xml'
     """
 
     connector.status_update(StatusMessage.processing, {"type": "file", "id": fileid}, "Uploading file preview.")
@@ -209,7 +211,13 @@ def upload_preview(connector, host, key, fileid, previewfile, previewmetadata):
     # upload preview
     url = '%sapi/previews?key=%s' % (host, key)
     with open(previewfile, 'rb') as filebytes:
-        result = connector.post(url, files={"File": filebytes}, verify=connector.ssl_verify if connector else True)
+        # If a custom preview file MIME type is provided, use it to generate the preview file object.
+        if preview_mimetype is not None:
+            result = connector.post(url, files={"File": (os.path.basename(previewfile), filebytes, preview_mimetype)},
+                                    verify=connector.ssl_verify if connector else True)
+        else:
+            result = connector.post(url, files={"File": filebytes}, verify=connector.ssl_verify if connector else True)
+
     previewid = result.json()['id']
     logger.debug("preview id = [%s]", previewid)
 
diff --git a/pyclowder/sections.py b/pyclowder/sections.py
diff --git a/sample-extractors/wordcount/Dockerfile b/sample-extractors/wordcount/Dockerfile
diff --git a/sample-extractors/wordcount/entrypoint.sh b/sample-extractors/wordcount/entrypoint.sh