diff --git a/.circleci/config.yml b/.circleci/config.yml
index 99f7692..c8ea89b 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -55,7 +55,6 @@ jobs:
# - slack/status:
# fail_only: false
-# cli build
cli-build:
executor: docker/docker
steps:
@@ -75,30 +74,55 @@ jobs:
- slack/status:
fail_only: false
-# clients build
- client-build:
+ onestop-s3-handler-build:
executor: docker/docker
steps:
- setup_remote_docker
- checkout
- docker/check
- docker/build:
- path: onestop-python-client
+ path: scripts/sqs-to-registry
+ image: cedardevs/onestop-s3-handler
+ tag: latest
+ - docker/push:
+ image: cedardevs/onestop-s3-handler
+ tag: latest
+ - slack/status:
+ fail_only: false
+
+ onestop-sme-build:
+ executor: docker/docker
+ steps:
+ - setup_remote_docker
+ - checkout
+ - docker/check
+ - docker/build:
+ path: scripts/sme/
+ image: cedardevs/onestop-sme
+ tag: latest
+ - docker/push:
+ image: cedardevs/onestop-sme
+ tag: latest
+ - slack/status:
+ fail_only: false
+
+ onestop-python-client-build:
+ executor: docker/docker
+ steps:
+ - setup_remote_docker
+ - checkout
+ - docker/check
+ - docker/build:
+ path: ./
image: cedardevs/onestop-python-client
- tag: ${CIRCLE_BRANCH}-SNAPSHOT
- - run:
- name: "What branch am I on now?"
- command: echo $CIRCLE_BRANCH
-#no need to push this image yet
+ tag: latest
- docker/push:
image: cedardevs/onestop-python-client
- tag: ${CIRCLE_BRANCH}-SNAPSHOT
+ tag: latest
- slack/status:
fail_only: false
- # Base test configuration for Go library tests Each distinct version should
- # inherit this base, and override (at least) the container image used.
- python-client-test: &python-client-test
+ onestop-python-client-test: &python-client-test
executor: python/default
steps: &steps
- checkout
@@ -107,10 +131,16 @@ jobs:
app-dir: ./onestop-python-client
pkg-manager: pip
- run:
- name: "Run util tests"
+ name: "Run unit tests"
command: >
cd onestop-python-client/;
- python -m unittest tests/util/*.py
+ python -m unittest discover -s test/unit
+# This is commented out only because the OneStop we have running on cedardevs doesn't have its registry exposed. You can only reach it by SSHing to another machine.
+# - run:
+# name: "Run integration tests"
+# command: >
+# cd onestop-python-client/;
+# python -m unittest discover -s test/integration
orbs:
slack: circleci/slack@3.4.2
@@ -121,9 +151,15 @@ version: 2.1
workflows:
main:
jobs:
-# - "latest"
# - cli-test
# - cli-build
-# - client-build
- - python-client-test
-
+ - onestop-python-client-test
+ - onestop-python-client-build:
+ requires:
+ - onestop-python-client-test
+ - onestop-sme-build:
+ requires:
+ - onestop-python-client-build
+ - onestop-s3-handler-build:
+ requires:
+ - onestop-python-client-build
diff --git a/Dockerfile b/Dockerfile
index e5ec186..a906511 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,10 +1,15 @@
FROM python:3.8
+
COPY ./onestop-python-client /onestop-python-client
COPY ./scripts /scripts
+
RUN apt-get update
RUN pip install --upgrade pip
-RUN pip install ./onestop-python-client
RUN pip install -r ./onestop-python-client/requirements.txt
+# Needed for the scripts - installed here because the onestop-python-client directory is outside the build context of the scripts/* Dockerfiles.
+# Note: this may not pick up the latest local changes if pip resolves a previously built/cached package.
+RUN pip install ./onestop-python-client
+
#Base image stays up for dev access
CMD tail -f /dev/null
diff --git a/docs/README.md b/docs/README.md
index a8f99a1..088a4ec 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -17,13 +17,42 @@
## Table of contents
+* [onestop-python-client](#onestop-python-client)
+* [Python Scripts](#python-scripts)
+* [Helm](#helm)
+* [CLI](#cli)
+* [Build Pipeline and Test Execution](build-pipeline)
-- [Project Overview](#project-overview)
-- [Navigating The Documentation](#navigating-the-documentation)
- - [By User Type](#by-user-type)
- - [By Project Component](#by-project-component)
-- [External Documentation](#docs/cli/developer/quickstart.md)
+This OneStop-clients project is a collection of clients to aid in communicating with OneStop and directly with the cloud.
-## Project Overview
-OneStop-clients is an open-sourced commandline interface and subject matter consumer clients ...
+## [onestop-python-client](onestop-python-client)
+The onestop-python-client is a tool for subject matter experts (SMEs) to publish and consume metadata to and from OneStop, as well as directly to the cloud.
+This enables someone to feed data into OneStop, have OneStop digest it, and then read it out via a python script.
+[onestop-python-client](onestop-python-client) - More details.
+
+Additional information:
+* [onestop-test-data repository readme](https://github.com/cedardevs/onestop-test-data/blob/master/README.md) - loading test data into OneStop.
+* [OneStop documentation](https://cedardevs.github.io/onestop/) - general OneStop documentation.
+
+## [Python Scripts](scripts)
+There are some sample python scripts that use the onestop-python-client in the scripts directory.
+
+[python scripts](scripts) - More details.
+
+## [Helm](helm)
+The helm directory contains helm charts that create different kubernetes containers, each of which includes this repository's onestop-python-client code and scripts directory.
+They have python installed so that an SME user can execute scripts from within them.
+
+[Helm](helm) - More details.
+
+## [CLI](cli)
+The CLI is an open-sourced command-line interface for OneStop's search API.
+
+* [Developer Quickstart](cli/developer-quickstart)
+
+* [Public User Quickstart](cli/public-user-quickstart)
+
+* [SCDR Files](cli/scdr-files)
+
+[CLI](cli) - More details.
\ No newline at end of file
diff --git a/docs/build-pipeline.md b/docs/build-pipeline.md
new file mode 100644
index 0000000..9f22549
--- /dev/null
+++ b/docs/build-pipeline.md
@@ -0,0 +1,42 @@
+
+
+
+# Build Pipeline and Test Execution
+
+## Table of Contents
+* [CircleCI](#circleci)
+* [Building Manually](#building-manually)
+* [Test Execution](#test-execution)
+
+## CircleCI
+Currently, this project uses CircleCI to build the multiple images needed. If you examine the CircleCI configuration file (`.circleci/config.yml`) you will see which tests it executes and which images it builds, with what tags.
+
+## Building Manually
+* If you change the onestop-python-client code then run this, from the project root:
+
+```
+docker build . -t cedardevs/onestop-python-client:latest
+```
+
+* If you modify just the scripts then run this (only need to do the one relevant for your script), from the project root:
+
+```
+docker build ./scripts/sqs-to-registry -t cedardevs/onestop-s3-handler:latest
+```
+
+```
+docker build ./scripts/sme/ -t cedardevs/onestop-sme:latest
+```
+
+## Test Execution
+To execute the onestop-python-client tests via Python's unittest, run this from the onestop-python-client directory:
+
+```
+python3 -m unittest discover
+```
+
+If you wish to run a specific test file, here's an example:
+
+```
+python -m unittest test/unit/util/test_S3MessageAdapter.py
+```
\ No newline at end of file
diff --git a/docs/cli/README.md b/docs/cli/README.md
new file mode 100644
index 0000000..7b4521b
--- /dev/null
+++ b/docs/cli/README.md
@@ -0,0 +1,7 @@
+
+
+
+## Table of contents
+* [Public User Quickstart](public-user-quickstart)
+* [SCDR Files](scdr-files)
+* [Developer](developer-quickstart)
\ No newline at end of file
diff --git a/docs/cli/developer/quickstart.md b/docs/cli/developer-quickstart.md
similarity index 95%
rename from docs/cli/developer/quickstart.md
rename to docs/cli/developer-quickstart.md
index 7fde3b9..e2d70dc 100644
--- a/docs/cli/developer/quickstart.md
+++ b/docs/cli/developer-quickstart.md
@@ -1,4 +1,9 @@
-# OneStop CLI tool for developers
+
+
+# OneStop CLI tool for Developers
+
+## Table of Contents
+
The `onestop-cli` tool provides a convenient command line interface for the OneStop search API. This tool is partly generated from the OpenAPI spec in the search module. We have added custom middleware for convenient syntax for frequently used filters and queries.
diff --git a/docs/cli/public-user/quickstart.md b/docs/cli/public-user-quickstart.md
similarity index 78%
rename from docs/cli/public-user/quickstart.md
rename to docs/cli/public-user-quickstart.md
index ceae673..5a93c30 100644
--- a/docs/cli/public-user/quickstart.md
+++ b/docs/cli/public-user-quickstart.md
@@ -1,13 +1,35 @@
+
+
# OneStop CLI tool
+## Table of Contents
+* [OneStop Info](#onestop-info)
+* [Installation](#installation)
+ * [Requirements](#requirements)
+ * [Install and run using a docker container (golang not required)](#install-and-run-using-a-docker-container-golang-not-required)
+ * [Download and use as Go package](#download-and-use-as-go-package)
+* [Configuration](#configuration)
+* [Usage](#usage)
+ * [Verbose](#verbose)
+ * [Run against a test or local API](#run-against-a-test-or-local-api)
+ * [Get](#get)
+ * [Search](#search)
+ * [by identifier](#by-identifier)
+ * [by parent identifier](#by-parent-identifier)
+ * [by date](#by-date)
+ * [by geometry](#by-geometry)
+ * [combinations](#combinations)
+
The `onestop-cli` tool provides a convenient command line interface for the OneStop search API.
+There are three primary methods to search the data contained within OneStop via the command line interface (CLI) tool.
+If you're new to the OneStop ecosystem, you should interact with one or both of the running instances of the UI to get some familiarity with basic search features. Then if you choose to be more of a power user, we encourage you to explore the ins and outs of our API and CLI.
-Read the [OneStop OpenAPI spec 2.0.0](https://app.swaggerhub.com/apis/cedardevs/one-stop_search_api/2.0.0).
-Or check the [OneStop OpenAPI spec 2.4.0](https://app.swaggerhub.com/apis/cedarbot/OneStop/2.4.0).
+## OneStop Info
+* [OneStop OpenAPI spec 2.0.0](https://app.swaggerhub.com/apis/cedardevs/one-stop_search_api/2.0.0)
+* [OneStop OpenAPI spec 2.4.0](https://app.swaggerhub.com/apis/cedarbot/OneStop/2.4.0)
## Installation
-
-### Requirements -
+### Requirements
Either golang, or docker. Direct downloads of binaries will be available in the future.
@@ -157,6 +179,3 @@ Longhand query, including the `--verbose` flag to provide more logging:
For complex query and filter structure, refer to the [short hand documentation](https://github.com/danielgtaylor/openapi-cli-generator/tree/master/shorthand).
Note: As it is now, you cannot combine the flags with json shorthand. e.g. This will not work - `onestop searchcollection --area="POLYGON(( 22.686768 34.051522, 30.606537 34.051522, 30.606537 41.280903, 22.686768 41.280903, 22.686768 34.051522 ))" --query="satellite" filters[]{ type:datetime, after:2017-01-01T00:00:00Z, before:2017-02-01T00:00:00Z} `
-
-
-
diff --git a/docs/cli/public-user/scdr-files.md b/docs/cli/scdr-files.md
similarity index 98%
rename from docs/cli/public-user/scdr-files.md
rename to docs/cli/scdr-files.md
index b54ca8d..a01e1de 100644
--- a/docs/cli/public-user/scdr-files.md
+++ b/docs/cli/scdr-files.md
@@ -4,8 +4,6 @@
## scdr-files configurations
-Config locations -
-
Users can supply a configuration to map scdr-file type short names to OneStop IDs. See default config in [default config](cli/scdr-files-config.yaml). File can be yaml or json, but must be named "scdr-files-config" and placed in one of the following locations- project directory, current working directory, /etc/scdr-files/, or $HOME/.scdr-files.
diff --git a/docs/helm.md b/docs/helm.md
new file mode 100644
index 0000000..328cc8e
--- /dev/null
+++ b/docs/helm.md
@@ -0,0 +1,43 @@
+
+
+
+# Helm
+
+## Table of Contents
+* [Intro](#intro)
+* [Helm Configuration](#helm-configuration)
+* [Create and Start the Script Container](#create-and-start-the-script-container)
+
+## Intro
+This project has a helm directory which is set up to pull a onestop-python-client image (specified in the image section of `helm//values.yml`) and create a kubernetes container with that image inside. The container should be able to communicate with the configured OneStop stack (specified in the conf section of `helm//values.yml`).
+It also copies the onestop-python-client and scripts directories into the container.
+
+## Helm Configuration
+The helm charts are set up to create a configuration file from the template at `helm//values.yml` and copy it to `/etc/config/config.yml` within the container. You don't have to use this file, but most likely one will be necessary in a location where the scripts can access it.
+
+Please see the [onestop-python-client configuration](onestop-python-client#configuration) section for configuration information.
+
+Please see the [scripts](scripts) documentation for information on how to pass in a configuration file via CLI and execute the scripts.
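+
+Inside the container the generated file is plain YAML, so a script can read it directly. Here is a minimal sketch (illustrative only, not a file in this repository) that assumes PyYAML is installed and that the rendered config sits at the default location:
+
+```python
+import yaml
+
+# Default location the helm charts copy the rendered config template to.
+CONFIG_PATH = '/etc/config/config.yml'
+
+with open(CONFIG_PATH) as f:
+    config = yaml.load(f, Loader=yaml.FullLoader)
+
+# A couple of the keys defined in the conf section of the chart's values.yaml.
+print(config['log_level'])
+print(config['registry_base_url'])
+```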
+
+## Create and Start the Script Container
+The helm install command, done from the root of this repository, will use the charts in the helm directory to create the specified container.
+
+In this example we will create the `sme` release using the helm charts and configuration in this repository, from `helm/onestop-sqs-consumer`:
+1. cd to the root of this project
+1. `helm uninstall sme`
+1. `helm install sme helm/onestop-sqs-consumer`
+
+To check the container status execute `kubectl get pods` and look for the pod with the expected name, as defined by the `name` field in the `helm//Chart.yaml`:
+
+```
+(base) ~/repo/onestop-clients 07:00 PM$ kubectl get pods
+NAME READY STATUS RESTARTS AGE
+sme-onestop-sqs-consumer-5c678675f7-q2s7h 0/1 Pending 0 26s
+```
+If it isn't in a 'Running' state within about 10 seconds then something is probably wrong. If it hasn't crashed yet (indicated by a STATUS of CrashLoopBackOff) then one possibility is a connection timeout while trying to reach a resource.
+
+Once the container is running you can exec into it (much like "SSHing") via this command, using the NAME from the `kubectl get pods` output:
+
+```
+kubectl exec --stdin --tty sme-onestop-sqs-consumer-5c678675f7-q2s7h -- /bin/bash
+```
diff --git a/docs/images/cedar_devs_logo.png b/docs/images/cedar_devs_logo.png
new file mode 100644
index 0000000..cd4b7fc
Binary files /dev/null and b/docs/images/cedar_devs_logo.png differ
diff --git a/docs/onestop-python-client.md b/docs/onestop-python-client.md
new file mode 100644
index 0000000..e447bc1
--- /dev/null
+++ b/docs/onestop-python-client.md
@@ -0,0 +1,80 @@
+
+
+
+# OneStop Clients
+
+## Table of Contents
+* [Prerequisites](#prerequisites)
+* [Credentials](#credentials)
+* [Configuration](#configuration)
+* [Usage](#usage)
+* [How to manually publish a new version of this client](#how-to-manually-publish-a-new-version-of-this-client)
+
+This python package provides an API to connect to OneStop's event stream (aka Inventory Manager). There are several utility modules in the onestop-python-client for posting to the Registry or for publishing to and consuming from OneStop via kafka. There are also some cloud-specific utility classes.
+
+## Prerequisites
+If you need to bring up the OneStop stack, see the [OneStop quickstart documentation](https://github.com/cedardevs/onestop/blob/master/docs/developer/quickstart.md#quick-start-kubernetes--helm--skaffold)
+
+## Credentials
+Copy the `onestop-python-client/config/credentials-template.yml` to a file and fill out the information you will need. If you are using a helm container then copy it to that container.
+
+## Configuration
+Here are some configuration values and what they represent. You don't need everything; it depends on which onestop-python-client classes you are using.
+If you are using the helm-generated configuration file then look in the [helm configuration section](helm#helm-configuration) for which file to modify.
+
+* `*_metadata_type` - should be GRANULE or COLLECTION, depending on what you are sending/receiving.
+* schema_registry, registry_base_url, and onestop_base_url - set these to the endpoints you are communicating with, especially if you are not on cedar-devs talking to its OneStop.
+* AWS section - there are several config values for AWS you probably need to change; many are set to testing values.
+* Kafka section - if you are using kafka you might need to adjust these values, although this isn't necessarily the preferred way to submit to OneStop. [OneStop Kafka Topics](https://github.com/cedardevs/onestop/blob/master/kafka-common/src/main/java/org/cedar/onestop/kafka/common/constants/Topics.java) defines how the topics get named if you do need to listen to one. A topic isn't created until information is published to it (be it via OneStop or these scripts).
+* log_level - if you are troubleshooting or just want more granular logging, set this to DEBUG.
+
+## Usage
+Once you have the OneStop stack (or your own kafka broker + schema registry) running, you are ready to install this package and start consuming messages.
+
+The `onestop_client` can be downloaded via pip, like so-
+
+`python3 -m pip install onestop-python-client-cedardevs`
+
+To test the import, try-
+
+```
+$ python3
+>>> import onestop_client
+```
+
+Look here for more information on executing [scripts](scripts).
+
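+As a rough illustration of the client API, here is a hedged sketch of consuming granule messages with the `KafkaConsumer` class. The import path is assumed from the package layout, and the broker, schema registry, and topic values are simply the defaults from the helm values files; adjust them for your environment:
+
+```python
+from onestop.KafkaConsumer import KafkaConsumer  # assumed module path within the installed package
+
+def handler(key, value, log_level):
+    # The consumer calls this for every message, passing the key, the deserialized value, and its log level.
+    print(key, value)
+
+consumer = KafkaConsumer(
+    kafka_consumer_metadata_type='GRANULE',
+    brokers='cp-cp-kafka:9092',
+    group_id='sme-test',
+    auto_offset_reset='earliest',
+    schema_registry='http://cp-cp-schema-registry:8081',
+    security={'enabled': False},
+    collection_topic_consume='psi-collection-input-unknown',
+    granule_topic_consume='psi-granule-input-unknown',
+    log_level='INFO'
+)
+
+metadata_consumer = consumer.connect()
+consumer.consume(metadata_consumer, handler)  # polls the topic indefinitely
+```
+
+Because the constructors accept and simply debug-log any extra keyword arguments, a script can also load a YAML configuration file into a dict and pass it straight through, e.g. `KafkaConsumer(**config)`.
+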
+## How to manually publish a new version of this client
+See the [build pipeline](build-pipeline) for how these images are automatically published.
+
+First you will need to set up your credentials. Create $HOME/.pypirc and update it with the cedardevs username, pw, and token. It will look like the following-
+```
+[pypi]
+ username = __token__
+ password =
+```
+You'll need a couple tools to create the distribution and then publish it. To install these tools, run the following command-
+
+```
+python3 -m pip install --user --upgrade setuptools wheel twine
+```
+Note: make sure the version in the setup file is updated
+
+To build the new distribution-
+```
+python3 setup.py sdist bdist_wheel
+```
+
+That should create/update the dist/ directory.
+
+Now to push that to the PyPi repo-
+
+```
+python3 -m twine upload dist/*
+```
+
+#### Install onestop-python-client-cedardevs package
+
+```
+pip install onestop-python-client-cedardevs
+```
diff --git a/docs/public-user.md b/docs/public-user.md
deleted file mode 100644
index c473edb..0000000
--- a/docs/public-user.md
+++ /dev/null
@@ -1,17 +0,0 @@
-
-
-
-# Public User Navigation Guide
-As a public user of OneStop-cli, there are three primary methods to search the data contained within onestop via the command line interface (CLI) tool. If you're new to the OneStop ecosystem, you should interact with one or both of the running instances of the UI to get some familiarity with basic search features. Then if you choose to be more of a power user, we encourage you to explore the ins and outs of our API and and CLI.
-
-Take a look at our navigational suggestions below, or simply click the `Next` link at the bottom of the page to start at the top and work your way down.
-
-## Table of Contents
-* Command Line Interface
- - [Developer Quick Start](cli/developer/quickstart.md)
- - public user
- - [Quick Start](cli/public-user/quickstart.md)
- - [scdr-files](cli/public-user/scdr-files.md)
-
-
-
\ No newline at end of file
diff --git a/docs/scripts.md b/docs/scripts.md
new file mode 100644
index 0000000..43db8ca
--- /dev/null
+++ b/docs/scripts.md
@@ -0,0 +1,66 @@
+
+
+
+# Python Scripts for onestop-python-client
+
+## Table of Contents
+* [Usage](#usage)
+* [Setup](#setup)
+ * [Helm](#helm)
+ * [Manually Setup Python Environment](#manually-setup-python-environment)
+* [Load Data into OneStop](#load-data-into-onestop)
+ * [onestop-test-data repository](#onestop-test-data-repositoryhttpsgithubcomcedardevsonestop-test-data)
+ * [osim-deployment repository](#osim-deployment-repositoryhttpsgithubcomcedardevsosim-deployment)
+* [OneStop Quickstart](https://cedardevs.github.io/onestop/developer/quickstart)
+
+## Usage
+Depending on the script's imports, you may have to install some dependencies via `pip install ...`.
+Once you are ready to execute a script, go to the root directory of this project. An example command might be:
+
+`python scripts/sme/sme.py -cred cred.yml`
+
+NOTE:
+ * For some scripts you need to create a credentials file manually and specify its relative location on the command line via `-cred`
+ * The default configuration location is where helm will create it, `/etc/config/config.yml`. If you need to specify a different one, use the `-conf` command line argument (a minimal sketch of this argument handling follows below). [Configuration](helm) information is spelled out for helm, since you may have to modify some values if using helm.
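+
+A minimal sketch of the `-conf`/`-cred` argument handling described in the note above (illustrative only; individual scripts may name their arguments and defaults differently):
+
+```python
+import argparse
+import yaml
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description="Example onestop-python-client script")
+    # The configuration defaults to the location the helm charts create it at.
+    parser.add_argument('-conf', dest='conf', default='/etc/config/config.yml',
+                        help='Path to the configuration file')
+    # Credentials must be supplied explicitly.
+    parser.add_argument('-cred', dest='cred', required=True,
+                        help='Path to the credentials file')
+    args = parser.parse_args()
+
+    with open(args.conf) as f:
+        config = yaml.load(f, Loader=yaml.FullLoader)
+    with open(args.cred) as f:
+        credentials = yaml.load(f, Loader=yaml.FullLoader)
+
+    # The loaded dicts can then be handed to the onestop-python-client classes.
+    print(config.get('log_level'))
+```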
+
+## Setup
+To use the onestop-python-client there are two options:
+* Use our [Helm](helm) charts (Preferred and easiest way)
+* Or manually set up your python environment
+
+### Helm
+It is recommended to use our helm charts to create the script container. Go [here](helm) for more information.
+
+### Manually Setup Python Environment
+* Install conda (miniconda works).
+* Restart terminal or source files to recognize conda commands.
+* Create a new conda environment and activate it (not convinced you need this)
+ * `conda create -n onestop-clients python=3`
+ * `conda activate onestop-clients`
+ * `pip install setuptools`
+
+* Install any libraries needed by your script
+ * Ex: `pip install PyYaml`
+
+* Install onestop-python-client:
+ 1. `pip uninstall onestop-python-client-cedardevs`
+ 1. [Build the onestop-python-client](build-pipeline) if you have modified the code, otherwise it will access the image on github.
+ 1. `pip install ./onestop-python-client`
+
+ To test the import, try this. It shouldn't give an error:
+
+ ```
+ $ python3
+ >>> import onestop_client
+ ```
+
+## Load Data into OneStop
+There are several repositories to aid in loading data into a OneStop instance. Please read the appropriate repository's readme for accurate and up to date usage information.
+
+### [onestop-test-data repository](https://github.com/cedardevs/onestop-test-data)
+ `./upload.sh demo http://localhost/onestop/api/registry`
+
+### [osim-deployment repository](https://github.com/cedardevs/osim-deployment)
+ From the osim-deployment repository there is a staging-scripts directory with scripts for loading some data:
+
+ `./copyS3objects.sh -max_files=5 copy-config/archive-testing-demo-csb.sh`
diff --git a/helm/onestop-sqs-consumer/values.yaml b/helm/onestop-sqs-consumer/values.yaml
index 20557a0..f5a24fb 100644
--- a/helm/onestop-sqs-consumer/values.yaml
+++ b/helm/onestop-sqs-consumer/values.yaml
@@ -7,7 +7,7 @@ replicaCount: 1
image:
repository: cedardevs/onestop-sme
tag: latest
- pullPolicy: IfNotPresent
+ pullPolicy: Always
imagePullSecrets: []
nameOverride: ""
@@ -57,35 +57,51 @@ config: |-
log_level: INFO
# AWS config values
- sqs_url: https://sqs.us-east-2.amazonaws.com/798276211865/cloud-archive-client-sqs
- sqs_max_polls: 100
+ sqs_name: cloud-archive-client-sqs
s3_region: us-east-2
s3_bucket: archive-testing-demo
+ sqs_max_polls: 100
#AWS config values for 2nd vault in different region
vault_name: archive-vault-new
s3_region2: us-east-2
- s3_bucket2: noaa-nccf-dev-archive
+ s3_bucket2: archive-testing-testing-test
#CSB stream config
format: csv
headers: UNIQUE_ID,FILE_UUID,LON,LAT,DEPTH,TIME,PLATFORM_NAME,PROVIDER
- type: COLLECTION
collection_id: fdb56230-87f4-49f2-ab83-104cfd073177
access_bucket: https://archive-testing-demo.s3-us-east-2.amazonaws.com
#access_bucket: https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com
- file_identifier_prefix: "gov.noaa.ncei.csb:"
+ file_id_prefix: "gov.noaa.ncei.csb:"
# COLLECTION or GRANULE
- metadata_type: granule
- registry_base_url: http://onestop-registry:80
- onestop_base_url: http://onestop-search:8080
+ kafka_consumer_metadata_type: GRANULE
+ kafka_publisher_metadata_type: GRANULE
+ s3_message_adapter_metadata_type: COLLECTION
- security:
- enabled: True
+ registry_base_url: http://os-registry:80
+ onestop_base_url: http://os-search:8080
prefixMap:
NESDIS/CSB: 'fdb56230-87f4-49f2-ab83-104cfd073177'
NESDIS/H8: '0fad03df-0805-434a-86a6-7dc42d68480e'
NESDIS/GOES: '11111111-1111-1111-1111-111111111111'
- NESDIS/SAB: '98e03b47-069a-4f2c-8071-649e8c4254d6'
\ No newline at end of file
+ NESDIS/SAB: '98e03b47-069a-4f2c-8071-649e8c4254d6'
+
+ # Kafka config values
+ brokers: cp-cp-kafka:9092
+ schema_registry: http://cp-cp-schema-registry:8081
+ collection_topic_publish: psi-granules-by-collection
+ granule_topic_publish: psi-granule-parsed
+ collection_topic_consume: psi-collection-input-unknown
+ granule_topic_consume: psi-granule-input-unknown
+ group_id: sme-test
+ auto_offset_reset: earliest
+ security:
+ # True/False
+ enabled: False
+ # If security is enabled then need these:
+ caLoc: /etc/pki/tls/cert.pem
+ keyLoc: /etc/pki/tls/private/kafka-user.key
+ certLoc: /etc/pki/tls/certs/kafka-user.crt
\ No newline at end of file
diff --git a/helm/sme-chart/values.yaml b/helm/sme-chart/values.yaml
index 924f62f..6016adc 100644
--- a/helm/sme-chart/values.yaml
+++ b/helm/sme-chart/values.yaml
@@ -1,7 +1,7 @@
image:
repository: cedardevs/onestop-e2e-demo
tag: latest
- pullPolicy: IfNotPresent
+ pullPolicy: Always
secret:
registry_username:
@@ -14,29 +14,52 @@ config: |-
log_level: INFO
# AWS config values
- sqs_url: https://sqs.us-east-2.amazonaws.com/798276211865/cloud-archive-client-sqs
- sqs_max_polls: 100
+ sqs_name: cloud-archive-client-sqs
s3_region: us-east-2
s3_bucket: archive-testing-demo
+ sqs_max_polls: 100
#AWS config values for 2nd vault in different region
vault_name: archive-vault-new
s3_region2: us-east-2
- s3_bucket2: noaa-nccf-dev-archive
+ s3_bucket2: archive-testing-testing-test
#CSB stream config
format: csv
headers: UNIQUE_ID,FILE_UUID,LON,LAT,DEPTH,TIME,PLATFORM_NAME,PROVIDER
- type: COLLECTION
collection_id: fdb56230-87f4-49f2-ab83-104cfd073177
access_bucket: https://archive-testing-demo.s3-us-east-2.amazonaws.com
#access_bucket: https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com
- file_identifier_prefix: "gov.noaa.ncei.csb:"
+ file_id_prefix: "gov.noaa.ncei.csb:"
# COLLECTION or GRANULE
- metadata_type: granule
- registry_base_url: http://onestop-registry:80
- onestop_base_url: http://onestop-search:8080
+ kafka_consumer_metadata_type: GRANULE
+ kafka_producer_metadata_type: GRANULE
+ web_publisher_metadata_type: GRANULE
+ s3_message_adapter_metadata_type: COLLECTION
+
+ registry_base_url: http://os-registry:80
+ onestop_base_url: http://os-search:8080
+
+ prefixMap:
+ NESDIS/CSB: 'fdb56230-87f4-49f2-ab83-104cfd073177'
+ NESDIS/H8: '0fad03df-0805-434a-86a6-7dc42d68480e'
+ NESDIS/GOES: '11111111-1111-1111-1111-111111111111'
+ NESDIS/SAB: '98e03b47-069a-4f2c-8071-649e8c4254d6'
+ # Kafka config values
+ brokers: cp-cp-kafka:9092
+ schema_registry: http://cp-cp-schema-registry:8081
+ collection_topic_publish: psi-granules-by-collection
+ granule_topic_publish: psi-granule-parsed
+ collection_topic_consume: psi-collection-input-unknown
+ granule_topic_consume: psi-granule-input-unknown
+ group_id: sme-test
+ auto_offset_reset: earliest
security:
- enabled: True
\ No newline at end of file
+ # True/False
+ enabled: False
+ # If security is enabled then need these:
+ caLoc: /etc/pki/tls/cert.pem
+ keyLoc: /etc/pki/tls/private/kafka-user.key
+ certLoc: /etc/pki/tls/certs/kafka-user.crt
\ No newline at end of file
diff --git a/kubernetes/pyconsumer-pod.yaml b/kubernetes/pyconsumer-pod.yaml
index fed2258..e6ac5c5 100644
--- a/kubernetes/pyconsumer-pod.yaml
+++ b/kubernetes/pyconsumer-pod.yaml
@@ -70,19 +70,21 @@ data:
csb:
format: csv
headers: UNIQUE_ID,FILE_UUID,LON,LAT,DEPTH,TIME,PLATFORM_NAME,PROVIDER
- type: COLLECTION
collection_id: fdb56230-87f4-49f2-ab83-104cfd073177
- psi_registry_url: https://cedardevs.org/
+ registry_base_url: https://cedardevs.org/
access_bucket: https://archive-testing-demo.s3-us-east-2.amazonaws.com
#access_bucket: https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com
file_identifier_prefix: "gov.noaa.ncei.csb:"
# Web Publisher
web:
- # COLLECTION or GRANULE
- metadata_type: granule
registry_base_url: https://cedardevs.org/onestop/registry-api
onestop_base_url: https://cedardevs.org/onestop/search-api
security:
- enabled: True
\ No newline at end of file
+ enabled: True
+
+ # COLLECTION or GRANULE
+ kafka_consumer_metadata_type: GRANULE
+ kafka_publisher_metadata_type: GRANULE
+ s3_message_adapter_metadata_type: COLLECTION
\ No newline at end of file
diff --git a/onestop-python-client/README.md b/onestop-python-client/README.md
deleted file mode 100644
index 77986a6..0000000
--- a/onestop-python-client/README.md
+++ /dev/null
@@ -1,180 +0,0 @@
-# OneStop Clients
-
-This python package provides an API to connect to OneStop's event stream (aka Inventory Manager). At this early stage there is only a single module for consuming messages from the kafka brokers that back OneStop.
-## AWS Credentials
-Copy credentials-template.yml to credentials.yml and insert your ACCESS_KEY and SECRET_KEY
-
-## KafkaPublisher
-Relies on fastavro <1.0 and confluent-kafka <1.5
-
-## prerequisites
-You will need a kafka broker and a schema-registry running to test this package. To bring up the OneStop stack, see the [OneStop quickstart documentation](https://github.com/cedardevs/onestop/blob/master/docs/developer/quickstart.md#quick-start-kubernetes--helm--skaffold)
-
-## usage
-Once you have the OneStop stack (or your own kafka broker + schema registry) running, you are ready to install the package and start consuming messages.
-
-The `onestop_client` can be downloaded via pip, like so-
-
-`python3 -m pip install onestop-python-client-cedardevs`
-
-To test the import, try-
-
-```
-$ python3
->>> import onestop_client
-```
-
-Now we are ready to try a script. Our first example, [smeFunc.py](#examples/smeFunc.py), imports our onestop_client package, and passes to it the id, topic, and message handler function. Our library then handles the work to connect to kafka and deserialize the message.
-
-Here is how to run it in k8s so that it can connect to the kafka broker and schema registry-
-```
-kubectl apply -f examples/pyconsumer-pod.yml
-```
-
-At the moment, that pod will tail -f /dev/null to stay open so you can exec into the container with -
-`
-kubectl exec -it pod/pyconsumer -- bash
-`
-# In the container
-Manually add smeFunc.py
-Install requests library
->pip install requests
-
-# In the cluster load some test data into the cluster
-./upload.sh IM /Users/dneufeld/repos/onestop-test-data/DEM http://localhost/registry
-
-#Test it out using cli args
-python smeFunc.py -cmd consume -b onestop-dev-cp-kafka:9092 -s http://onestop-dev-cp-schema-registry:8081 -t psi-registry-collection-parsed-changelog -g sme-test -o earliest
-
-
-python smeFunc.py -cmd produce -b onestop-dev-cp-kafka:9092 -s http://onestop-dev-cp-schema-registry:8081 -t psi-collection-input-unknown
-
-Or you can use env vars available so you can run this -
-```
-python ./smeFunc.py -b $KAFKA_BROKERS -s $SCHEMA_REGISTRY -t $TOPIC -g $GROUP_ID -o $OFFSET
-```
-
-# packaing and publishing new version
-=======
-The general purpose of this python package is to provide an API to connect to OneStop's event stream (aka Inventory Manager).
-This would enable someone to feed data into OneStop, have OneStop digest it, and then read it out via a python script, such as the example [smeFunc.py](#examples/smeFunc.py).
-See the OneStop readme for an example of loading test data into OneStop.
-At this early stage there is only a single module for consuming messages from the kafka brokers that back OneStop.
-
-## Prerequisites
-1. Since you will need a kafka broker and a schema-registry running you will need OneStop and start it up
- [OneStop quickstart documentation](https://github.com/cedardevs/onestop/blob/master/docs/developer/quickstart.md#quick-start-kubernetes--helm--skaffold)
- Setup and start up the OneStop stack
-
-2. Install this python-client and other dependencies via pip
- `pip install -r requirements.txt`
-
- To test the import, try this and it shouldn't give an error:
-
- ```
- $ python3
- >>> import onestop_client
- ```
-
-Now you are ready to start consuming messages.
-
-## Load Test Data
-If you need to load test data then look in the OneStop repo's [OneStop quickstart documentation](https://github.com/cedardevs/onestop/blob/master/docs/developer/quickstart.md#quick-start-kubernetes--helm--skaffold)
-for information on loading test data.
-
-## Example
-
-Our first example, [smeFunc.py](#examples/smeFunc.py), imports our onestop_client package, and passes to it the id, topic, and message handler function.
-Our library then handles the work to connect to kafka and deserialize the message.
-
-1. Here is how to run it in k8s so that the python script can connect to the kafka broker and schema registry:
- ```
- kubectl apply -f examples/pyconsumer-pod.yml
- ```
-
-1. Run this so you can exec the python script within the container:
-
- ```
- kubectl exec -it pyconsumer bash
- ```
-
-1. Then there should be environment variables (you can verify via `echo $OFFSET`) available so you can run this:
-
- ```
- python ./smeFunc.py -b $KAFKA_BROKERS -s $SCHEMA_REGISTRY -t $TOPIC -g $GROUP_ID -o $OFFSET
- ```
-
- If not some sensible defaults are in pyconsumer-pod.yml:
-
- ```
- python ./smeFunc.py -b onestop-dev-cp-kafka:9092 -s http://onestop-dev-cp-schema-registry:8081 -t psi-registry-granule-parsed-changelo21` -g sme-test -o earliest
- ```
-
- NOTE:
- If an error prints out of `ERROR Message handler failed: 'NoneType' object is not subscriptable` that implies the data it was traversing does not have one of the requested values.
-
- Example: If this was in the python script you ran `print(value['fileInformation']['name'])` but the data does not have a value of `fileInformation` it will throw that error.
-
- To fix this you can simply remove ['fileInformation']
-
-## How to publish a new version of this client
->>>>>>> master:python-client/README.md
-First you will need to setup your credentials. Create $HOME/.pypirc and update it with the cedardevs username, pw, and token. It will look like the following-
-```
-[pypi]
- username = __token__
- password =
-```
-You'll need a couple tools to create the distribution and then publish it. To install these tools, run the following command-
-
-```
-python3 -m pip install --user --upgrade setuptools wheel twine
-```
-Note: make sure the version on the setup file is changed
-
-To build the new distribution-
-```
-python3 setup.py sdist bdist_wheel
-```
-
-That should create/update the dist/ directory.
-
-Now to push that to the PyPi repo-
-
-```
-python3 -m twine upload dist/*
-```
-
-#### Install onestop-python-client-cedardevs package
-
-```
-pip install onestop-python-client-cedardevs
-```
-
-importing onestop-python-client-cedardevs package
-
-producer module have the following functions to import
- produce: initiate sending a message to Kafka
- list_topics: Request list of topics from cluster
- produce_raw_message: Uses user's inputs to construct a structured input value
- produce_and_publish_raw_collection: raw collection input value and key to initiate sending message to Kafka
- produce_and_publish_raw_granule: raw granule input value and key to initiate sending message to Kafka
-
- ```
-
-from onestop.producer import ...
-
-```
-
-consumer module have the following functions to import:
- consume: consume messages from a given topic
-
-```
-
-from onestop.consumer import ...
-
-```
-
-##Docker
-docker build --tag cedardevs/onestop-pyconsumer:latest
-docker push cedardevs/onestop-pyconsumer:latest
\ No newline at end of file
diff --git a/onestop-python-client/config/aws-util-config-dev.yml b/onestop-python-client/config/aws-util-config-dev.yml
index ee1ad95..2fdb5c1 100644
--- a/onestop-python-client/config/aws-util-config-dev.yml
+++ b/onestop-python-client/config/aws-util-config-dev.yml
@@ -1,11 +1,12 @@
# Example config values for osim client
-log_level: INFO
# AWS config values
sqs_url: https://sqs.us-east-2.amazonaws.com/798276211865/cloud-archive-client-sqs
+sqs_name: 'foobar'
sqs_max_polls: 2
s3_region: "us-east-2"
s3_bucket: archive-testing-demo
+s3_key: 'ABI-L1b-RadF/2019/298/15/OR_ABI-L1b-RadF-M6C15_G16_s20192981500369_e20192981510082_c20192981510166.nc'
#AWS config values for 2nd vault in different region
vault_name: archive-vault-new
diff --git a/onestop-python-client/config/credentials-template.yml b/onestop-python-client/config/credentials-template.yml
index 006e175..0fe300c 100644
--- a/onestop-python-client/config/credentials-template.yml
+++ b/onestop-python-client/config/credentials-template.yml
@@ -1,4 +1,4 @@
-#Copy me as credentials.yml and update with values and exclude the file from git
+# This is a template to use for confidential information. Do not edit this file, but copy it to a different location.
#NESDIS-SANDBOX
sandbox:
access_key: access_key_value_here
@@ -8,6 +8,3 @@ sandbox:
registry:
username: rw_user
password: rw_user_pwd
-
-
-
diff --git a/onestop-python-client/config/csb-data-stream-config-template.yml b/onestop-python-client/config/csb-data-stream-config-template.yml
index 887c9be..07ab823 100644
--- a/onestop-python-client/config/csb-data-stream-config-template.yml
+++ b/onestop-python-client/config/csb-data-stream-config-template.yml
@@ -1,7 +1,10 @@
-log_level: INFO
+# COLLECTION or GRANULE
+kafka_consumer_metadata_type: COLLECTION
+kafka_publisher_metadata_type: COLLECTION
+s3_message_adapter_metadata_type: COLLECTION
+
format: csv
headers: UNIQUE_ID,FILE_UUID,LON,LAT,DEPTH,TIME,PLATFORM_NAME,PROVIDER
-type: COLLECTION
collection_id: fdb56230-87f4-49f2-ab83-104cfd073177
#registry_base_url: https://internal-a683c98a66fb011eaa4230e0d5e5657f-369075387.us-east-1.elb.amazonaws.com
registry_base_url: http://localhost/onestop/api/registry
@@ -9,7 +12,7 @@ registry_base_url: http://localhost/onestop/api/registry
onestop_base_url: http://localhost/onestop/api/search/search
access_bucket: https://archive-testing-demo.s3-us-east-2.amazonaws.com
#access_bucket: https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com
-file_identifier_prefix: "gov.noaa.ncei.csb:"
+file_id_prefix: "gov.noaa.ncei.csb:"
prefixMap:
NESDIS/CSB: 'fdb56230-87f4-49f2-ab83-104cfd073177'
diff --git a/onestop-python-client/onestop/KafkaConsumer.py b/onestop-python-client/onestop/KafkaConsumer.py
index e45d6cc..c064dd9 100644
--- a/onestop-python-client/onestop/KafkaConsumer.py
+++ b/onestop-python-client/onestop/KafkaConsumer.py
@@ -1,11 +1,9 @@
-import logging
-import yaml
-
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.error import KafkaError
from confluent_kafka import DeserializingConsumer
from confluent_kafka.schema_registry.avro import AvroDeserializer
from confluent_kafka.serialization import StringDeserializer
+from onestop.util.ClientLogger import ClientLogger
class KafkaConsumer:
"""
@@ -13,124 +11,114 @@ class KafkaConsumer:
Attributes
----------
- conf: yaml file
- kafka-publisher-config-dev.yml
- logger: Logger object
- utilizes python logger library and creates logging for our specific needs
- logger.info: Logger object
- logging statement that occurs when the class is instantiated
- metadata_type: str
- type of metadata (COLLECTION or GRANULE)
- brokers: str
- brokers (kubernetes service)
- group_id: str
- Client group id string. All clients sharing the same group.id belong to the same group
- auto_offset_reset: str
- Action to take when there is no initial offset in offset store or the desired offset is out of range (smallest, earliest, beginning, largest, latest, end, error)
- schema_registry: str
- schema registry (kubernetes service)
- security: boolean
- defines if security is in place
- collection_topic: str
- collection topic you want to consume
- granule_topic: str
- granule topic you want to consume
+ metadata_type: str
+ type of metadata (COLLECTION or GRANULE)
+ brokers: str
+ brokers (kubernetes service)
+ group_id: str
+ Client group id string. All clients sharing the same group.id belong to the same group
+ auto_offset_reset: str
+ Action to take when there is no initial offset in offset store or the desired offset is out of range (smallest, earliest, beginning, largest, latest, end, error)
+ schema_registry: str
+ schema registry (kubernetes service)
+ security_enabled: boolean
+ Whether to use security for the kafka schema registry client.
+ security_caLoc: str
+ Kafka schema registry certification authority (CA) file location.
+ security_keyLoc: str
+ Kafka schema registry client's private key file location.
+ security_certLoc: str
+ Kafka schema registry client's public key file location.
+ collection_topic_consume: str
+ collection topic you want to consume
+ granule_topic_consume: str
+ granule topic you want to consume
+ logger: Logger object
+ utilizes python logger library and creates logging for our specific needs
Methods
-------
- get_logger(log_name, create_file)
- creates logger file
-
- register_client()
- registers to schema registry client based on configs
+ register_client()
+ registers to schema registry client based on configs
- create_consumer(registry_client)
- subscribes to topic defined in configs and creates a consumer to deserialize messages from topic
+ connect()
+ utilizes register_client() and create_consumer(registry_client) to connect to schema registry and allow for consumption of topics
- connect()
- utilizes register_client() and create_consumer(registry_client) to connect to schema registry and allow for consumption of topics
+ create_consumer(registry_client)
+ subscribes to topic defined in configs and creates a consumer to deserialize messages from topic
- consume(metadata_consumer, handler)
- asynchronously polls for messages in the connected topic, results vary depending on the handler function that is passed into it
+ consume(metadata_consumer, handler)
+ asynchronously polls for messages in the connected topic, results vary depending on the handler function that is passed into it
"""
- conf = None
-
- def __init__(self, conf_loc):
- with open(conf_loc) as f:
- self.conf = yaml.load(f, Loader=yaml.FullLoader)
-
- self.logger = self.get_logger(self.__class__.__name__, False)
- self.logger.info("Initializing " + self.__class__.__name__)
- self.metadata_type = self.conf['metadata_type']
- self.brokers = self.conf['brokers']
- self.group_id = self.conf['group_id']
- self.auto_offset_reset = self.conf['auto_offset_reset']
- self.schema_registry = self.conf['schema_registry']
- self.security = self.conf['security']['enabled']
-
- self.collection_topic = self.conf['collection_topic_consume']
- self.granule_topic = self.conf['granule_topic_consume']
-
- if self.metadata_type not in ['COLLECTION', 'GRANULE']:
- raise ValueError("metadata_type must be 'COLLECTION' or 'GRANULE'")
- def get_logger(self, log_name, create_file):
+ def __init__(self, kafka_consumer_metadata_type, brokers, group_id, auto_offset_reset, schema_registry, security, collection_topic_consume, granule_topic_consume, log_level = 'INFO', **wildargs):
"""
- Utilizes python logger library and creates logging
-
- :param log_name: str
- name of log to be created
- :param create_file: boolean
- defines whether of not you want a logger file to be created
-
- :return: Logger object
+ Attributes
+ ----------
+ kafka_consumer_metadata_type: str
+ type of metadata (COLLECTION or GRANULE)
+ brokers: str
+ brokers (kubernetes service)
+ group_id: str
+ Client group id string. All clients sharing the same group.id belong to the same group
+ auto_offset_reset: str
+ Action to take when there is no initial offset in offset store or the desired offset is out of range (smallest, earliest, beginning, largest, latest, end, error)
+ schema_registry: str
+ schema registry (kubernetes service) URL
+ security: dict
+ enabled boolean: Whether to use security for kafka schema registry client.
+ caLoc str: Kafka schema registry certification authority (CA) file location.
+ keyLoc str: Kafka schema registry client's private key file location.
+ certLoc str: Kafka schema registry client's public key file location.
+
+ collection_topic_consume: str
+ collection topic you want to consume
+ granule_topic_consume: str
+ granule topic you want to consume
+ log_level: str
+ What log level to use for this class
"""
- # create logger
- log = logging.getLogger()
+ self.metadata_type = kafka_consumer_metadata_type.upper()
+ self.brokers = brokers
+ self.group_id = group_id
+ self.auto_offset_reset = auto_offset_reset
+ self.schema_registry = schema_registry
+ self.security_enabled = security['enabled']
- # create formatter and add it to the handlers
- formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+ if self.security_enabled:
+ self.security_caLoc = security['caLoc']
+ self.security_keyLoc = security['keyLoc']
+ self.security_certLoc = security['certLoc']
- if self.conf['log_level'] == "DEBUG":
- log.setLevel(level=logging.DEBUG)
- else:
- if self.conf['log_level'] == "INFO":
- log.setLevel(level=logging.INFO)
- else:
- log.setLevel(level=logging.ERROR)
+ self.collection_topic = collection_topic_consume
+ self.granule_topic = granule_topic_consume
- fh = None
- if create_file:
- # create file handler for logger.
- fh = logging.FileHandler(log_name)
- fh.setFormatter(formatter)
-
- # create console handler for logger.
- ch = logging.StreamHandler()
- ch.setFormatter(formatter)
+ if self.metadata_type not in ['COLLECTION', 'GRANULE']:
+ raise ValueError("metadata_type of '%s' must be 'COLLECTION' or 'GRANULE'"%(self.metadata_type))
- # add handlers to logger.
- if create_file:
- log.addHandler(fh)
+ self.log_level = log_level
+ self.logger = ClientLogger.get_logger(self.__class__.__name__, log_level, False)
+ self.logger.info("Initializing " + self.__class__.__name__)
- log.addHandler(ch)
- return log
+ if wildargs:
+ self.logger.debug("Superfluous parameters in constructor call: " + str(wildargs))
def register_client(self):
"""
Registers to schema registry client based on configs
:return: SchemaRegistryClient (confluent kafka library)
- """
- reg_conf = {'url': self.schema_registry}
+ """
+ conf = {'url': self.schema_registry}
- if self.security:
- reg_conf['ssl.ca.location'] = self.conf['security']['caLoc']
- reg_conf['ssl.key.location'] = self.conf['security']['keyLoc']
- reg_conf['ssl.certificate.location'] = self.conf['security']['certLoc']
+ if self.security_enabled:
+ conf['ssl.ca.location'] = self.security_caLoc
+ conf['ssl.key.location'] = self.security_keyLoc
+ conf['ssl.certificate.location'] = self.security_certLoc
- registry_client = SchemaRegistryClient(reg_conf)
+ self.logger.info("Creating SchemaRegistryClient with configuration:"+str(conf))
+ registry_client = SchemaRegistryClient(conf)
return registry_client
def connect(self):
@@ -152,33 +140,38 @@ def create_consumer(self, registry_client):
:return: DeserializingConsumer object
"""
- metadata_schema = None
topic = None
if self.metadata_type == "COLLECTION":
- metadata_schema = registry_client.get_latest_version(self.collection_topic + '-value').schema.schema_str
topic = self.collection_topic
if self.metadata_type == "GRANULE":
- metadata_schema = registry_client.get_latest_version(self.granule_topic + '-value').schema.schema_str
topic = self.granule_topic
- metadata_deserializer = AvroDeserializer(metadata_schema, registry_client)
+ self.logger.debug("topic: "+str(topic))
+
+ # This topic naming scheme is how OneStop creates the topics.
+ latest_schema = registry_client.get_latest_version(topic + '-value')
- consumer_conf = {'bootstrap.servers': self.brokers}
+ metadata_schema = latest_schema.schema.schema_str
+ self.logger.debug("metadata_schema: "+metadata_schema)
- if self.security:
- consumer_conf['security.protocol'] = 'SSL'
- consumer_conf['ssl.ca.location'] = self.conf['security']['caLoc']
- consumer_conf['ssl.key.location'] = self.conf['security']['keyLoc']
- consumer_conf['ssl.certificate.location'] = self.conf['security']['certLoc']
+ metadata_deserializer = AvroDeserializer(schema_str=metadata_schema, schema_registry_client=registry_client)
+ conf = {
+ 'bootstrap.servers': self.brokers,
+ 'key.deserializer': StringDeserializer('utf-8'),
+ 'value.deserializer': metadata_deserializer,
+ 'group.id': self.group_id,
+ 'auto.offset.reset': self.auto_offset_reset
+ }
- meta_consumer_conf = consumer_conf
- meta_consumer_conf['key.deserializer'] = StringDeserializer('utf-8')
- meta_consumer_conf['value.deserializer'] = metadata_deserializer
- meta_consumer_conf['group.id'] = self.group_id
- meta_consumer_conf['auto.offset.reset'] = self.auto_offset_reset
+ if self.security_enabled:
+ conf['security.protocol'] = 'SSL'
+ conf['ssl.ca.location'] = self.security_caLoc
+ conf['ssl.key.location'] = self.security_keyLoc
+ conf['ssl.certificate.location'] = self.security_certLoc
- metadata_consumer = DeserializingConsumer(meta_consumer_conf)
+ self.logger.debug("Deserializing conf: "+str(conf))
+ metadata_consumer = DeserializingConsumer(conf)
metadata_consumer.subscribe([topic])
return metadata_consumer
@@ -195,22 +188,19 @@ def consume(self, metadata_consumer, handler):
"""
self.logger.info('Consuming from topic')
while True:
- try:
- msg = metadata_consumer.poll(10)
+ msg = metadata_consumer.poll(10)
+ self.logger.debug("Message received: "+str(msg))
- if msg is None:
- print('No Messages')
- continue
+ if msg is None:
+ self.logger.info('No Messages')
+ continue
- key = msg.key()
- value = msg.value()
+ key = msg.key()
+ value = msg.value()
+ self.logger.debug('Message key=%s'%key)
+ self.logger.debug('Message value=%s'%value)
+ handler(key, value, self.log_level)
- except KafkaError:
- raise
- try:
- handler(key, value)
- except Exception as e:
- self.logger.error("Message handler failed: {}".format(e))
- break
+ self.logger.debug("Closing metadata_consumer")
metadata_consumer.close()
diff --git a/onestop-python-client/onestop/KafkaPublisher.py b/onestop-python-client/onestop/KafkaPublisher.py
index d357de8..9206fe0 100644
--- a/onestop-python-client/onestop/KafkaPublisher.py
+++ b/onestop-python-client/onestop/KafkaPublisher.py
@@ -1,13 +1,11 @@
-import logging
-from uuid import UUID
import json
-import yaml
+from uuid import UUID
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.error import KafkaError
from confluent_kafka import SerializingProducer
from confluent_kafka.schema_registry.avro import AvroSerializer
-
+from onestop.util.ClientLogger import ClientLogger
class KafkaPublisher:
"""
@@ -15,114 +13,98 @@ class KafkaPublisher:
Attributes
----------
- conf: yaml file
- config/kafka-publisher-config-dev.yml
- logger: Logger object
- utilizes python logger library and creates logging for our specific needs
- logger.info: Logger object
- logging statement that occurs when the class is instantiated
- metadata_type: str
- type of metadata (COLLECTION or GRANULE)
- brokers: str
- brokers (kubernetes service)
- schema_registry: str
- schema registry (kubernetes service)
- security: boolean
- defines if security is in place
- collection_topic: str
- collection topic you want to consume
- granule_topic: str
- granule topic you want to consume
+ metadata_type: str
+ type of metadata (COLLECTION or GRANULE)
+ brokers: str
+ brokers (kubernetes service)
+ schema_registry: str
+ schema registry (kubernetes service)
+ security_enabled: boolean
+ defines if security is in place
+ security_caLoc: str
+ Kafka schema registry certification authority (CA) file location.
+ security_keyLoc: str
+ Kafka schema registry client's private key file location.
+ security_certLoc: str
+ Kafka schema registry client's public key file location.
+ collection_topic: str
+ collection topic you want to produce to
+ granule_topic: str
+ granule topic you want to produce to
+ logger: Logger object
+ utilizes python logger library and creates logging for our specific needs
Methods
-------
- get_logger(log_name, create_file)
- creates logger file
-
- register_client()
- registers to schema registry client based on configs
+ register_client()
+ registers to schema registry client based on configs
- create_producer(registry_client)
- creates a SerializingProducer object to produce to kafka topic
+ create_producer(registry_client)
+ creates a SerializingProducer object to produce to kafka topic
- connect()
- utilizes register_client() and create_producer(registry_client) to connect to schema registry and allow for producing to kafka topics
+ connect()
+ utilizes register_client() and create_producer(registry_client) to connect to schema registry and allow for producing to kafka topics
- publish_collection(collection_producer, collection_uuid, content_dict, method)
- Publish collection to collection topic
+ publish_collection(collection_producer, collection_uuid, content_dict, method)
+ Publish collection to collection topic
- publish_granule(granule_producer, record_uuid, collection_uuid, content_dict)
- Publish granule to granule topic
+ publish_granule(granule_producer, collection_uuid, content_dict)
+ Publish granule to granule topic
"""
- conf = None
-
- def __init__(self, conf_loc):
-
- with open(conf_loc) as f:
- self.conf = yaml.load(f, Loader=yaml.FullLoader)
-
- self.logger = self.get_logger(self.__class__.__name__, False)
- self.logger.info("Initializing " + self.__class__.__name__)
- self.metadata_type = self.conf['metadata_type']
- self.brokers = self.conf['brokers']
- self.schema_registry = self.conf['schema_registry']
- self.security = self.conf['security']['enabled']
- self.collection_topic = self.conf['collection_topic_produce']
- self.granule_topic = self.conf['granule_topic_produce']
-
- if self.metadata_type not in ['COLLECTION', 'GRANULE']:
- raise ValueError("metadata_type must be 'COLLECTION' or 'GRANULE'")
-
- def get_logger(self, log_name, create_file):
+ def __init__(self, kafka_publisher_metadata_type, brokers, schema_registry, security, collection_topic_publish, granule_topic_publish, log_level='INFO', **wildargs):
"""
- Utilizes python logger library and creates logging
-
- :param log_name: str
- name of log to be created
- :param create_file: boolean
- defines whether of not you want a logger file to be created
-
- :return: Logger object
+ Attributes
+ ----------
+ kafka_publisher_metadata_type: str
+ type of metadata (COLLECTION or GRANULE)
+ brokers: str
+ brokers (kubernetes service)
+ schema_registry: str
+ schema registry (kubernetes service) URL
+ security: dict
+ enabled boolean: Whether to use security for kafka schema registry client.
+ caLoc str: Kafka schema registry certification authority (CA) file location.
+ keyLoc str: Kafka schema registry client's private key file location.
+ certLoc str: Kafka schema registry client's public key file location.
+
+        collection_topic_publish: str
+            collection topic you want to produce to
+        granule_topic_publish: str
+            granule topic you want to produce to
+        log_level: str
+            What log level to use for this class
+        """
+ self.metadata_type = kafka_publisher_metadata_type.upper()
+ self.brokers = brokers
+ self.schema_registry = schema_registry
+ self.security_enabled = security['enabled']
- # create logger
- log = logging.getLogger()
-
- # create formatter and add it to the handlers
- formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
-
- if self.conf['log_level'] == "DEBUG":
- log.setLevel(level=logging.DEBUG)
- else:
- if self.conf['log_level'] == "INFO":
- log.setLevel(level=logging.INFO)
- else:
- log.setLevel(level=logging.ERROR)
+ if self.security_enabled:
+ self.security_caLoc = security['caLoc']
+ self.security_keyLoc = security['keyLoc']
+ self.security_certLoc = security['certLoc']
- fh = None
- if create_file:
- # create file handler for logger.
- fh = logging.FileHandler(log_name)
- fh.setFormatter(formatter)
+ self.collection_topic = collection_topic_publish
+ self.granule_topic = granule_topic_publish
- # create console handler for logger.
- ch = logging.StreamHandler()
- ch.setFormatter(formatter)
+ if self.metadata_type not in ['COLLECTION', 'GRANULE']:
+ raise ValueError("metadata_type of '%s' must be 'COLLECTION' or 'GRANULE'"%(self.metadata_type))
- # add handlers to logger.
- if create_file:
- log.addHandler(fh)
+ self.logger = ClientLogger.get_logger(self.__class__.__name__, log_level, False)
+ self.logger.info("Initializing " + self.__class__.__name__)
- log.addHandler(ch)
- return log
+ if wildargs:
+ self.logger.debug("Superfluous parameters in constructor call: " + str(wildargs))
def connect(self):
"""
Utilizes register_client() and create_producer(registry_client) to connect to schema registry and allow for producing to kafka topics
:return: SerializingProducer Object
- based on config values
+ based on initial constructor values
"""
registry_client = self.register_client()
metadata_producer = self.create_producer(registry_client)
@@ -137,10 +119,10 @@ def register_client(self):
reg_conf = {'url': self.schema_registry}
- if self.security:
- reg_conf['ssl.ca.location'] = self.conf['security']['caLoc']
- reg_conf['ssl.key.location'] = self.conf['security']['keyLoc']
- reg_conf['ssl.certificate.location'] = self.conf['security']['certLoc']
+ if self.security_enabled:
+ reg_conf['ssl.ca.location'] = self.security_caLoc
+ reg_conf['ssl.key.location'] = self.security_keyLoc
+ reg_conf['ssl.certificate.location'] = self.security_certLoc
registry_client = SchemaRegistryClient(reg_conf)
return registry_client
@@ -153,34 +135,38 @@ def create_producer(self, registry_client):
get this from register_client()
:return: SerializingProducer Object
- based on config values
+ based on initial constructor values
"""
- metadata_schema = None
+ topic = None
if self.metadata_type == "COLLECTION":
- metadata_schema = registry_client.get_latest_version(self.collection_topic + '-value').schema.schema_str
+ topic = self.collection_topic
if self.metadata_type == "GRANULE":
- metadata_schema = registry_client.get_latest_version(self.granule_topic + '-value').schema.schema_str
+ topic = self.granule_topic
+ self.logger.debug("topic: "+str(topic))
- metadata_serializer = AvroSerializer(metadata_schema, registry_client)
- producer_conf = {'bootstrap.servers': self.brokers}
+ metadata_schema = registry_client.get_latest_version(topic + '-value').schema.schema_str
+ self.logger.debug("metadata_schema: "+metadata_schema)
- if self.security:
- producer_conf['security.protocol'] = 'SSL'
- producer_conf['ssl.ca.location'] = self.conf['security']['caLoc']
- producer_conf['ssl.key.location'] = self.conf['security']['keyLoc']
- producer_conf['ssl.certificate.location'] = self.conf['security']['certLoc']
+ metadata_serializer = AvroSerializer(schema_str=metadata_schema, schema_registry_client=registry_client)
+ conf = {
+ 'bootstrap.servers': self.brokers,
+ 'value.serializer': metadata_serializer}
- meta_producer_conf = producer_conf
- meta_producer_conf['value.serializer'] = metadata_serializer
+ if self.security_enabled:
+ conf['security.protocol'] = 'SSL'
+ conf['ssl.ca.location'] = self.security_caLoc
+ conf['ssl.key.location'] = self.security_keyLoc
+ conf['ssl.certificate.location'] = self.security_certLoc
- metadata_producer = SerializingProducer(meta_producer_conf)
+ self.logger.debug("Serializing conf: "+str(conf))
+ metadata_producer = SerializingProducer(conf)
return metadata_producer
def delivery_report(self, err, msg):
"""
- Called once for each message produced to indicate delivery result. Triggered by poll() or flush().
+ Called once for each produced message to report whether delivery succeeded or failed. Triggered by poll() or flush().
:param err: str
err produced after publishing, if there is one
@@ -190,16 +176,29 @@ def delivery_report(self, err, msg):
if err is not None:
self.logger.error('Message delivery failed: {}'.format(err))
else:
- self.logger.error('Message delivered to {} [{}]'.format(msg.topic(), msg.partition()))
+ self.logger.info('Message delivered to {} [{}]'.format(msg.topic(), msg.partition()))
+
+ @staticmethod
+ def get_collection_key_from_uuid(collection_uuid):
+ """
+ Create a key to use in a kafka message from the given string representation of the collection UUID.
+ :param collection_uuid: str
+ collection string to turn into a key.
+ :return: str
+ String representation of the UUID, suitable for use as the kafka message key.
+ """
+ if type(collection_uuid) == bytes:
+ return str(UUID(bytes=collection_uuid))
+ else:
+ return str(UUID(hex=collection_uuid))
def publish_collection(self, collection_producer, collection_uuid, content_dict, method):
"""
- Publish collection to collection topic
+ Publish a collection to the collection topic
:param collection_producer: SerializingProducer
use connect()
:param collection_uuid: str
- collection uuid that you want colelction to have
+ collection uuid that you want the collection to have
:param content_dict: dict
dictionary containing information you want to publish
:param method: str
@@ -208,11 +207,9 @@ def publish_collection(self, collection_producer, collection_uuid, content_dict,
:return: str
returns msg if publish is successful, kafka error if it wasn't successful
"""
- self.logger.info('Publish collection')
- if type(collection_uuid) == bytes:
- key = str(UUID(bytes=collection_uuid))
- else:
- key = str(UUID(hex=collection_uuid))
+ self.logger.info('Publishing collection')
+
+ key = self.get_collection_key_from_uuid(collection_uuid)
value_dict = {
'type': 'collection',
@@ -221,21 +218,20 @@ def publish_collection(self, collection_producer, collection_uuid, content_dict,
'method': method,
'source': 'unknown',
}
- try:
- collection_producer.produce(topic=self.collection_topic, value=value_dict, key=key,
- on_delivery=self.delivery_report)
- except KafkaError:
- raise
+ self.logger.debug('Publishing collection with topic='+self.collection_topic+' key='+key+' value='+str(value_dict))
+ collection_producer.produce(
+ topic=self.collection_topic,
+ value=value_dict,
+ key=key,
+ on_delivery=self.delivery_report)
collection_producer.poll()
- def publish_granule(self, granule_producer, record_uuid, collection_uuid, content_dict):
+ def publish_granule(self, granule_producer, collection_uuid, content_dict):
"""
- Publishes granule to granule topic
+ Publish a granule to the granule topic
:param granule_producer: SerializingProducer
use connect()
- :param record_uuid: str
- record uuid associated with the granule
:param collection_uuid: str
collection uuid associated with the granule
:param content_dict: dict
@@ -246,10 +242,8 @@ def publish_granule(self, granule_producer, record_uuid, collection_uuid, conten
"""
self.logger.info('Publish granule')
- if type(record_uuid) == bytes:
- key = str(UUID(bytes=collection_uuid))
- else:
- key = str(UUID(hex=collection_uuid))
+ key = self.get_collection_key_from_uuid(collection_uuid)
+
"""
if type(collection_uuid) == bytes:
content_dict['relationships'] = [{"type": "COLLECTION", "id": collection_uuid.hex()}]
@@ -281,9 +275,11 @@ def publish_granule(self, granule_producer, record_uuid, collection_uuid, conten
'discovery': content_dict['discovery']
}
- try:
- granule_producer.produce(topic=self.granule_topic, value=value_dict, key=key,
- on_delivery=self.delivery_report)
- except KafkaError:
- raise
+ self.logger.debug('Publishing granule with topic='+self.granule_topic+' key='+key+' value='+str(value_dict))
+ granule_producer.produce(
+ topic=self.granule_topic,
+ value=value_dict,
+ key=key,
+ on_delivery=self.delivery_report)
+
granule_producer.poll()
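
For reference, a minimal usage sketch of the refactored KafkaPublisher against the constructor-argument API above (broker, registry, topic, uuid, and content_dict values are placeholders, and the import path assumes the onestop package layout used elsewhere in this diff):

    from onestop.KafkaPublisher import KafkaPublisher

    publisher = KafkaPublisher(
        kafka_publisher_metadata_type='COLLECTION',
        brokers='onestop-dev-cp-kafka:9092',
        schema_registry='http://onestop-dev-cp-schema-registry:8081',
        security={'enabled': False},
        collection_topic_publish='psi-collection-input-unknown',
        granule_topic_publish='psi-granule-input-unknown',
        log_level='INFO')
    producer = publisher.connect()
    # The content_dict shape must match the registry's avro schema; this is only a placeholder.
    publisher.publish_collection(producer, '9f0a5ff2-fcc1-5a45-b5ba-d5ea8cbd4f26', {'title': 'example'}, 'POST')
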
diff --git a/onestop-python-client/onestop/WebPublisher.py b/onestop-python-client/onestop/WebPublisher.py
index 55ca06c..47c3bd3 100644
--- a/onestop-python-client/onestop/WebPublisher.py
+++ b/onestop-python-client/onestop/WebPublisher.py
@@ -7,31 +7,30 @@ class WebPublisher:
Attributes
----------
- registry_base_url: str
- url for registry endpoint
- registry_username: str
- username for posting metadata to registry
- registry_password: str
- password for posting metadata to registry
- onestop_base_url: str
- url for onestop endpoint
- logger.info: str
- logging level
+ registry_base_url: str
+ URL for registry endpoint
+ registry_username: str
+ Registry username where credentials needed
+ registry_password: str
+ Registry password where credentials needed
+ onestop_base_url: str
+ URL for OneStop endpoint
+ log_level: str
+ logging level for the class logger
Methods
-------
- publish_registry(metadata_type, uuid, payload, method)
- Publish to registry with either POST,PUT, OR PATCH methods
- delete_registry(metadata_type, uuid)
- Deletes item from registry
- search_registry(metadata_type, uuid)
- Searches for an item in registry given its metadata type and uuid
- search_onestop(metadata_type, payload)
- Acquires the item, collection or granule, from OneStop
- get_granules_onestop(self, uuid)
- Acquires granules from OneStop given the uuid
+ publish_registry(metadata_type, uuid, payload, method)
+ Publish an item to registry with either POST, PUT, OR PATCH methods
+ delete_registry(metadata_type, uuid)
+ Delete an item from registry
+ search_registry(metadata_type, uuid)
+ Search for an item in registry given its metadata type and uuid
+ search_onestop(metadata_type, payload)
+ Search for an item in OneStop given its metadata type and payload search criteria
+ get_granules_onestop(self, uuid)
+ Search for a granule in OneStop given its uuid
"""
- conf = None
def __init__(self, registry_base_url, registry_username, registry_password, onestop_base_url, log_level="INFO", **kwargs):
self.registry_base_url = registry_base_url
@@ -43,7 +42,7 @@ def __init__(self, registry_base_url, registry_username, registry_password, ones
self.logger.info("Initializing " + self.__class__.__name__)
if kwargs:
- self.logger.info("There were extra constructor arguments: " + str(kwargs))
+ self.logger.debug("Superfluous parameters in constructor call: " + str(kwargs))
def publish_registry(self, metadata_type, uuid, payload, method):
"""
@@ -84,12 +83,12 @@ def publish_registry(self, metadata_type, uuid, payload, method):
def delete_registry(self, metadata_type, uuid):
"""
- Deletes item from registry
+ Delete an item from registry
:param metadata_type: str
metadata type (GRANULE/COLLECTION)
:param uuid: str
- uuid you want to publish with
+ uuid you want to delete
:return: str
response message indicating if delete was successful
@@ -105,7 +104,7 @@ def delete_registry(self, metadata_type, uuid):
def search_registry(self, metadata_type, uuid):
"""
- Searches for an item in registry given its metadata type and uuid
+ Search for an item in registry given its metadata type and uuid
:param metadata_type: str
metadata type (GRANULE/COLLECTION)
@@ -126,7 +125,7 @@ def search_registry(self, metadata_type, uuid):
def search_onestop(self, metadata_type, payload):
"""
- Searches for an item in OneStop given its metadata type and payload search criteria.
+ Search for an item in OneStop given its metadata type and payload search criteria.
:param metadata_type: str
metadata type (GRANULE/COLLECTION)
@@ -147,7 +146,7 @@ def search_onestop(self, metadata_type, payload):
def get_granules_onestop(self, uuid):
"""
- Searches for a granule in OneStop given its uuid
+ Search for a granule in OneStop given its uuid
:param uuid: str
uuid you want search for
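
A minimal WebPublisher usage sketch against the constructor shown above (URLs, credentials, and the search payload value are placeholders; the payload format mirrors the fieldQuery used in SqsHandlers later in this diff):

    from onestop.WebPublisher import WebPublisher

    wp = WebPublisher(
        registry_base_url='https://registry.example.gov',
        registry_username='rw_user',
        registry_password='rw_password',
        onestop_base_url='https://onestop.example.gov',
        log_level='INFO')
    payload = '{"queries":[{"type": "fieldQuery", "field": "links.linkUrl", "value": "s3://bucket/key.csv"}] }'
    response = wp.search_onestop('granule', payload)
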
diff --git a/onestop-python-client/onestop/extract/CsbExtractor.py b/onestop-python-client/onestop/extract/CsbExtractor.py
index e79cddc..b1006cb 100644
--- a/onestop-python-client/onestop/extract/CsbExtractor.py
+++ b/onestop-python-client/onestop/extract/CsbExtractor.py
@@ -2,61 +2,33 @@
from datetime import datetime
class CsbExtractor:
+
"""
A class used to extract geospatial data from csv files in an s3 bucket
- Attributes
- ----------
- su : S3 Utils object
- an instance of the s3 utils class used to connect to the corresponding s3 bucket to get access to the csv file for extraction
- boto_client: boto3 client
- specific boto3 client type (s3, s3_resource, glacier, session) used to access aws resources
- bucket: str
- the name of the s3 bucket in which you want to access
- key: str
- the name of key path for the specific item you want to access in the bucket
-
-
Methods
-------
is_csv(file_name)
- checks to see if the given file is of type csv
+ Verifies a file name ends with '.csv'
get_spatial_temporal_bounds(lon_column_name, lat_column_name, date_column_name)
- extracts min/max longitude and latitude values as well as beginning and ending dates from specified csv file
+ Gets the spatial and temporal bounds for the open file. Seeks to the start of the file before reading and again when done.
extract_coords(max_lon, max_lat, min_lon, min_lat)
- extracts specific coordinates corresponding to min/max longitude and latitude values given from get_spatial_temporal_bounds(....) method
+ Given min/max longitude and latitude values, parses the csv file and extracts the coordinates that match those bounding values.
"""
- def __init__(self, su, key):
- """
- :param su: S3 Utils object
- an instance of the s3 utils class used to connect to the corresponding s3 bucket to get access to the csv file for extraction
- :param key: str
- the name of key path for the specific item you want to access in the bucket
-
- Other Attributes
- ________________
- boto_client: boto3 client
- specific boto3 client type (s3, s3_resource, glacier, session) used to access aws resources
- bucket: str
- the name of the s3 bucket in which you want to access
+ @staticmethod
+ def is_csv(file_name):
"""
- self.su = su
- boto_client = self.su.connect("session", None)
- bucket = self.su.conf['s3_bucket']
- self.key = key
-
- def is_csv(self, file_name):
- """
- Checks to see if the given file is of type csv
+ Verifies a file name ends with '.csv'
:param file_name: str
- the name of the file in the s3 bucket i.e. file1.csv
+ File name with extension on the end.
- :return: boolean
- True if the file name contains .csv and False otherwise
+ :return: boolean
+ True if the file name ends with '.csv'
+ False otherwise
"""
csv_str = '.csv'
if file_name.endswith(csv_str):
@@ -64,28 +36,22 @@ def is_csv(self, file_name):
return False
- # def smart_open_read(self, key):
- # boto_client = self.su.connect("session", None)
- # bucket = self.su.conf['s3_bucket']
- # self.su.read_csv_s3(boto_client, bucket, key)
-
-
- def get_spatial_temporal_bounds(self, lon_column_name, lat_column_name, date_column_name):
+ @staticmethod
+ def get_spatial_temporal_bounds(sm_open_file, lon_column_name, lat_column_name, date_column_name):
"""
- Extracts min/max longitude and latitude values as well as beginning and ending dates from specified csv file
+ Gets the spatial and temporal bounds for the open file. Seeks to the start of the file before reading and again when done.
+ :param sm_open_file: file-like object
+ A file-like object that is open, say from smart_open's sm_open.
:param lon_column_name: str
- name of longitude column in the csv file
+ Longitude column name
:param lat_column_name: str
- name of the latitude column in the csv file
+ Latitude column name
:param date_column_name: str
- name of the date column in the csv file
+ Date column name
:return: dict
- Key : Value
- geospatial (str) -> List[float] containing min/max longitude and latitude values
- temporal (str) -> List[str] containing beginning and end dates
-
+ dict with a 'geospatial' field ([min lon, min lat, max lon, max lat]) and a 'temporal' field ([begin date, end date]).
"""
lon_min_val = None
lon_max_val = None
@@ -99,9 +65,7 @@ def get_spatial_temporal_bounds(self, lon_column_name, lat_column_name, date_col
# variable to be returned in string format
begin_date_str = ''
- boto_client = self.su.connect("session", None)
- bucket = self.su.conf['s3_bucket']
- sm_open_file = self.su.get_csv_s3(boto_client, bucket, self.key)
+ sm_open_file.seek(0)
csv_reader = csv.DictReader(sm_open_file)
for row in csv_reader:
@@ -151,43 +115,40 @@ def get_spatial_temporal_bounds(self, lon_column_name, lat_column_name, date_col
"temporal": [begin_date_str, end_date_str]
}
+ sm_open_file.seek(0)
return geospatial_temporal_bounds
-
- def extract_coords(self, max_lon, max_lat, min_lon, min_lat):
+ @staticmethod
+ def extract_coords(sm_open_file, max_lon, max_lat, min_lon, min_lat):
"""
- Extracts specific coordinates corresponding to min/max longitude and latitude values given from get_spatial_temporal_bounds(....) method
-
- :param max_lon: float
- maximum longitude value
- :param max_lat: float
- maximum latitude value
- :param min_lon: float
- minimum longitude value
- :param min_lat: float
- minimum latitude value
-
- :return: List[ List[Float] ]
- Returns a list of lists. Each list contains floats (longitude and latitude ) value pairs corresponding to
- one of the min/max latitude and longitude values that were extracted previously from get_spatial_temporal_bounds (...)
+ Given min/max longitude and latitude values, parses the csv file and extracts the coordinates that match those bounding values.
+
+ :param sm_open_file: file-like object
+ A file-like object that is open, say from smart_open's sm_open.
+ :param max_lon: float
+ Maximum longitude
+ :param max_lat: float
+ Maximum latitude
+ :param min_lon: float
+ Minimum longitude
+ :param min_lat: float
+ Minimum latitude
+
+ :return: list
+ List of unique [longitude, latitude] pairs from the file that match the given bounding values.
"""
- # Keeps track of all coordinates that needs to be added to json payload
coords = []
- boto_client = self.su.connect("session", None)
- bucket = self.su.conf['s3_bucket']
- sm_open_file = self.su.get_csv_s3(boto_client, bucket, self.key)
+ sm_open_file.seek(0)
csv_reader = csv.DictReader(sm_open_file)
-
for row in csv_reader:
- if float( row['LAT'] ) == min_lat or float( row['LAT'] ) == max_lat or float(
- row['LON'] ) == min_lon or float( row['LON'] ) == max_lon:
+ if float( row['LAT'] ) == min_lat or float( row['LAT'] ) == max_lat or \
+ float( row['LON'] ) == min_lon or float( row['LON'] ) == max_lon:
coord = [float( row['LON'] ), float( row['LAT'] )]
-
- # check to see if that coordinate has already been appended to the list that is keeping track of our coordinates
+ # only append a coordinate if it is not already in the list (no duplicates)
if coord not in coords:
coords.append( coord )
+ sm_open_file.seek(0)
return coords
-
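
Since CsbExtractor's methods are now static and operate on an open file-like object, a minimal sketch of the intended call pattern (the file path and column names follow the unit test added later in this diff; any other values are placeholders):

    from onestop.extract.CsbExtractor import CsbExtractor

    with open('test/data/file4.csv') as f:
        if CsbExtractor.is_csv('file4.csv'):
            bounds = CsbExtractor.get_spatial_temporal_bounds(f, 'LON', 'LAT', 'TIME')
            min_lon, min_lat, max_lon, max_lat = bounds['geospatial']
            begin_date, end_date = bounds['temporal']
            coords = CsbExtractor.extract_coords(f, max_lon, max_lat, min_lon, min_lat)
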
diff --git a/onestop-python-client/onestop/util/S3MessageAdapter.py b/onestop-python-client/onestop/util/S3MessageAdapter.py
index d640b77..9b74bb3 100644
--- a/onestop-python-client/onestop/util/S3MessageAdapter.py
+++ b/onestop-python-client/onestop/util/S3MessageAdapter.py
@@ -1,10 +1,4 @@
-import yaml
from onestop.util.ClientLogger import ClientLogger
-"""
-from onestop.info.ImMessage import ImMessage
-from onestop.info.FileMessage import FileMessage
-from onestop.info.Link import Link
-"""
from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.parsed_record import ParsedRecord, Publishing, ErrorEvent
from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.file_location import FileLocation,FileLocationType
@@ -14,81 +8,70 @@
from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.discovery import Discovery, Link
-
class S3MessageAdapter:
"""
A class used to extract information from sqs messages that have been triggered by s3 events and transform it into correct format for publishing to IM Registry
Attributes
----------
- conf: yaml file
- csb-data-stream-config.yml
- s3_utils: S3Utils object
- used to access objects inside of s3 buckets
- logger: ClientLogger object
- utilizes python logger library and creates logging for our specific needs
- logger.info: ClientLogger object
- logging statement that occurs when the class is instantiated
- prefix_mapping: Dict
- contains mapping of various line offices and their associated collection id
+ access_bucket: str
+ Cloud bucket to put in the links field when transformed.
+ metadata_type: str
+ COLLECTION or GRANULE
+ file_id_prefix: str
+ File prefix returned as fileIdentifier
+ collection_id: str
+ Collection this data belongs to. Returned as parent identifier.
+ log_level: str
+ The log level to use for this class (Defaults to 'INFO')
- Methods
- -------
- collection_id_map(s3_key)
- given an s3 key that contains one of the NESDIS line offices in its path, it will provide the corresponding collection id
-
- transform(recs)
- transforms sqs message triggered by s3 event to correct format for publishing to IM registry
- """
- def __init__(self, conf_loc, s3_utils):
- """
-
- :param conf_loc: yaml file
- csb-data-stream-config.yml
- :param s3_utils: S3Utils object
- used to access objects inside of s3 buckets
-
- Other Attributes
- ----------------
logger: ClientLogger object
utilizes python logger library and creates logging for our specific needs
logger.info: ClientLogger object
logging statement that occurs when the class is instantiated
- prefix_mapping: Dict
- contains mapping of various line offices and their associated collection id
+ Methods
+ -------
+ transform(recs)
+ transforms sqs message triggered by s3 event to correct format for publishing to IM registry
+ """
+ def __init__(self, access_bucket, s3_message_adapter_metadata_type, file_id_prefix, collection_id, log_level = 'INFO', **wildargs):
"""
- with open(conf_loc) as f:
- self.conf = yaml.load(f, Loader=yaml.FullLoader)
-
- self.logger = ClientLogger.get_logger(self.__class__.__name__, self.conf['log_level'], False)
- self.logger.info("Initializing " + self.__class__.__name__)
- self.s3_utils = s3_utils
+ Parameters
+ ----------
+ access_bucket: str
+ access bucket to put in the links field when transformed.
+ s3_message_adapter_metadata_type: str
+ COLLECTION or GRANULE
+ file_id_prefix: str
+ File prefix returned as fileIdentifier
+ collection_id: str
+ Collection this data belongs to. Returned as parent identifier.
+ log_level: str
+ Log level to use for this class (defaults to 'INFO')
- self.prefix_mapping = self.conf['prefixMap']
-
- def collection_id_map(self, s3_key):
"""
- Given an s3 key that contains one of the NESDIS line offices in its path, it will provide the corresponding collection id
-
- :param s3_key: str
- key path of object in s3 bucket
+ self.access_bucket = access_bucket
+ self.metadata_type = s3_message_adapter_metadata_type.upper()
+ self.file_id_prefix = file_id_prefix
+ self.collection_id = collection_id
+ self.logger = ClientLogger.get_logger(self.__class__.__name__, log_level, False)
+ self.logger.info("Initializing " + self.__class__.__name__)
- :return: str
- associated line office collection id
- """
- # Looks through our prefix map and returns appropriate collection id
- for key in self.prefix_mapping:
- if key in s3_key:
- return self.prefix_mapping[key]
+ if self.metadata_type not in ['COLLECTION', 'GRANULE']:
+ raise ValueError("metadata_type of '%s' must be 'COLLECTION' or 'GRANULE'"%(self.metadata_type))
+ if wildargs:
+ self.logger.debug("Superfluous parameters in constructor call: " + str(wildargs))
def transform(self, recs):
"""
Transforms sqs message triggered by s3 event to correct format for publishing to IM registry
- :param recs: dict
- sqs event message
+ Parameters
+ ----------
+ recs: list
+ sqs event message records to transform
:return: ParsedRecord Object
The Parsed Record class is an avro schema generated class
@@ -111,8 +94,8 @@ def transform(self, recs):
fileInformation = FileInformation(name=file_name, size=file_size, checksums=[checkSum], optionalAttributes={})
# Relationship
- relationshipType = RelationshipType(type=self.conf['type'])
- relationship = Relationship(id=self.conf['collection_id'], type=relationshipType)
+ relationshipType = RelationshipType(type=self.metadata_type)
+ relationship = Relationship(id=self.collection_id, type=relationshipType)
# File Location
fileLocationType = FileLocationType(type='ARCHIVE')
@@ -127,12 +110,12 @@ def transform(self, recs):
publishing = Publishing(isPrivate=True)
# Discovery
- access_obj_uri = self.conf['access_bucket'] + "/" + s3_key
+ access_obj_uri = self.access_bucket + "/" + s3_key
link1 = Link(linkName="Amazon S3", linkUrl=access_obj_uri, linkProtocol="HTTPS", linkFunction="download")
link2 = Link(linkName="Amazon S3", linkUrl=s3_obj_uri, linkProtocol="Amazon:AWS:S3", linkFunction="download")
# To Change? Come back to this later
- parent_identifier = self.conf['collection_id']
- file_identifier = self.conf['file_identifier_prefix'] + file_name[:-4]
+ parent_identifier = self.collection_id
+ file_identifier = self.file_id_prefix + file_name[:-4]
# Initializing most fields to their default values in the avro schema so that it doesn't cause an error in Kafka
discovery = Discovery(links=[link1, link2], title=file_name, parentIdentifier=parent_identifier,
diff --git a/onestop-python-client/onestop/util/S3Utils.py b/onestop-python-client/onestop/util/S3Utils.py
index 7bb0fbe..e654df9 100644
--- a/onestop-python-client/onestop/util/S3Utils.py
+++ b/onestop-python-client/onestop/util/S3Utils.py
@@ -1,5 +1,5 @@
import logging
-import yaml
+
import uuid
import boto3
import botocore
@@ -15,100 +15,106 @@ class S3Utils:
Attributes
----------
- conf: yaml file
- aws-util-config-dev.yml
- cred: yaml file
- credentials.yml
- logger: ClientLogger object
- utilizes python logger library and creates logging for our specific needs
- logger.info: ClientLogger object
- logging statement that occurs when the class is instantiated
+ access_key: str
+ Cloud access key
- Methods
- -------
- connect(client_type, region)
- connects to a boto3 client
+ secret_key: str
+ Cloud secret key
- objectkey_exists(bucket, s3_key)
- checks to see if a s3 key path exists in a particular bucket
+ log_level: str
+ The log level to use for this class (Defaults to 'INFO')
- get_uuid_metadata(boto_client, bucket, s3_key)
- returns metadata uuid of an s3 object if it has one, otherwise prints that one does not exist
+ logger: ClientLogger object
+ Creates logging for us to log to.
- add_uuid_metadata(boto_client, bucket, s3_key)
- adds metadata uuid to an s3 object
+ Methods
+ -------
+ connect(client_type, region)
+ connects to a boto3 service
- upload_s3(boto_client, local_file, bucket, s3_key, overwrite)
- uploads a file to s3 bucket
+ objectkey_exists(bucket, s3_key)
+ checks to see if a s3 key path exists in a particular bucket
- get_csv_s3(boto_client, bucket, key)
- gets a csv file from s3 bucket using smart open library
+ get_uuid_metadata(boto_client, bucket, s3_key)
+ returns metadata uuid of an s3 object if it has one, otherwise prints that one does not exist
- read_bytes_s3(boto_client, bucket, key)
- returns raw information of s3 object
+ add_uuid_metadata(boto_client, bucket, s3_key)
+ adds metadata uuid to an s3 object
- upload_archive(boto_client, vault_name, src_data)
- Add an archive to an Amazon S3 Glacier vault. The upload occurs synchronously.
+ upload_s3(boto_client, local_file, bucket, s3_key, overwrite)
+ uploads a file to s3 bucket
- s3_to_glacier(boto_client, bucket_name, key)
- Changes storage class of s3 object from s3 -> glacier. Utilizes s3 client type
+ get_csv_s3(boto_client, bucket, key)
+ gets a csv file from s3 bucket using smart open library
- s3_to_glacier_object_lock(boto_client, bucket_name, key, object_lock_mode, object_lock_retention)
- Changes storage class of s3 object from s3 -> glacier and places it in object lock mode. Utilizes s3 client type
+ read_bytes_s3(boto_client, bucket, key)
+ returns raw information of s3 object
- s3_restore(boto_client, bucket_name, key, days)
- Restores an object in S3 glacier back to S3 for specified amount of days
+ upload_archive(boto_client, vault_name, src_data)
+ Add an archive to an Amazon S3 Glacier vault. The upload occurs synchronously.
- retrieve_inventory(boto_client, vault_name)
- Initiate an Amazon Glacier inventory-retrieval job
+ s3_to_glacier(boto_client, bucket_name, key)
+ Changes storage class of s3 object from s3 -> glacier. Utilizes s3 client type
- retrieve_inventory_results(vault_name, boto_client, job_id)
- Retrieve the results of an Amazon Glacier inventory-retrieval job
- """
- conf = None
+ s3_to_glacier_object_lock(boto_client, bucket_name, key, object_lock_mode, object_lock_retention)
+ Changes storage class of s3 object from s3 -> glacier and places it in object lock mode. Utilizes s3 client type
- def __init__(self, conf_loc, cred_loc):
+ s3_restore(boto_client, bucket_name, key, days)
+ Restores an object in S3 glacier back to S3 for specified amount of days
- with open(conf_loc) as f:
- self.conf = yaml.load(f, Loader=yaml.FullLoader)
+ retrieve_inventory(boto_client, vault_name)
+ Initiate an Amazon Glacier inventory-retrieval job
- with open(cred_loc) as f:
- self.cred = yaml.load(f, Loader=yaml.FullLoader)
+ retrieve_inventory_results(vault_name, boto_client, job_id)
+ Retrieve the results of an Amazon Glacier inventory-retrieval job
+ """
- self.logger = ClientLogger.get_logger(self.__class__.__name__, self.conf['log_level'], False)
+ def __init__(self, access_key, secret_key, log_level = 'INFO', **wildargs):
+ self.access_key = access_key
+ self.secret_key = secret_key
+ self.logger = ClientLogger.get_logger(self.__class__.__name__, log_level, False)
self.logger.info("Initializing " + self.__class__.__name__)
- def connect(self, client_type, region):
+ if wildargs:
+ self.logger.debug("Superfluous parameters in constructor call: " + str(wildargs))
+
+ def connect(self, type, service_name, region):
"""
- Connects to a boto3 client
+ Connects to a boto3 object of the specified type using the credentials provided in the constructor.
- :param client_type: str
- boto client type in which you want to access
+ :param type: str
+ boto3 object type to return: 'session', 'client', or 'resource'.
+ :param service_name: str
+ (Optional for session type) boto service name in which you want to access
:param region: str
- name of aws region you want to access
+ (Optional for session type) name of aws region you want to access
- :return: boto3 client
- dependent on the client_type parameter
+ :return: boto3 connection object
+ A boto3 connection object; Client, Session, or Resource.
"""
-
- if client_type == "s3":
- boto = boto3.client("s3", aws_access_key_id=self.cred['sandbox']['access_key'],
- aws_secret_access_key=self.cred['sandbox']['secret_key'], region_name=region)
-
- if client_type == "s3_resource":
- boto = boto3.resource("s3", region_name=region, aws_access_key_id=self.cred['sandbox']['access_key'],
- aws_secret_access_key=self.cred['sandbox']['secret_key'] )
-
- if client_type == "glacier":
- boto = boto3.client("glacier", region_name=region, aws_access_key_id=self.cred['sandbox']['access_key'],
- aws_secret_access_key=self.cred['sandbox']['secret_key'])
-
- if client_type == "session":
- boto = boto3.Session(
- aws_access_key_id=self.cred['sandbox']['access_key'],
- aws_secret_access_key=self.cred['sandbox']['secret_key'],
+ type = type.lower()
+ if type == 'session':
+ return boto3.Session(
+ aws_access_key_id=self.access_key,
+ aws_secret_access_key=self.secret_key,
+ region_name=region
+ )
+ elif type == 'client':
+ return boto3.client(
+ service_name,
+ aws_access_key_id=self.access_key,
+ aws_secret_access_key=self.secret_key,
+ region_name=region
+ )
+ elif type == 'resource':
+ return boto3.resource(
+ service_name,
+ region_name=region,
+ aws_access_key_id=self.access_key,
+ aws_secret_access_key=self.secret_key
)
- return boto
+ else:
+ raise Exception('Unknown boto3 type of "%s"'%(type))
def objectkey_exists(self, bucket, s3_key):
"""
@@ -214,7 +220,7 @@ def upload_s3(self, boto_client, local_file, bucket, s3_key, overwrite):
obj_uuid = str(uuid.uuid4())
if not overwrite:
- key_exists = self.objectkey_exists(boto_client, bucket, s3_key)
+ key_exists = self.objectkey_exists(bucket, s3_key)
if (not key_exists) or (key_exists and overwrite):
try:
@@ -226,11 +232,11 @@ def upload_s3(self, boto_client, local_file, bucket, s3_key, overwrite):
self.logger.error("File to upload was not found. Path: "+local_file)
return False
- def get_csv_s3(self, boto_client, bucket, key):
+ def get_csv_s3(self, boto_session, bucket, key):
"""
gets a csv file from s3 bucket using smart open library
- :param boto_client: session
+ :param boto_session: session
utilizes boto session type
:param bucket: str
name of bucket
@@ -240,7 +246,7 @@ def get_csv_s3(self, boto_client, bucket, key):
:return: smart open file
"""
url = "s3://" + bucket + "/" + key
- sm_open_file = sm_open(url, 'r', transport_params={'session': boto_client})
+ sm_open_file = sm_open(url, 'r', transport_params={'session': boto_session})
return sm_open_file
def read_bytes_s3(self, boto_client, bucket, key):
@@ -386,7 +392,6 @@ def s3_restore(self, boto_client, bucket_name, key, days):
# returns status of object retrieval
return obj.restore
-
def retrieve_inventory(self, boto_client, vault_name):
"""
Initiate an Amazon Glacier inventory-retrieval job
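
The reworked S3Utils.connect(type, service_name, region) signature replaces the old per-client-type branches; a minimal sketch of how the three connection types are obtained (credentials, region, bucket, and key values are placeholders):

    from onestop.util.S3Utils import S3Utils

    s3_utils = S3Utils(access_key='my-access-key', secret_key='my-secret-key', log_level='INFO')
    s3_client = s3_utils.connect('client', 's3', 'us-east-1')       # boto3 client
    s3_resource = s3_utils.connect('resource', 's3', 'us-east-1')   # boto3 resource
    session = s3_utils.connect('session', None, 'us-east-1')        # boto3 session; service_name unused
    sm_file = s3_utils.get_csv_s3(session, 'my-bucket', 'csv/file1.csv')
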
diff --git a/onestop-python-client/onestop/util/SqsConsumer.py b/onestop-python-client/onestop/util/SqsConsumer.py
index f782cc5..4f2b6ac 100644
--- a/onestop-python-client/onestop/util/SqsConsumer.py
+++ b/onestop-python-client/onestop/util/SqsConsumer.py
@@ -1,10 +1,7 @@
-import logging
-from datetime import datetime, timezone
-import yaml
-import boto3
import json
-from onestop.util.ClientLogger import ClientLogger
+from datetime import datetime, timezone
+from onestop.util.ClientLogger import ClientLogger
class SqsConsumer:
"""
@@ -12,116 +9,102 @@ class SqsConsumer:
Attributes
----------
- conf: yaml file
- aws-util-config-dev.yml
- cred: yaml file
- credentials.yml
- logger: ClientLogger object
- utilizes python logger library and creates logging for our specific needs
- logger.info: ClientLogger object
- logging statement that occurs when the class is instantiated
+ logger: ClientLogger object
+ utilizes python logger library and creates logging for our specific needs
Methods
-------
- connect()
- connects a boto sqs instance based on configurations in conf and cred yml files
-
- receive_messages(queue, sqs_max_polls, cb)
- polls for messages in the queue
+ receive_messages(sqs_client, sqs_queue_name, sqs_max_polls, cb)
+ polls for messages in the queue
"""
- conf = None
- def __init__(self, conf_loc, cred_loc):
+ def __init__(self, log_level = 'INFO', **wildargs):
"""
-
- :param conf_loc: yaml file
- aws-util-config-dev.yml
- :param cred_loc: yaml file
- credentials.yml
-
- Other Attributes
- ----------------
- logger: ClientLogger object
- utilizes python logger library and creates logging for our specific needs
- logger.info: ClientLogger object
- logging statement that occurs when the class is instantiated
-
+ Attributes
+ ----------
+ log_level: str
+ The log level to use for this class (Defaults to 'INFO')
"""
- with open(conf_loc) as f:
- self.conf = yaml.load(f, Loader=yaml.FullLoader)
-
- with open(cred_loc) as f:
- self.cred = yaml.load(f, Loader=yaml.FullLoader)
-
- self.logger = ClientLogger.get_logger(self.__class__.__name__, self.conf['log_level'], False)
+ self.log_level = log_level
+ self.logger = ClientLogger.get_logger(self.__class__.__name__, log_level, False)
self.logger.info("Initializing " + self.__class__.__name__)
- def connect(self):
- """
- Connects a boto sqs instance based on configurations in conf and cred yml files
+ if wildargs:
+ self.logger.debug("Superfluous parameters in constructor call: " + str(wildargs))
- :return: boto sqs
- returns instance of boto sqs resource
+ def connect(self, sqs_resource, sqs_queue_name):
"""
- boto_session = boto3.Session(aws_access_key_id=self.cred['sandbox']['access_key'],
- aws_secret_access_key=self.cred['sandbox']['secret_key'])
- # Get the queue. This returns an SQS.Queue instance
- sqs_session = boto_session.resource('sqs', region_name=self.conf['s3_region'])
- sqs_queue = sqs_session.Queue(self.conf['sqs_url'])
- self.logger.info("Connecting to " + self.conf['sqs_url'])
- return sqs_queue
-
- def receive_messages(self, queue, sqs_max_polls, cb):
+ Gets a boto SQS.Queue resource.
+ :param sqs_resource: boto SQS.Resource
+ SQS resource to create the queue from.
+ :param sqs_queue_name: str
+ SQS queue name to create and return a boto SQS.Queue object to.
+ :return: SQS.Queue
+ An SQS.Queue resource to use for Queue operations.
"""
- Polls for messages in the queue
+ return sqs_resource.create_queue(QueueName=sqs_queue_name)
- :param queue: boto sqs resource
- instance of boto sqs resource given from connect()
+ def receive_messages(self, sqs_queue, sqs_max_polls, cb):
+ """
+ Polls for messages from an sqs queue
+
+ :param sqs_queue: boto SQS.Queue object
+ boto SQS Queue object. Can be generated by the method in this class.
:param sqs_max_polls: int
number of polls
:param cb: function
call back function
- :return: Dependent on the call back function
+ :return: None. If a message contains a 'Records' key, the callback function is invoked with those records.
"""
self.logger.info("Receive messages")
+ self.logger.info("Polling %d time(s) for SQS messages" % sqs_max_polls)
+
+ if sqs_max_polls < 1:
+ raise ValueError('Max polling value should be greater than 0.')
- i = 1
- while i <= sqs_max_polls:
+ for i in range(1, sqs_max_polls+1):
self.logger.info("Polling attempt: " + str(i))
- i = i + 1
- sqs_messages = queue.receive_messages(MaxNumberOfMessages=10, WaitTimeSeconds=10)
+ # boto3 SQS.Queue exposes a subset of SQS.Client methods plus a few queue-management ones,
+ # and the shared message methods differ in naming and return types:
+ #   Client.send_message vs Queue.send_message / Queue.send_messages
+ #   Client.receive_message vs Queue.receive_messages
+ sqs_messages = sqs_queue.receive_messages(
+ MaxNumberOfMessages=10,
+ WaitTimeSeconds=10
+ )
self.logger.info("Received %d messages." % len(sqs_messages))
+ self.logger.debug("Messages: %s" % sqs_messages)
for sqs_message in sqs_messages:
- try:
- # Log start time
- dt_start = datetime.now(tz=timezone.utc)
- self.logger.info("Started processing message")
+ # Log start time
+ dt_start = datetime.now(tz=timezone.utc)
+ self.logger.info("Starting message processing")
+ self.logger.debug("Message: %s" % sqs_message)
+ self.logger.debug("Message body: %s" % sqs_message.body)
+ try:
message_body = json.loads(sqs_message.body)
+ self.logger.debug("Message body message: %s" % message_body['Message'])
message_content = json.loads(message_body['Message'])
-
- if 'Records' in message_content:
- recs = message_content['Records']
- self.logger.info("Received message")
- self.logger.debug('Records: ' + str(recs))
- else:
- self.logger.info("s3 event without records content received.")
-
- sqs_message.delete()
-
- self.logger.info("The SQS message has been deleted.")
-
- dt_end = datetime.now(tz=timezone.utc)
- processing_time = dt_end - dt_start
-
- self.logger.info("Completed processing message (s):" + str(processing_time.microseconds * 1000))
- cb(recs)
-
except:
self.logger.exception(
"An exception was thrown while processing a message, but this program will continue. The "
- "message will not be deleted from the SQS queue. The message was: %s" % sqs_message.body)
+ "message will not be deleted from the SQS queue. The message was: %s" % sqs_message)
+
+ if 'Records' in message_content:
+ recs = message_content['Records']
+ self.logger.debug('Message "Records": %s' % recs)
+ cb(recs, self.log_level)
+ else:
+ self.logger.info("s3 event message without 'Records' content received.")
+
+ dt_end = datetime.now(tz=timezone.utc)
+ processing_time = dt_end - dt_start
+ self.logger.info("Completed processing the message in %s seconds."%(processing_time.microseconds / 1000000))
+
+ sqs_message.delete()
+ self.logger.info("The SQS message has been deleted.")
diff --git a/onestop-python-client/onestop/util/SqsHandlers.py b/onestop-python-client/onestop/util/SqsHandlers.py
index 57be8da..9170f8d 100644
--- a/onestop-python-client/onestop/util/SqsHandlers.py
+++ b/onestop-python-client/onestop/util/SqsHandlers.py
@@ -1,3 +1,8 @@
+import json
+
+from onestop.util.ClientLogger import ClientLogger
+from onestop.schemas.util.jsonEncoder import EnumEncoder
+
def create_delete_handler(web_publisher):
"""
Creates a delete function handler to be used with SqsConsumer.receive_messages.
@@ -7,21 +12,91 @@ def create_delete_handler(web_publisher):
:param: web_publisher: WebPublisher object
"""
- def delete(records):
- if records is None:
+ def delete(records, log_level='INFO'):
+
+ logger = ClientLogger.get_logger('SqsHandlers.create_delete_handler.delete', log_level, False)
+ logger.info("In create_delete_handler.delete() handler")
+ logger.debug("Records: %s"%records)
+
+ if not records:
+ logger.info("Ending handler, records empty, records=%s"%records)
return
+
record = records[0]
if record['eventName'] != 'ObjectRemoved:Delete':
+ logger.info("Ending handler, eventName=%s"%record['eventName'])
return
+
+ logger.info('Attempting to delete record %s'%record)
+
bucket = record['s3']['bucket']['name']
s3_key = record['s3']['object']['key']
s3_url = "s3://" + bucket + "/" + s3_key
payload = '{"queries":[{"type": "fieldQuery", "field": "links.linkUrl", "value": "' + s3_url + '"}] }'
search_response = web_publisher.search_onestop('granule', payload)
+ logger.debug('OneStop search response=%s'%search_response)
response_json = search_response.json()
+ logger.debug('OneStop search response json=%s'%response_json)
+ logger.debug('OneStop search response data=%s'%response_json['data'])
if len(response_json['data']) != 0:
granule_uuid = response_json['data'][0]['id']
response = web_publisher.delete_registry('granule', granule_uuid)
+ logger.debug('web_publisher.delete_registry response: %s'%response)
return response
+ logger.warning("OneStop search response has no 'data' field. Response=%s"%response_json)
+
return delete
+
+def create_upload_handler(web_publisher, s3_utils, s3_message_adapter):
+ """
+ Creates an upload function handler to be used with SqsConsumer.receive_messages.
+
+ The upload handler function checks the object for a UUID and if one is not found, it will create one for it.
+
+ :param: web_publisher: WebPublisher object
+ :param: s3_utils: S3Utils object
+ :param: s3_message_adapter: S3MessageAdapter object
+
+ """
+ def upload(records, log_level='DEBUG'):
+ logger = ClientLogger.get_logger('SqsHandlers.create_upload_handler.upload', log_level, False)
+ logger.info("In create_upload_handler.upload() handler")
+ logger.debug("Records: %s"%records)
+
+ rec = records[0]
+ s3_key = rec['s3']['object']['key']
+ logger.info("Received message for " + s3_key)
+ logger.info("Event type: " + rec['eventName'])
+ bucket = rec['s3']['bucket']['name']
+ logger.info("BUCKET: %s"%bucket)
+
+ # Fetch the object's uuid from cloud object, if exists.
+ s3_resource = s3_utils.connect('resource', 's3', None)
+ object_uuid = s3_utils.get_uuid_metadata(s3_resource, bucket, s3_key)
+ if object_uuid is not None:
+ logger.info("Retrieved object-uuid: %s"%object_uuid)
+ else:
+ logger.info("Adding uuid")
+ # Can't add uuid to glacier and should be copied over
+ if "backup" not in bucket:
+ object_uuid = s3_utils.add_uuid_metadata(s3_resource, bucket, s3_key)
+
+ # Convert s3 message to IM message
+ im_message = s3_message_adapter.transform(records)
+ json_payload = json.dumps(im_message.to_dict(), cls=EnumEncoder)
+ logger.debug('transformed message, json_payload: %s'%json_payload)
+
+ # Send the message to registry
+ method = 'PATCH' # PATCH for backup buckets; switched to POST below when 'backup' is not in the bucket name
+ if "backup" not in bucket:
+ method = 'POST'
+
+ logger.debug('web_publisher.publish_registry method using "%s" with payload %s'%(method,json_payload))
+ registry_response = web_publisher.publish_registry("granule", object_uuid, json_payload, method)
+ logger.debug('web_publisher.publish_registry response=%s'%registry_response)
+ logger.debug('web_publisher.publish_registry response json=%s'%registry_response.json())
+
+ return registry_response
+
+ return upload
\ No newline at end of file
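
A minimal sketch wiring the two handler factories above into SqsConsumer.receive_messages (all constructor values are placeholders; the object types match the signatures introduced in this diff):

    from onestop.WebPublisher import WebPublisher
    from onestop.util.S3Utils import S3Utils
    from onestop.util.S3MessageAdapter import S3MessageAdapter
    from onestop.util.SqsConsumer import SqsConsumer
    from onestop.util.SqsHandlers import create_delete_handler, create_upload_handler

    wp = WebPublisher('https://registry.example.gov', 'rw_user', 'rw_password', 'https://onestop.example.gov')
    s3_utils = S3Utils('my-access-key', 'my-secret-key')
    s3ma = S3MessageAdapter('access-bucket', 'COLLECTION', 'gov.noaa.example:', 'my-collection-uuid')

    consumer = SqsConsumer()
    queue = consumer.connect(s3_utils.connect('resource', 'sqs', 'us-east-1'), 'my-queue-name')
    consumer.receive_messages(queue, 3, create_upload_handler(wp, s3_utils, s3ma))
    # A delete handler is created the same way: create_delete_handler(wp)
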
diff --git a/onestop-python-client/requirements.txt b/onestop-python-client/requirements.txt
index 735dad7..036e217 100644
--- a/onestop-python-client/requirements.txt
+++ b/onestop-python-client/requirements.txt
@@ -5,8 +5,9 @@ smart-open
PyYAML~=5.3.1
setuptools~=49.2.0
argparse~=1.4.0
-boto3~=1.15.11
+boto~=2.49.0
+boto3~=1.17.71
requests~=2.24.0
-botocore~=1.18.11
-moto==1.3.16.dev122
+botocore~=1.20.71
+moto[all]==2.0.5
undictify
diff --git a/onestop-python-client/setup.py b/onestop-python-client/setup.py
index 19ff9fd..5754bba 100644
--- a/onestop-python-client/setup.py
+++ b/onestop-python-client/setup.py
@@ -1,15 +1,12 @@
import setuptools
-with open("README.md", "r") as fh:
- long_description = fh.read()
-
setuptools.setup(
name="onestop-python-client-cedardevs",
version="0.2.5",
author="CEDARDEVS",
author_email="cedar.cires@colorado.edu",
description="A python package for processing messages from the NOAA OneStop event stream (aka Inventory Manager).",
- long_description="This package provides subject matter experts an API to interact with the kafka topics backing OneStop.",
+ long_description="This package provides subject matter experts an API to interact with OneStop via kafka, cloud, and REST.",
long_description_content_type="text/markdown",
url="https://github.com/cedardevs/onestop-clients",
packages=setuptools.find_packages(exclude=("tests",)),
diff --git a/onestop-python-client/test/__init__.py b/onestop-python-client/test/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/onestop-python-client/tests/data/file1.csv b/onestop-python-client/test/data/file1.csv
similarity index 100%
rename from onestop-python-client/tests/data/file1.csv
rename to onestop-python-client/test/data/file1.csv
diff --git a/onestop-python-client/tests/data/file1_s3.csv b/onestop-python-client/test/data/file1_s3.csv
similarity index 100%
rename from onestop-python-client/tests/data/file1_s3.csv
rename to onestop-python-client/test/data/file1_s3.csv
diff --git a/onestop-python-client/tests/data/file2.csv b/onestop-python-client/test/data/file2.csv
similarity index 100%
rename from onestop-python-client/tests/data/file2.csv
rename to onestop-python-client/test/data/file2.csv
diff --git a/onestop-python-client/tests/data/file3.csv b/onestop-python-client/test/data/file3.csv
similarity index 100%
rename from onestop-python-client/tests/data/file3.csv
rename to onestop-python-client/test/data/file3.csv
diff --git a/onestop-python-client/tests/data/file4.csv b/onestop-python-client/test/data/file4.csv
similarity index 100%
rename from onestop-python-client/tests/data/file4.csv
rename to onestop-python-client/test/data/file4.csv
diff --git a/onestop-python-client/test/integration/__init__.py b/onestop-python-client/test/integration/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/onestop-python-client/tests/test_WebPublisher.py b/onestop-python-client/test/integration/test_WebPublisher.py
similarity index 83%
rename from onestop-python-client/tests/test_WebPublisher.py
rename to onestop-python-client/test/integration/test_WebPublisher.py
index c81a7de..5c7935a 100644
--- a/onestop-python-client/tests/test_WebPublisher.py
+++ b/onestop-python-client/test/integration/test_WebPublisher.py
@@ -2,8 +2,10 @@
import json
import unittest
import time
+import os.path
from onestop.WebPublisher import WebPublisher
+from os import path
class WebPublisherTest(unittest.TestCase):
wp = None
@@ -56,16 +58,26 @@ class WebPublisherTest(unittest.TestCase):
def setUpClass(cls):
print("Set it up!")
- cred_loc = "../config/credentials.yml"
- conf_loc = "../config/csb-data-stream-config-template.yml"
-
- with open(cred_loc) as f:
- creds = yaml.load(f, Loader=yaml.FullLoader)
-
- registry_username = creds['registry']['username']
- registry_password = creds['registry']['password']
- access_key = creds['sandbox']['access_key']
- access_secret = creds['sandbox']['secret_key']
+ cred_loc = "config/credentials.yml"
+ conf_loc = "config/csb-data-stream-config-template.yml"
+
+ if path.exists(cred_loc):
+ with open(cred_loc) as f:
+ creds = yaml.load(f, Loader=yaml.FullLoader)
+
+ registry_username = creds['registry']['username']
+ registry_password = creds['registry']['password']
+ access_key = creds['sandbox']['access_key']
+ access_secret = creds['sandbox']['secret_key']
+ else:
+ print("Credentials file doesn't exist at '%s', using environment variables."%cred_loc)
+ registry_username = os.environ.get('REGISTRY_USERNAME')
+ registry_password = os.environ.get("REGISTRY_PASSWORD")
+ access_key = os.environ.get("ACCESS_KEY")
+ access_secret = os.environ.get("SECRET_KEY")
+ if registry_username is None:
+ msg = "REGISTRY_USERNAME not defined as env variable. Credentials file at '" + cred_loc + "' doesn't exist."
+ raise Exception(msg)
with open(conf_loc) as f:
conf = yaml.load(f, Loader=yaml.FullLoader)
diff --git a/onestop-python-client/test/unit/__init__.py b/onestop-python-client/test/unit/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/onestop-python-client/test/unit/extractor/__init__.py b/onestop-python-client/test/unit/extractor/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/onestop-python-client/test/unit/extractor/test_CsbExtractor.py b/onestop-python-client/test/unit/extractor/test_CsbExtractor.py
new file mode 100644
index 0000000..cba1bf7
--- /dev/null
+++ b/onestop-python-client/test/unit/extractor/test_CsbExtractor.py
@@ -0,0 +1,119 @@
+import unittest
+import os
+
+from moto import mock_s3
+from onestop.extract.CsbExtractor import CsbExtractor
+from onestop.util.S3Utils import S3Utils
+
+class CsbExtractorTest(unittest.TestCase):
+
+ def setUp(self):
+ print("Set it up!")
+ self.root_proj_path = os.getcwd()
+ self.assertIsNotNone(self.root_proj_path)
+ self.data_file_path = os.getcwd() + '/test/data/file4.csv'
+ self.key = "file4.csv"
+ # Use open instead of our method because we aren't testing our code here.
+ self.file_obj = open(self.data_file_path)
+
+ config_dict = {
+ "access_key": "test_access_key",
+ "secret_key": "test_secret_key",
+ "log_level": "DEBUG"
+ }
+
+ self.s3_utils = S3Utils(**config_dict)
+ self.bucket = "bucket"
+ self.region = "region"
+
+ def tearDown(self):
+ print("Tear it down!")
+ self.file_obj.close()
+
+ def test_is_csv(self):
+ self.assertTrue(CsbExtractor.is_csv("test/blah/file.csv"), "Failed to determine a csv file name was a csv file.")
+
+ def test_is_not_csv(self):
+ self.assertFalse(CsbExtractor.is_csv("test/blah/file.txt"), "Failed to determine a csv file name was not a csv file.")
+
+ @mock_s3
+ def test_csb_SME_user_path(self):
+ # Setup bucket and file to read
+ s3 = self.s3_utils.connect('client', 's3', self.region)
+ s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration={'LocationConstraint': self.region})
+ self.s3_utils.upload_s3(s3, self.data_file_path, self.bucket, self.key, True)
+ self.assertTrue(self.s3_utils.read_bytes_s3(s3, self.bucket, self.key))
+
+ # This is how we would expect an external user to get the file.
+ sm_open_file = self.s3_utils.get_csv_s3(self.s3_utils.connect('session', None, self.region), self.bucket, self.key)
+
+ bounds_dict = CsbExtractor.get_spatial_temporal_bounds(sm_open_file, 'LON', 'LAT', 'TIME')
+ coords = bounds_dict["geospatial"]
+ self.assertEqual(coords[0], -96.847995)
+ self.assertEqual(coords[1], 29.373065)
+ self.assertEqual(coords[2], -92.747995)
+ self.assertEqual(coords[3], 33.373065)
+
+ date_rng = bounds_dict["temporal"]
+ self.assertEqual(date_rng[0], '2018-04-10T14:00:06.000Z' )
+ self.assertEqual(date_rng[1], '2020-04-10T14:00:06.000Z' )
+
+ def test_get_geospatial_temporal_bounds(self):
+ bounds_dict = CsbExtractor.get_spatial_temporal_bounds(self.file_obj, 'LON', 'LAT', 'TIME')
+
+ coords = bounds_dict["geospatial"]
+ self.assertEqual(coords[0], -96.847995)
+ self.assertEqual(coords[1], 29.373065)
+ self.assertEqual(coords[2], -92.747995)
+ self.assertEqual(coords[3], 33.373065)
+
+ date_rng = bounds_dict["temporal"]
+ self.assertEqual(date_rng[0], '2018-04-10T14:00:06.000Z' )
+ self.assertEqual(date_rng[1], '2020-04-10T14:00:06.000Z' )
+
+ def test_get_min_lon(self):
+ bounds_dict = CsbExtractor.get_spatial_temporal_bounds(self.file_obj, 'LON', 'LAT', 'TIME')
+
+ coords = bounds_dict["geospatial"]
+ min_lon = coords[0]
+ self.assertEqual(min_lon, -96.847995)
+
+ def test_get_max_datetime(self):
+
+ bounds_dict = CsbExtractor.get_spatial_temporal_bounds(self.file_obj, 'LON', 'LAT', 'TIME')
+
+ date_rng = bounds_dict["temporal"]
+ end_date = date_rng[1]
+ self.assertEqual(end_date, '2020-04-10T14:00:06.000Z')
+
+ def test_get_min_datetime(self):
+ bounds_dict = CsbExtractor.get_spatial_temporal_bounds(self.file_obj, 'LON', 'LAT', 'TIME')
+
+ date_rng = bounds_dict["temporal"]
+ begin_date = date_rng[0]
+ self.assertEqual(begin_date, '2018-04-10T14:00:06.000Z')
+
+ def test_extract_coords(self):
+ coords = CsbExtractor.extract_coords(self.file_obj, -92.747995, 33.373065, -96.847995, 29.373065)
+ result = [[
+ -94.847995,
+ 29.373065
+ ],
+ [
+ -96.847995,
+ 29.373065
+ ],
+ [
+ -94.847995,
+ 33.373065
+ ],
+ [
+ -92.747995,
+ 29.383065
+ ]
+ ]
+ self.assertEqual(coords, result)
+
+
+if __name__ == '__main__':
+ unittest.main()
\ No newline at end of file
diff --git a/onestop-python-client/test/unit/test_KafkaConsumer.py b/onestop-python-client/test/unit/test_KafkaConsumer.py
new file mode 100644
index 0000000..4a5345f
--- /dev/null
+++ b/onestop-python-client/test/unit/test_KafkaConsumer.py
@@ -0,0 +1,287 @@
+import unittest
+
+from unittest.mock import ANY, patch, MagicMock, call
+from onestop.KafkaConsumer import KafkaConsumer
+from confluent_kafka.schema_registry import SchemaRegistryClient
+
+class test_KafkaConsumer(unittest.TestCase):
+ kp = None
+ conf_w_security = None
+ conf_wo_security = None
+
+ @classmethod
+ def setUp(cls):
+ print("Set it up!")
+ cls.conf_w_security = {
+ "kafka_consumer_metadata_type" : "GRANULE",
+ "brokers" : "onestop-dev-cp-kafka:9092",
+ "group_id" : "sme-test",
+ "auto_offset_reset" : "earliest",
+ "schema_registry" : "http://onestop-dev-cp-schema-registry:8081",
+ "security" : {
+ "enabled" : True,
+ "caLoc" : "/etc/pki/tls/cert.pem",
+ "keyLoc" : "/etc/pki/tls/private/kafka-user.key",
+ "certLoc" : "/etc/pki/tls/certs/kafka-user.crt"
+ },
+ "collection_topic_consume" : "psi-collection-input-unknown",
+ "granule_topic_consume" : "psi-granule-input-unknown",
+ "log_level" : "DEBUG"
+ }
+ cls.conf_wo_security = dict(cls.conf_w_security)
+ # Remove security credential section.
+ cls.conf_wo_security['security'] = {
+ "enabled":False
+ }
+
+ @classmethod
+ def tearDown(cls):
+ print("Tear it down!")
+
+ def test_init_happy_nonconditional_params(self):
+ consumer = KafkaConsumer(**self.conf_w_security)
+
+ self.assertEqual(consumer.metadata_type, self.conf_w_security['kafka_consumer_metadata_type'])
+ self.assertEqual(consumer.brokers, self.conf_w_security['brokers'])
+ self.assertEqual(consumer.group_id, self.conf_w_security['group_id'])
+ self.assertEqual(consumer.auto_offset_reset, self.conf_w_security['auto_offset_reset'])
+ self.assertEqual(consumer.schema_registry, self.conf_w_security['schema_registry'])
+ self.assertEqual(consumer.security_enabled, self.conf_w_security['security']['enabled'])
+ self.assertEqual(consumer.collection_topic, self.conf_w_security['collection_topic_consume'])
+ self.assertEqual(consumer.granule_topic, self.conf_w_security['granule_topic_consume'])
+
+ def test_init_security_enabled(self):
+ consumer = KafkaConsumer(**self.conf_w_security)
+
+ self.assertEqual(consumer.security_caLoc, self.conf_w_security['security']['caLoc'])
+ self.assertEqual(consumer.security_keyLoc, self.conf_w_security['security']['keyLoc'])
+ self.assertEqual(consumer.security_certLoc, self.conf_w_security['security']['certLoc'])
+
+ def test_init_security_disabled(self):
+ consumer = KafkaConsumer(**self.conf_wo_security)
+
+ self.assertRaises(AttributeError, getattr, consumer, "security_caLoc")
+ self.assertRaises(AttributeError, getattr, consumer, "security_keyLoc")
+ self.assertRaises(AttributeError, getattr, consumer, "security_certLoc")
+
+ def test_init_metadata_type_valid(self):
+ consumer = KafkaConsumer(**self.conf_w_security)
+
+ self.assertEqual(consumer.metadata_type, self.conf_w_security['kafka_consumer_metadata_type'])
+
+ def test_init_metadata_type_invalid(self):
+ wrong_metadata_type_config = dict(self.conf_w_security)
+ wrong_metadata_type_config['kafka_consumer_metadata_type'] = "invalid_type"
+
+ self.assertRaises(ValueError, KafkaConsumer, **wrong_metadata_type_config)
+
+ def test_init_extra_params(self):
+ conf = dict(self.conf_wo_security)
+ conf['junk_key'] = 'junk_value'
+ KafkaConsumer(**conf)
+
+ @patch.object(SchemaRegistryClient, '__init__', autospec=True)
+ def test_register_client_w_security(self, mock_client):
+ exp_security_conf = {
+ 'url':self.conf_w_security['schema_registry'],
+ 'ssl.ca.location': self.conf_w_security['security']['caLoc'],
+ 'ssl.key.location': self.conf_w_security['security']['keyLoc'],
+ 'ssl.certificate.location': self.conf_w_security['security']['certLoc']
+ }
+ mock_client.return_value = None
+
+ consumer = KafkaConsumer(**self.conf_w_security)
+ consumer.register_client()
+
+ mock_client.assert_called()
+ mock_client.assert_called_with(ANY, exp_security_conf)
+
+ @patch.object(SchemaRegistryClient, '__init__', autospec=True)
+ def test_register_client_wo_security(self, mock_client):
+ exp_security_conf = {
+ 'url':self.conf_w_security['schema_registry'],
+ 'ssl.ca.location': self.conf_w_security['security']['caLoc'],
+ 'ssl.key.location': self.conf_w_security['security']['keyLoc'],
+ 'ssl.certificate.location': self.conf_w_security['security']['certLoc']
+ }
+ mock_client.return_value = None
+
+ consumer = KafkaConsumer(**self.conf_wo_security)
+ consumer.register_client()
+ try:
+ mock_client.assert_called_with(ANY, exp_security_conf)
+        except AssertionError:
+ return
+ raise AssertionError('Expected register_client() to not have been called with security arguments.')
+
+ @patch('onestop.KafkaConsumer.AvroDeserializer')
+ @patch('onestop.KafkaConsumer.DeserializingConsumer')
+ def test_create_consumer_calls_AvroDeserializer(self, mock_deserializing_consumer, mock_avro_deserializer):
+ conf_w_security_collection = dict(self.conf_w_security)
+ conf_w_security_collection['kafka_consumer_metadata_type'] = "COLLECTION"
+
+ consumer = KafkaConsumer(**conf_w_security_collection)
+ reg_client = consumer.register_client()
+ reg_client.get_latest_version = MagicMock()
+ deser_consumer = consumer.create_consumer(reg_client)
+
+ # Verify AvroDeserializer called with expected registry client
+ mock_avro_deserializer.assert_called_with(schema_str=ANY, schema_registry_client=reg_client)
+
+ self.assertIsNotNone(deser_consumer)
+
+ @patch('onestop.KafkaConsumer.AvroDeserializer')
+ @patch('onestop.KafkaConsumer.DeserializingConsumer')
+ def test_create_consumer_collection_w_security(self, mock_deserializing_consumer, mock_avro_deserializer):
+ conf_w_security_collection = dict(self.conf_w_security)
+ topic = conf_w_security_collection['collection_topic_consume']
+ conf_w_security_collection['kafka_consumer_metadata_type'] = 'COLLECTION'
+
+ consumer = KafkaConsumer(**conf_w_security_collection)
+ reg_client = MagicMock()
+ deser_consumer = consumer.create_consumer(reg_client)
+
+ # Verify metadata type was taken into consideration for getting topic information
+ reg_client.get_latest_version.assert_called_with(topic + '-value')
+
+ # Verify security passed into DeserializingConsumer
+ mock_deserializing_consumer.assert_called_with(
+ {
+ 'bootstrap.servers': conf_w_security_collection['brokers'],
+ 'security.protocol': 'SSL',
+ 'ssl.ca.location': conf_w_security_collection['security']['caLoc'],
+ 'ssl.key.location': conf_w_security_collection['security']['keyLoc'],
+ 'ssl.certificate.location': conf_w_security_collection['security']['certLoc'],
+ 'key.deserializer': ANY,
+ 'value.deserializer': ANY,
+ 'group.id': conf_w_security_collection['group_id'],
+ 'auto.offset.reset': conf_w_security_collection['auto_offset_reset']
+ })
+ mock_deserializing_consumer.return_value.subscribe.assert_called_with([topic])
+
+ self.assertIsNotNone(deser_consumer)
+
+ @patch('onestop.KafkaConsumer.AvroDeserializer')
+ @patch('onestop.KafkaConsumer.DeserializingConsumer')
+ def test_create_consumer_collection_wo_security(self, mock_deserializing_consumer, mock_avro_deserializer):
+ conf_wo_security_collection = dict(self.conf_wo_security)
+ topic = conf_wo_security_collection['collection_topic_consume']
+ conf_wo_security_collection['kafka_consumer_metadata_type'] = 'COLLECTION'
+
+ consumer = KafkaConsumer(**conf_wo_security_collection)
+ reg_client = MagicMock()
+ deser_consumer = consumer.create_consumer(reg_client)
+
+ # Verify metadata type was taken into consideration for getting topic information
+ reg_client.get_latest_version.assert_called_with(topic + '-value')
+
+ # Verify no security passed into DeserializingConsumer
+ mock_deserializing_consumer.assert_called_with(
+ {
+ 'bootstrap.servers': conf_wo_security_collection['brokers'],
+ 'key.deserializer': ANY,
+ 'value.deserializer': ANY,
+ 'group.id': conf_wo_security_collection['group_id'],
+ 'auto.offset.reset': conf_wo_security_collection['auto_offset_reset']
+ })
+ mock_deserializing_consumer.return_value.subscribe.assert_called_with([topic])
+
+ self.assertIsNotNone(deser_consumer)
+
+ @patch('onestop.KafkaConsumer.AvroDeserializer')
+ @patch('onestop.KafkaConsumer.DeserializingConsumer')
+ def test_create_consumer_granule_w_security(self, mock_deserializing_consumer, mock_avro_deserializer):
+ conf_w_security_granule = dict(self.conf_w_security)
+ topic = conf_w_security_granule['granule_topic_consume']
+ conf_w_security_granule['kafka_consumer_metadata_type'] = 'GRANULE'
+
+ consumer = KafkaConsumer(**conf_w_security_granule)
+ reg_client = MagicMock()
+ deser_consumer = consumer.create_consumer(reg_client)
+
+ # Verify metadata type was taken into consideration for getting topic information
+ reg_client.get_latest_version.assert_called_with(topic + '-value')
+
+ # Verify security passed into DeserializingConsumer
+ mock_deserializing_consumer.assert_called_with(
+ {
+ 'bootstrap.servers': conf_w_security_granule['brokers'],
+ 'security.protocol': 'SSL',
+ 'ssl.ca.location': conf_w_security_granule['security']['caLoc'],
+ 'ssl.key.location': conf_w_security_granule['security']['keyLoc'],
+ 'ssl.certificate.location': conf_w_security_granule['security']['certLoc'],
+ 'key.deserializer': ANY,
+ 'value.deserializer': ANY,
+ 'group.id': conf_w_security_granule['group_id'],
+ 'auto.offset.reset': conf_w_security_granule['auto_offset_reset']
+ })
+ mock_deserializing_consumer.return_value.subscribe.assert_called_with([topic])
+
+ self.assertIsNotNone(deser_consumer)
+
+ @patch('onestop.KafkaConsumer.AvroDeserializer')
+ @patch('onestop.KafkaConsumer.DeserializingConsumer')
+ def test_create_consumer_granule_wo_security(self, mock_deserializing_consumer, mock_avro_deserializer):
+ conf_wo_security_granule = dict(self.conf_wo_security)
+ exp_topic = conf_wo_security_granule['granule_topic_consume']
+ conf_wo_security_granule['kafka_consumer_metadata_type'] = 'GRANULE'
+
+ consumer = KafkaConsumer(**conf_wo_security_granule)
+ reg_client = MagicMock()
+ deser_consumer = consumer.create_consumer(reg_client)
+
+ # Verify metadata type was taken into consideration for getting topic information
+ reg_client.get_latest_version.assert_called_with(exp_topic + '-value')
+
+ # Verify no security passed into DeserializingConsumer called with expected configuration
+ mock_deserializing_consumer.assert_called_with(
+ {
+ 'bootstrap.servers': conf_wo_security_granule['brokers'],
+ 'key.deserializer': ANY,
+ 'value.deserializer': ANY,
+ 'group.id': conf_wo_security_granule['group_id'],
+ 'auto.offset.reset': conf_wo_security_granule['auto_offset_reset']
+ })
+ mock_deserializing_consumer.return_value.subscribe.assert_called_with([exp_topic])
+
+ self.assertIsNotNone(deser_consumer)
+
+ def test_connect(self):
+ mock_client = MagicMock()
+
+ consumer = KafkaConsumer(**self.conf_w_security)
+ consumer.register_client = MagicMock(return_value=mock_client)
+ consumer.create_consumer = MagicMock(return_value=MagicMock(mock_client))
+ consumer.connect()
+
+ consumer.register_client.assert_called_once()
+ consumer.create_consumer.assert_called_with(mock_client)
+
+ @patch('confluent_kafka.cimpl.Message')
+ @patch('onestop.KafkaConsumer.DeserializingConsumer')
+ def test_consume(self, mock_metadata_consumer, mock_message):
+ mock_message_key = 'key1'
+ mock_message_value = 'value1'
+ consumer = KafkaConsumer(**self.conf_w_security)
+ consumer.register_client = MagicMock(return_value=MagicMock())
+ mock_message.key.return_value = mock_message_key
+ mock_message.value.return_value = mock_message_value
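+        # poll side_effect: an empty poll (None), then one message, then an Exception to break out of the consume loop.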
+ mock_metadata_consumer.poll.side_effect = [None, mock_message, Exception]
+ mock_handler = MagicMock()
+
+        # Pass the callable and its arguments separately so assertRaises catches the Exception raised on the final poll.
+        self.assertRaises(Exception, consumer.consume, mock_metadata_consumer, mock_handler)
+
+ # Verify kafka consumer poll called expected number of times
+ self.assertEqual(mock_metadata_consumer.poll.call_count, 3)
+ mock_metadata_consumer.poll.assert_has_calls([call(10), call(10), call(10)])
+
+ # Verify callback function was called once with expected message attributes
+ mock_handler.assert_called_once()
+ mock_handler.assert_called_with(mock_message_key, mock_message_value, self.conf_w_security['log_level'])
+
+if __name__ == '__main__':
+ unittest.main()
\ No newline at end of file
diff --git a/onestop-python-client/test/unit/test_KafkaPublisher.py b/onestop-python-client/test/unit/test_KafkaPublisher.py
new file mode 100644
index 0000000..6357a3c
--- /dev/null
+++ b/onestop-python-client/test/unit/test_KafkaPublisher.py
@@ -0,0 +1,335 @@
+import unittest
+import json
+
+from onestop.KafkaPublisher import KafkaPublisher
+from unittest.mock import ANY, patch, MagicMock
+from confluent_kafka.schema_registry import SchemaRegistryClient
+
+class test_KafkaPublisher(unittest.TestCase):
+ kp = None
+ conf_w_security = None
+ conf_wo_security = None
+
+ @classmethod
+ def setUp(cls):
+ print("Set it up!")
+ cls.conf_w_security = {
+ "kafka_publisher_metadata_type" : "GRANULE",
+ "brokers" : "onestop-dev-cp-kafka:9092",
+ "schema_registry" : "http://onestop-dev-cp-schema-registry:8081",
+ "security" : {
+ "enabled" : True,
+ "caLoc" : "/etc/pki/tls/cert.pem",
+ "keyLoc" : "/etc/pki/tls/private/kafka-user.key",
+ "certLoc" : "/etc/pki/tls/certs/kafka-user.crt"
+ },
+ "collection_topic_publish" : "psi-collection-input-unknown",
+ "granule_topic_publish" : "psi-granule-input-unknown",
+ "log_level" : "DEBUG"
+ }
+ cls.conf_wo_security = dict(cls.conf_w_security)
+ # Remove security credential section.
+ cls.conf_wo_security['security'] = {
+ "enabled":False
+ }
+
+ @classmethod
+    def tearDown(cls):
+ print("Tear it down!")
+
+ def test_init_happy_nonconditional_params(self):
+ publisher = KafkaPublisher(**self.conf_w_security)
+
+ self.assertEqual(publisher.metadata_type, self.conf_w_security['kafka_publisher_metadata_type'])
+ self.assertEqual(publisher.brokers, self.conf_w_security['brokers'])
+ self.assertEqual(publisher.schema_registry, self.conf_w_security['schema_registry'])
+ self.assertEqual(publisher.security_enabled, self.conf_w_security['security']['enabled'])
+ self.assertEqual(publisher.collection_topic, self.conf_w_security['collection_topic_publish'])
+ self.assertEqual(publisher.granule_topic, self.conf_w_security['granule_topic_publish'])
+
+ def test_init_security_enabled(self):
+ publisher = KafkaPublisher(**self.conf_w_security)
+
+ self.assertEqual(publisher.security_caLoc, self.conf_w_security['security']['caLoc'])
+ self.assertEqual(publisher.security_keyLoc, self.conf_w_security['security']['keyLoc'])
+ self.assertEqual(publisher.security_certLoc, self.conf_w_security['security']['certLoc'])
+
+ def test_init_security_disabled(self):
+ publisher = KafkaPublisher(**self.conf_wo_security)
+
+ self.assertRaises(AttributeError, getattr, publisher, "security_caLoc")
+ self.assertRaises(AttributeError, getattr, publisher, "security_keyLoc")
+ self.assertRaises(AttributeError, getattr, publisher, "security_certLoc")
+
+ def test_init_metadata_type_valid(self):
+ publisher = KafkaPublisher(**self.conf_w_security)
+
+ self.assertEqual(publisher.metadata_type, self.conf_w_security['kafka_publisher_metadata_type'])
+
+ def test_init_metadata_type_invalid(self):
+ wrong_metadata_type_config = dict(self.conf_w_security)
+ wrong_metadata_type_config['kafka_publisher_metadata_type'] = "invalid_type"
+
+ self.assertRaises(ValueError, KafkaPublisher, **wrong_metadata_type_config)
+
+ def test_init_extra_params(self):
+ conf = dict(self.conf_wo_security)
+ conf['junk_key'] = 'junk_value'
+ KafkaPublisher(**conf)
+
+ @patch.object(SchemaRegistryClient, '__init__', autospec=True)
+ def test_register_client_w_security(self, mock_client):
+ exp_security_conf = {
+ 'url':self.conf_w_security['schema_registry'],
+ 'ssl.ca.location': self.conf_w_security['security']['caLoc'],
+ 'ssl.key.location': self.conf_w_security['security']['keyLoc'],
+ 'ssl.certificate.location': self.conf_w_security['security']['certLoc']
+ }
+ mock_client.return_value = None
+
+ publisher = KafkaPublisher(**self.conf_w_security)
+ publisher.register_client()
+
+ mock_client.assert_called()
+ mock_client.assert_called_with(ANY, exp_security_conf)
+
+ @patch.object(SchemaRegistryClient, '__init__', autospec=True)
+ def test_register_client_wo_security(self, mock_client):
+ exp_security_conf = {
+ 'url':self.conf_w_security['schema_registry'],
+ 'ssl.ca.location': self.conf_w_security['security']['caLoc'],
+ 'ssl.key.location': self.conf_w_security['security']['keyLoc'],
+ 'ssl.certificate.location': self.conf_w_security['security']['certLoc']
+ }
+ mock_client.return_value = None
+
+ publisher = KafkaPublisher(**self.conf_wo_security)
+ publisher.register_client()
+ try:
+ mock_client.assert_called_with(ANY, exp_security_conf)
+        except AssertionError:
+ return
+ raise AssertionError('Expected register_client() to not have been called with security arguments.')
+
+ @patch('onestop.KafkaPublisher.AvroSerializer')
+ @patch('onestop.KafkaPublisher.SerializingProducer')
+ def test_create_producer_calls_AvroSerializer(self, mock_serializing_publisher, mock_avro_serializer):
+ conf_w_security_collection = dict(self.conf_w_security)
+ conf_w_security_collection['kafka_publisher_metadata_type'] = "COLLECTION"
+
+ publisher = KafkaPublisher(**conf_w_security_collection)
+ reg_client = publisher.register_client()
+ reg_client.get_latest_version = MagicMock()
+ publisher.create_producer(reg_client)
+
+ # Verify AvroSerializer called with expected registry client
+ mock_avro_serializer.assert_called_with(schema_str=ANY, schema_registry_client=reg_client)
+
+ @patch('onestop.KafkaPublisher.AvroSerializer')
+ @patch('onestop.KafkaPublisher.SerializingProducer')
+ def test_create_producer_collection_w_security(self, mock_serializing_producer, mock_avro_serializer):
+ conf_w_security_collection = dict(self.conf_w_security)
+ topic = conf_w_security_collection['collection_topic_publish']
+ conf_w_security_collection['kafka_publisher_metadata_type'] = 'COLLECTION'
+
+ publisher = KafkaPublisher(**conf_w_security_collection)
+ reg_client = MagicMock()
+ prod = publisher.create_producer(reg_client)
+
+ # Verify metadata type was taken into consideration for getting topic information
+ reg_client.get_latest_version.assert_called_with(topic + '-value')
+
+ # Verify security passed into SerializingProducer
+ mock_serializing_producer.assert_called_with(
+ {
+ 'bootstrap.servers': conf_w_security_collection['brokers'],
+ 'security.protocol': 'SSL',
+ 'ssl.ca.location': conf_w_security_collection['security']['caLoc'],
+ 'ssl.key.location': conf_w_security_collection['security']['keyLoc'],
+ 'ssl.certificate.location': conf_w_security_collection['security']['certLoc'],
+ 'value.serializer': ANY,
+ })
+
+ self.assertIsNotNone(prod)
+
+ @patch('onestop.KafkaPublisher.AvroSerializer')
+ @patch('onestop.KafkaPublisher.SerializingProducer')
+ def test_create_producer_collection_wo_security(self, mock_serializing_producer, mock_avro_serializer):
+ conf_wo_security_collection = dict(self.conf_wo_security)
+ topic = conf_wo_security_collection['collection_topic_publish']
+ conf_wo_security_collection['kafka_publisher_metadata_type'] = 'COLLECTION'
+
+ publisher = KafkaPublisher(**conf_wo_security_collection)
+ reg_client = MagicMock()
+ prod = publisher.create_producer(reg_client)
+
+ # Verify metadata type was taken into consideration for getting topic information
+ reg_client.get_latest_version.assert_called_with(topic + '-value')
+
+ # Verify no security passed into SerializingProducer
+ mock_serializing_producer.assert_called_with(
+ {
+ 'bootstrap.servers': conf_wo_security_collection['brokers'],
+ 'value.serializer': ANY,
+ })
+
+ self.assertIsNotNone(prod)
+
+ @patch('onestop.KafkaPublisher.AvroSerializer')
+ @patch('onestop.KafkaPublisher.SerializingProducer')
+ def test_create_producer_granule_w_security(self, mock_serializing_producer, mock_avro_serializer):
+ conf_w_security_granule = dict(self.conf_w_security)
+ topic = conf_w_security_granule['granule_topic_publish']
+ conf_w_security_granule['kafka_publisher_metadata_type'] = 'GRANULE'
+
+ publisher = KafkaPublisher(**conf_w_security_granule)
+ reg_client = MagicMock()
+ prod = publisher.create_producer(reg_client)
+
+ # Verify metadata type was taken into consideration for getting topic information
+ reg_client.get_latest_version.assert_called_with(topic + '-value')
+
+ # Verify security passed into SerializingProducer
+ mock_serializing_producer.assert_called_with(
+ {
+ 'bootstrap.servers': conf_w_security_granule['brokers'],
+ 'security.protocol': 'SSL',
+ 'ssl.ca.location': conf_w_security_granule['security']['caLoc'],
+ 'ssl.key.location': conf_w_security_granule['security']['keyLoc'],
+ 'ssl.certificate.location': conf_w_security_granule['security']['certLoc'],
+ 'value.serializer': ANY,
+ })
+
+ self.assertIsNotNone(prod)
+
+ @patch('onestop.KafkaPublisher.AvroSerializer')
+ @patch('onestop.KafkaPublisher.SerializingProducer')
+ def test_create_producer_granule_wo_security(self, mock_serializing_producer, mock_avro_serializer):
+ conf_wo_security_granule = dict(self.conf_wo_security)
+ exp_topic = conf_wo_security_granule['granule_topic_publish']
+ conf_wo_security_granule['kafka_publisher_metadata_type'] = 'GRANULE'
+
+ publisher = KafkaPublisher(**conf_wo_security_granule)
+ reg_client = MagicMock()
+ prod = publisher.create_producer(reg_client)
+
+ # Verify metadata type was taken into consideration for getting topic information
+ reg_client.get_latest_version.assert_called_with(exp_topic + '-value')
+
+ # Verify no security passed into SerializingProducer called with expected configuration
+ mock_serializing_producer.assert_called_with(
+ {
+ 'bootstrap.servers': conf_wo_security_granule['brokers'],
+ 'value.serializer': ANY,
+ })
+
+ self.assertIsNotNone(prod)
+
+ def test_connect(self):
+ mock_client = MagicMock()
+
+ publisher = KafkaPublisher(**self.conf_w_security)
+ publisher.register_client = MagicMock(return_value=mock_client)
+ publisher.create_producer = MagicMock(return_value=MagicMock(mock_client))
+ publisher.connect()
+
+ publisher.register_client.assert_called_once()
+ publisher.create_producer.assert_called_with(mock_client)
+
+ def test_get_collection_key_from_uuid(self):
+ expKey = '12345678-1234-5678-1234-567812345678'
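+        # Each accepted UUID representation should normalize to the same canonical key string.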
+ for uuid in [
+ '{12345678-1234-5678-1234-567812345678}',
+ '12345678123456781234567812345678',
+ 'urn:uuid:12345678-1234-5678-1234-567812345678',
+ b'\x12\x34\x56\x78'*4,
+# b'\x78\x56\x34\x12\x34\x12\x78\x56' + b'\x12\x34\x56\x78\x12\x34\x56\x78',
+# {0x12345678, 0x1234, 0x5678, 0x12, 0x34, 0x567812345678},
+# 0x12345678123456781234567812345678,
+ ]:
+ with self.subTest(uuid=uuid):
+ print ("Testing uuid "+str(uuid))
+ key = KafkaPublisher.get_collection_key_from_uuid(uuid)
+ print("Acquired uuid="+str(key))
+ self.assertEqual(key, expKey)
+
+ @patch('onestop.KafkaPublisher.SerializingProducer')
+ def test_publish_collection(self, mock_collection_producer):
+ uuid = '{12345678-1234-5678-1234-567812345678}'
+ content_dict = {
+ 'title': 'this is a test',
+ 'location': 'somewhere in space'
+ }
+ method = 'PUT'
+ publisher = KafkaPublisher(**self.conf_w_security)
+ publisher.register_client = MagicMock(return_value=MagicMock())
+ mock_collection_producer.produce = MagicMock()
+ mock_collection_producer.poll.side_effect = [1]
+
+ publisher.publish_collection(mock_collection_producer, uuid, content_dict, method)
+
+ # Verify kafka produce called once
+ mock_collection_producer.produce.assert_called_with(
+ topic=self.conf_w_security['collection_topic_publish'],
+ value={
+ 'type': 'collection',
+ 'content': json.dumps(content_dict),
+ 'contentType': 'application/json',
+ 'method': method,
+ 'source': 'unknown',
+ },
+ key=publisher.get_collection_key_from_uuid(uuid),
+ on_delivery=publisher.delivery_report
+ )
+
+ # Verify kafka produce poll called once
+ mock_collection_producer.poll.assert_called_once()
+
+
+ @patch('onestop.KafkaPublisher.SerializingProducer')
+ def test_publish_granule(self, mock_collection_producer):
+ uuid = '{12345678-1234-5678-1234-567812345678}'
+ content_dict = {
+ 'title': 'this is a test',
+ 'location': 'somewhere in space',
+ 'relationships': [{"type": "COLLECTION",
+ "id": '{12345678-1234-5678-1234-567812345678}'}],
+ 'errors': [],
+ 'analysis': 'No analysis',
+ 'fileLocations': 'archived',
+ 'fileInformation': 'no information',
+ 'discovery': 'AWS'
+ }
+ publisher = KafkaPublisher(**self.conf_w_security)
+ publisher.register_client = MagicMock(return_value=MagicMock())
+ mock_collection_producer.produce = MagicMock()
+ mock_collection_producer.poll.side_effect = [1]
+
+ publisher.publish_granule(mock_collection_producer, uuid, content_dict)
+
+ # Verify kafka produce called once
+ mock_collection_producer.produce.assert_called_with(
+ topic=self.conf_w_security['granule_topic_publish'],
+ value={
+ 'type': 'granule',
+ 'content': json.dumps(content_dict),
+ #'contentType': 'application/json',
+ 'method': 'PUT',
+ 'source': 'unknown',
+ 'operation': None,
+ 'relationships': content_dict['relationships'],
+ 'errors': content_dict['errors'],
+ 'analysis': content_dict['analysis'],
+ 'fileLocations': {'fileLocation': content_dict['fileLocations']},
+ 'fileInformation': content_dict['fileInformation'],
+ 'discovery': content_dict['discovery']
+ },
+ key=publisher.get_collection_key_from_uuid(uuid),
+ on_delivery=publisher.delivery_report
+ )
+
+ # Verify kafka produce poll called once
+ mock_collection_producer.poll.assert_called_once()
+
+if __name__ == '__main__':
+ unittest.main()
\ No newline at end of file
diff --git a/onestop-python-client/test/unit/test_SqsHandlers.py b/onestop-python-client/test/unit/test_SqsHandlers.py
new file mode 100644
index 0000000..231e6cf
--- /dev/null
+++ b/onestop-python-client/test/unit/test_SqsHandlers.py
@@ -0,0 +1,320 @@
+import json
+import unittest
+
+from unittest import mock
+from unittest.mock import patch
+from moto import mock_sqs
+from test.utils import abspath_from_relative, create_delete_message
+from onestop.WebPublisher import WebPublisher
+from onestop.util.S3Utils import S3Utils
+from onestop.util.S3MessageAdapter import S3MessageAdapter
+from onestop.util.SqsConsumer import SqsConsumer
+from onestop.util.SqsHandlers import create_delete_handler
+from onestop.util.SqsHandlers import create_upload_handler
+from onestop.schemas.util.jsonEncoder import EnumEncoder
+
+class test_SqsHandler(unittest.TestCase):
+
+ def setUp(self):
+ print("Set it up!")
+
+ self.config_dict = {
+ 'access_key': 'test_access_key',
+ 'secret_key': 'test_secret_key',
+ 'access_bucket': 'https://archive-testing-demo.s3-us-east-2.amazonaws.com',
+ 's3_message_adapter_metadata_type': 'COLLECTION',
+ 'file_id_prefix': 'gov.noaa.ncei.csb:',
+ 'collection_id': 'fdb56230-87f4-49f2-ab83-104cfd073177',
+ 'registry_base_url': 'http://localhost/onestop/api/registry',
+ 'registry_username': 'admin',
+ 'registry_password': 'whoknows',
+ 'onestop_base_url': 'http://localhost/onestop/api/search/search',
+ 'log_level': 'DEBUG'
+ }
+
+ self.wp = WebPublisher(**self.config_dict)
+ self.s3_utils = S3Utils(**self.config_dict)
+ self.s3_message_adapter = S3MessageAdapter(**self.config_dict)
+ self.sqs_consumer = SqsConsumer(**self.config_dict)
+
+ self.sqs_max_polls = 3
+ self.region = 'us-east-2'
+ self.bucket = 'archive-testing-demo'
+ self.key = 'ABI-L1b-RadF/2019/298/15/OR_ABI-L1b-RadF-M6C15_G16_s20192981500369_e20192981510082_c20192981510166.nc'
+
+ def tearDown(self):
+ print("Tear it down!")
+
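+    # Stand-in for requests.get: returns a canned OneStop search response containing a single granule hit.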
+ def mocked_search_response_data(*args, **kwargs):
+ class MockResponse:
+ def __init__(self, json_data, status_code):
+ self.json_data = json_data
+ self.status_code = status_code
+
+ def json(self):
+ return self.json_data
+
+ print ("args: "+str(args)+" kwargs: "+str(kwargs))
+ onestop_search_response = {
+ "data":[
+ {
+ "attributes":{
+ "serviceLinks":[
+
+ ],
+ "citeAsStatements":[
+
+ ],
+ "links":[
+ {
+ "linkFunction":"download",
+ "linkUrl":"s3://archive-testing-demo-backup/public/NESDIS/CSB/csv/2019/12/01/20191201_08d5538c6f8dbefd7d82929623a34385_pointData.csv",
+ "linkName":"Amazon S3",
+ "linkProtocol":"Amazon:AWS:S3"
+ },
+ {
+ "linkFunction":"download",
+ "linkUrl":"https://archive-testing-demo.s3-us-east-2.amazonaws.com/public/NESDIS/CSB/csv/2019/12/01/20191201_08d5538c6f8dbefd7d82929623a34385_pointData.csv",
+ "linkName":"Amazon S3",
+ "linkProtocol":"HTTPS"
+ }
+ ],
+ "internalParentIdentifier":"fdb56230-87f4-49f2-ab83-104cfd073177",
+ "filesize":63751,
+ "title":"20191201_08d5538c6f8dbefd7d82929623a34385_pointData.csv"
+ },
+ "id":"77b11a1e-1b75-46e1-b7d6-99b5022ed113",
+ "type":"granule"
+ }
+ ],
+ "meta":{
+ "took":1,
+ "total":6,
+ "exactCount":True
+ }
+ }
+ return MockResponse(onestop_search_response, 200)
+
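+    # Stand-in for requests.get: returns a canned OneStop search response with no hits.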
+ def mocked_search_response_data_empty(*args, **kwargs):
+ class MockResponse:
+ def __init__(self, json_data, status_code):
+ self.json_data = json_data
+ self.status_code = status_code
+
+ def json(self):
+ return self.json_data
+
+ print ("args: "+str(args)+" kwargs: "+str(kwargs))
+ onestop_search_response = {
+ "data":[],
+ "meta":{
+ "took":1,
+ "total":6,
+ "exactCount":True
+ }
+ }
+ return MockResponse(onestop_search_response, 200)
+
+ @mock_sqs
+ @mock.patch('requests.get', side_effect=mocked_search_response_data, autospec=True)
+ @patch('onestop.WebPublisher')
+ def test_delete_handler_happy(self, mock_wp, mock_response):
+ queue_name = 'test_queue'
+ sqs_resource = self.s3_utils.connect('resource', 'sqs', self.region)
+ sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url
+ sqs_queue = sqs_resource.Queue(queue_name)
+
+ # Send a test message
+ sqs_client = self.s3_utils.connect('client', 'sqs' , self.region)
+ message = create_delete_message(self.region, self.bucket, self.key)
+ sqs_client.send_message(
+ QueueUrl=sqs_queue_url,
+ MessageBody=json.dumps(message)
+ )
+
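+        # Route search_onestop through the patched requests.get so the canned response is returned.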
+ mock_wp.search_onestop.side_effect = mock_response
+ cb = create_delete_handler(mock_wp)
+
+ self.sqs_consumer.receive_messages(sqs_queue, 1, cb)
+
+ # Verify search and delete called once.
+ mock_wp.search_onestop.assert_called_once()
+ mock_wp.delete_registry.assert_called_once()
+
+ @mock_sqs
+ @mock.patch('requests.get', side_effect=mocked_search_response_data_empty, autospec=True)
+ @patch('onestop.WebPublisher')
+ def test_delete_handler_data_empty_ends_cb(self, mock_wp, mock_response):
+ queue_name = 'test_queue'
+ sqs_resource = self.s3_utils.connect('resource', 'sqs', self.region)
+ sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url
+ sqs_queue = sqs_resource.Queue(queue_name)
+
+ # Send a test message
+ sqs_client = self.s3_utils.connect('client', 'sqs' , self.region)
+ message = create_delete_message(self.region, self.bucket, self.key)
+ sqs_client.send_message(
+ QueueUrl=sqs_queue_url,
+ MessageBody=json.dumps(message)
+ )
+
+ mock_wp.search_onestop.side_effect = mock_response
+ cb = create_delete_handler(mock_wp)
+
+ self.sqs_consumer.receive_messages(sqs_queue, 1, cb)
+
+        # Verify search called once but delete never called.
+ mock_wp.search_onestop.assert_called_once()
+ mock_wp.delete_registry.assert_not_called()
+
+ @mock_sqs
+ @mock.patch('requests.get', side_effect=mocked_search_response_data, autospec=True)
+ @patch('onestop.WebPublisher')
+ def test_delete_handler_no_records_ends_cb(self, mock_wp, mock_response):
+ queue_name = 'test_queue'
+ sqs_resource = self.s3_utils.connect('resource', 'sqs', self.region)
+ sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url
+ sqs_queue = sqs_resource.Queue(queue_name)
+
+ # Send a test message
+ sqs_client = self.s3_utils.connect('client', 'sqs' , self.region)
+ sqs_client.send_message(
+ QueueUrl=sqs_queue_url,
+ MessageBody=json.dumps({"Message":'''{"Records":[]}'''})
+ )
+
+ mock_wp.search_onestop.side_effect = mock_response
+ cb = create_delete_handler(mock_wp)
+
+ self.sqs_consumer.receive_messages(sqs_queue, 1, cb)
+
+        # Verify neither search nor delete was called.
+ mock_wp.search_onestop.assert_not_called()
+ mock_wp.delete_registry.assert_not_called()
+
+ @mock_sqs
+ @mock.patch('requests.get', side_effect=mocked_search_response_data, autospec=True)
+ @patch('onestop.WebPublisher')
+ def test_delete_handler_eventName_not_delete_ends_cb(self, mock_wp, mock_response):
+ queue_name = 'test_queue'
+ sqs_resource = self.s3_utils.connect('resource', 'sqs', self.region)
+ sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url
+ sqs_queue = sqs_resource.Queue(queue_name)
+
+ # Send a test message
+ sqs_client = self.s3_utils.connect('client', 'sqs' , self.region)
+ sqs_client.send_message(
+ QueueUrl=sqs_queue_url,
+ MessageBody=json.dumps({"Message":'''{"Records":[{"eventName":"Unknown"}]}'''})
+ )
+
+ mock_wp.search_onestop.side_effect = mock_response
+ cb = create_delete_handler(mock_wp)
+
+ self.sqs_consumer.receive_messages(sqs_queue, 1, cb)
+
+        # Verify neither search nor delete was called.
+ mock_wp.search_onestop.assert_not_called()
+ mock_wp.delete_registry.assert_not_called()
+
+ @mock_sqs
+ @patch('onestop.WebPublisher')
+ @patch('onestop.util.S3Utils')
+ def test_upload_handler_happy(self, mock_s3_utils, mock_wp):
+ bucket = self.bucket
+ key = self.key
+ queue_name = 'test_queue'
+ sqs_resource = self.s3_utils.connect('resource', 'sqs', self.region)
+ sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url
+ sqs_queue = sqs_resource.Queue(queue_name)
+
+ # Send a test message
+ sqs_client = self.s3_utils.connect('client', 'sqs' , self.region)
+ message = create_delete_message(self.region, bucket, key)
+ sqs_client.send_message(
+ QueueUrl=sqs_queue_url,
+ MessageBody=json.dumps(message)
+ )
+
+ cb = create_upload_handler(mock_wp, mock_s3_utils, self.s3_message_adapter)
+ self.sqs_consumer.receive_messages(sqs_queue, 1, cb)
+
+ # Verify get uuid called
+ mock_s3_utils.connect.assert_called_with('resource', 's3', None)
+ mock_s3_utils.get_uuid_metadata.assert_called_with(
+ mock_s3_utils.connect(),
+ bucket,
+ key)
+ # Verify uuid not added
+ mock_s3_utils.add_uuid_metadata.assert_not_called()
+ # Verify publish called & transform called
+ mock_wp.publish_registry.assert_called_with(
+ 'granule',
+ mock_s3_utils.get_uuid_metadata(),
+ json.dumps(self.s3_message_adapter.transform(json.loads(message['Message'])['Records']).to_dict(), cls=EnumEncoder),
+ 'POST'
+ )
+
+ @mock_sqs
+ @patch('onestop.WebPublisher')
+ @patch('onestop.util.S3Utils')
+ def test_upload_handler_adds_uuid(self, mock_s3_utils, mock_wp):
+ bucket = self.bucket
+ key = self.key
+ queue_name = 'test_queue'
+ sqs_resource = self.s3_utils.connect('resource', 'sqs', self.region)
+ sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url
+ sqs_queue = sqs_resource.Queue(queue_name)
+
+ # Send a test message
+ sqs_client = self.s3_utils.connect('client', 'sqs' , self.region)
+ message = create_delete_message(self.region, bucket, key)
+ sqs_client.send_message(
+ QueueUrl=sqs_queue_url,
+ MessageBody=json.dumps(message)
+ )
+
+ mock_s3_utils.get_uuid_metadata.return_value = None
+ cb = create_upload_handler(mock_wp, mock_s3_utils, self.s3_message_adapter)
+
+ self.sqs_consumer.receive_messages(sqs_queue, 1, cb)
+
+ # Verify add uuid called
+ mock_s3_utils.add_uuid_metadata.assert_called_with(
+ mock_s3_utils.connect(),
+ bucket,
+ key)
+
+ @mock_sqs
+ @patch('onestop.WebPublisher')
+ @patch('onestop.util.S3Utils')
+ def test_upload_handler_bucket_as_backup_PATCH(self, mock_s3_utils, mock_wp):
+        bucket = "testing_backup_bucket"  # A bucket name containing "backup" should result in a PATCH rather than a POST.
+ key = self.key
+ queue_name = 'test_queue'
+ sqs_resource = self.s3_utils.connect('resource', 'sqs', self.region)
+ sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url
+ sqs_queue = sqs_resource.Queue(queue_name)
+
+ # Send a test message
+ sqs_client = self.s3_utils.connect('client', 'sqs' , self.region)
+ message = create_delete_message(self.region, bucket, key)
+ sqs_client.send_message(
+ QueueUrl=sqs_queue_url,
+ MessageBody=json.dumps(message)
+ )
+
+ cb = create_upload_handler(mock_wp, mock_s3_utils, self.s3_message_adapter)
+
+ self.sqs_consumer.receive_messages(sqs_queue, 1, cb)
+
+ # Verify publish called
+ mock_wp.publish_registry.assert_called_with(
+ 'granule',
+ mock_s3_utils.get_uuid_metadata(),
+ json.dumps(self.s3_message_adapter.transform(json.loads(message['Message'])['Records']).to_dict(), cls=EnumEncoder),
+ 'PATCH'
+ )
+
+if __name__ == '__main__':
+ unittest.main()
\ No newline at end of file
diff --git a/onestop-python-client/test/unit/test_WebPublisher.py b/onestop-python-client/test/unit/test_WebPublisher.py
new file mode 100644
index 0000000..af0802f
--- /dev/null
+++ b/onestop-python-client/test/unit/test_WebPublisher.py
@@ -0,0 +1,145 @@
+import json
+import unittest
+
+from unittest.mock import ANY
+from unittest import mock
+from moto import mock_s3
+from onestop.WebPublisher import WebPublisher
+
+class test_WebPublisher(unittest.TestCase):
+ username="admin"
+ password="a_password"
+ uuid = "9f0a5ff2-fcc0-5bcb-a225-024b669c9bba"
+ registry_base_url = "https://localhost/onestop/api/registry"
+ registry_full_url_granule = registry_base_url + "/metadata/granule/" + uuid
+ registry_full_url_collection = registry_base_url + "/metadata/collection/" + uuid
+ onestop_base_url = "https://localhost/onestop/api/search"
+
+ payloadDict = {
+ "fileInformation": {
+ "name": "file2.csv",
+ "size": 1385,
+ "checksums": [{
+ "algorithm": "MD5",
+ "value": "44d2452e8bc2c8013e9c673086fbab7a"
+ }]
+ },
+ "relationships": [
+ {"type": "COLLECTION",
+ "id": "fdb56230-87f4-49f2-ab83-104cfd073177"
+ }
+ ],
+ "fileLocations": {
+ "nesdis-ncei-csb-dev/csv/file2.csv": {
+ "uri": "https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com/csv/file2.csv",
+ "type": "ACCESS",
+ "restricted": False,
+ "serviceType": "HTTPS",
+ "asynchronous": False
+ }
+ },
+ "discovery": {
+ "title": "file2.csv",
+ "parentIdentifier": "fdb56230-87f4-49f2-ab83-104cfd073177",
+ "fileIdentifier": "gov.noaa.ncei.csb:file2"
+ }
+ }
+
+ addlocDict = {
+ "fileLocations": {
+ "Crt3a-Hq2SGUp8n8QSRNpFIf59kmMONqaKlJ_7-Igd8ijMM62deLdtVkiYwlaePbC4JNCsfeg5i-DWDmwxLIx9V-OGgiQp_CZ0rEFXIZxM_ZPyGu7TTv8wwos5SvAI6xDURhzoCH-w": {
+ "uri": "/282856304593/vaults/noaa-nesdis-ncei-vault-test/archives/Crt3a-Hq2SGUp8n8QSRNpFIf59kmMONqaKlJ_7-Igd8ijMM62deLdtVkiYwlaePbC4JNCsfeg5i-DWDmwxLIx9V-OGgiQp_CZ0rEFXIZxM_ZPyGu7TTv8wwos5SvAI6xDURhzoCH-w",
+ "type": "ACCESS",
+ "restricted": True,
+ "serviceType": "Amazon:AWS:Glacier",
+ "asynchronous": True
+ }
+ }
+ }
+
+
+ def setUp(self):
+ print("Set it up!")
+
+ self.wp = WebPublisher(self.registry_base_url,
+ self.username,
+ self.password,
+ self.onestop_base_url,
+ 'DEBUG')
+
+ def tearDown(self):
+ print("Tear it down!")
+
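+    # Stand-in for the patched requests methods: accepts any call and returns a canned 200 response.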
+ def mocked_requests_patch(*args, **kwargs):
+ class MockResponse:
+ def __init__(self, json_data, status_code):
+ self.json_data = json_data
+ self.status_code = status_code
+
+ def json(self):
+ return self.json_data
+
+ print ("args: "+str(args)+" kwargs: "+str(kwargs))
+
+ return MockResponse({"key1":"value1"}, 200)
+
+ @mock_s3
+ @mock.patch('requests.post', side_effect=mocked_requests_patch, autospec=True)
+    def test_publish_post(self, mock_get):
+ payload = json.dumps(self.payloadDict)
+ self.wp.publish_registry("granule", self.uuid, payload, "POST")
+
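+        # Each assertion inspects one argument of the single call, using ANY to match the rest.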
+ mock_get.assert_called_with(url = self.registry_full_url_granule, auth = ANY, data = ANY, verify = ANY, headers = ANY)
+ mock_get.assert_called_with(url = ANY, auth = (self.username, self.password), data = ANY, verify = ANY, headers = ANY)
+ mock_get.assert_called_with(url = ANY, auth = ANY, data = payload, verify = ANY, headers = ANY)
+ mock_get.assert_called_with(url = ANY, auth = ANY, data = ANY, verify = False, headers = ANY)
+ mock_get.assert_called_with(url = ANY, auth = ANY, data = ANY, verify = ANY, headers = {'Content-Type': 'application/json'})
+
+ @mock_s3
+ @mock.patch('requests.put', side_effect=mocked_requests_patch, autospec=True)
+    def test_publish_put(self, mock_get):
+ payload = json.dumps(self.payloadDict)
+ self.wp.publish_registry("granule", self.uuid, payload, "PUT")
+
+ mock_get.assert_called_with(url = self.registry_full_url_granule, auth = ANY, data = ANY, verify = ANY, headers = ANY)
+ mock_get.assert_called_with(url = ANY, auth = (self.username, self.password), data = ANY, verify = ANY, headers = ANY)
+ mock_get.assert_called_with(url = ANY, auth = ANY, data = payload, verify = ANY, headers = ANY)
+ mock_get.assert_called_with(url = ANY, auth = ANY, data = ANY, verify = False, headers = ANY)
+ mock_get.assert_called_with(url = ANY, auth = ANY, data = ANY, verify = ANY, headers = {'Content-Type': 'application/json'})
+
+ @mock_s3
+ @mock.patch('requests.patch', side_effect=mocked_requests_patch, autospec=True)
+ def test_add_glacier_location(self, mock_get):
+ payload = json.dumps(self.addlocDict)
+ self.wp.publish_registry("granule", self.uuid, payload, "PATCH")
+
+ mock_get.assert_called_with(url = self.registry_full_url_granule, auth = ANY, data = ANY, verify = ANY, headers = ANY)
+ mock_get.assert_called_with(url = ANY, auth = (self.username, self.password), data = ANY, verify = ANY, headers = ANY)
+ mock_get.assert_called_with(url = ANY, auth = ANY, data = payload, verify = ANY, headers = ANY)
+ mock_get.assert_called_with(url = ANY, auth = ANY, data = ANY, verify = False, headers = ANY)
+ mock_get.assert_called_with(url = ANY, auth = ANY, data = ANY, verify = ANY, headers = {'Content-Type': 'application/json'})
+
+ @mock_s3
+ @mock.patch('requests.delete', side_effect=mocked_requests_patch, autospec=True)
+ def test_delete_registry_granule(self, mock_get):
+ self.wp.delete_registry("granule", self.uuid)
+
+ mock_get.assert_called_with(url = self.registry_full_url_granule, headers = ANY, auth = ANY, verify = ANY)
+ mock_get.assert_called_with(url = ANY, auth = (self.username, self.password), verify = ANY, headers = ANY)
+ mock_get.assert_called_with(url = ANY, auth = ANY, verify = ANY, headers = ANY)
+ mock_get.assert_called_with(url = ANY, auth = ANY, verify = False, headers = ANY)
+ mock_get.assert_called_with(url = ANY, auth = ANY, verify = ANY, headers = {'Content-Type': 'application/json'})
+
+ @mock_s3
+ @mock.patch('requests.delete', side_effect=mocked_requests_patch, autospec=True)
+ def test_delete_registry_collection(self, mock_get):
+ self.wp.delete_registry("collection", self.uuid)
+
+ mock_get.assert_called_with(url = self.registry_full_url_collection, headers = ANY, auth = ANY, verify = ANY)
+ mock_get.assert_called_with(url = ANY, auth = (self.username, self.password), verify = ANY, headers = ANY)
+ mock_get.assert_called_with(url = ANY, auth = ANY, verify = ANY, headers = ANY)
+ mock_get.assert_called_with(url = ANY, auth = ANY, verify = False, headers = ANY)
+ mock_get.assert_called_with(url = ANY, auth = ANY, verify = ANY, headers = {'Content-Type': 'application/json'})
+
+if __name__ == '__main__':
+ unittest.main()
\ No newline at end of file
diff --git a/onestop-python-client/test/unit/util/__init__.py b/onestop-python-client/test/unit/util/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/onestop-python-client/tests/util/S3MessageAdapterTest.py b/onestop-python-client/test/unit/util/test_S3MessageAdapter.py
similarity index 61%
rename from onestop-python-client/tests/util/S3MessageAdapterTest.py
rename to onestop-python-client/test/unit/util/test_S3MessageAdapter.py
index 41a8f9d..93dfed2 100644
--- a/onestop-python-client/tests/util/S3MessageAdapterTest.py
+++ b/onestop-python-client/test/unit/util/test_S3MessageAdapter.py
@@ -1,11 +1,12 @@
import unittest
+
from moto import mock_s3
-from tests.utils import abspath_from_relative
from onestop.util.S3Utils import S3Utils
from onestop.util.S3MessageAdapter import S3MessageAdapter
class S3MessageAdapterTest(unittest.TestCase):
s3ma = None
+ config_dict = None
recs1 = \
[{
@@ -51,22 +52,50 @@ class S3MessageAdapterTest(unittest.TestCase):
def setUp(self):
print("Set it up!")
- self.s3_utils = S3Utils(abspath_from_relative(__file__, "../../config/aws-util-config-dev.yml"),
- abspath_from_relative(__file__, "../../config/credentials-template.yml"))
- self.s3ma = S3MessageAdapter(abspath_from_relative(__file__, "../../config/csb-data-stream-config-template.yml"),
- self.s3_utils)
+
+ self.config_dict = {
+ 'access_key': 'test_access_key',
+ 'secret_key': 'test_secret_key',
+ 'access_bucket': 'https://archive-testing-demo.s3-us-east-2.amazonaws.com',
+ 's3_message_adapter_metadata_type': 'COLLECTION',
+ 'file_id_prefix': 'gov.noaa.ncei.csb:',
+ 'collection_id': 'fdb56230-87f4-49f2-ab83-104cfd073177',
+ 'log_level': 'DEBUG'
+ }
+
+ self.s3_utils = S3Utils(**self.config_dict)
+ self.s3ma = S3MessageAdapter(**self.config_dict)
+
+ self.region = 'us-east-2'
def tearDown(self):
print("Tear it down!")
- def test_parse_config(self):
- self.assertFalse(self.s3ma.conf['collection_id']==None)
+ def test_init_metadata_type_valid(self):
+ publisher = S3MessageAdapter(**self.config_dict)
+
+ self.assertEqual(publisher.metadata_type, self.config_dict['s3_message_adapter_metadata_type'])
+ def test_init_metadata_type_invalid(self):
+ wrong_metadata_type_config = dict(self.config_dict)
+ wrong_metadata_type_config['s3_message_adapter_metadata_type'] = "invalid_type"
+
+ self.assertRaises(ValueError, S3MessageAdapter, **wrong_metadata_type_config)
+
+ def test_metadata_type_lowercase(self):
+ metadata_type = 'collection'
+ uppercase_metadata_type = metadata_type.upper()
+ config = dict(self.config_dict)
+ config['s3_message_adapter_metadata_type'] = metadata_type
+
+ s3MA = S3MessageAdapter(**config)
+
+ self.assertEqual(uppercase_metadata_type, s3MA.metadata_type)
@mock_s3
def test_transform(self):
- s3 = self.s3_utils.connect('s3', self.s3_utils.conf['s3_region'])
- location = {'LocationConstraint': self.s3_utils.conf['s3_region']}
+ s3 = self.s3_utils.connect('client', 's3', self.region)
+ location = {'LocationConstraint': self.region}
bucket = 'nesdis-ncei-csb-dev'
key = 'csv/file1.csv'
key2 = 'csv/file2.csv'
@@ -81,4 +110,11 @@ def test_transform(self):
print(payload)
self.assertTrue(payload!=None)
+ @mock_s3
+ def test_extra_parameters_constructor(self):
+ testParams = dict(self.config_dict)
+ testParams['extra'] = 'extra value'
+        self.assertRaises(Exception, S3MessageAdapter, **testParams)
+if __name__ == '__main__':
+ unittest.main()
\ No newline at end of file
diff --git a/onestop-python-client/test/unit/util/test_S3Utils.py b/onestop-python-client/test/unit/util/test_S3Utils.py
new file mode 100644
index 0000000..830a1d8
--- /dev/null
+++ b/onestop-python-client/test/unit/util/test_S3Utils.py
@@ -0,0 +1,267 @@
+import csv
+import unittest
+import uuid
+import json
+
+from unittest import mock
+from moto import mock_s3, mock_sqs
+from moto import mock_glacier
+from test.utils import abspath_from_relative
+from onestop.util.S3Utils import S3Utils
+from boto.glacier.layer1 import Layer1
+from botocore.response import StreamingBody
+from io import StringIO
+
+class S3UtilsTest(unittest.TestCase):
+
+ def setUp(self):
+ print("Set it up!")
+
+ config_dict = {
+ 'access_key': 'test_access_key',
+ 'secret_key': 'test_secret_key',
+ 'access_bucket': 'https://archive-testing-demo.s3-us-east-2.amazonaws.com',
+ 'metadata_type': 'COLLECTION',
+ 'file_id_prefix': 'gov.noaa.ncei.csb:',
+ 'collection_id': 'fdb56230-87f4-49f2-ab83-104cfd073177',
+ 'log_level': 'DEBUG'
+ }
+
+ self.s3_utils = S3Utils(**config_dict)
+
+ self.region = 'us-east-2'
+ self.region2 = 'eu-north-1'
+ self.bucket = 'archive-testing-demo'
+
+ @mock_sqs
+ def test_connect_session(self):
+ session = self.s3_utils.connect('Session', None, self.region)
+
+        # Calling a method unique to a Session should not raise.
+ session.client('sqs')
+ session.resource('s3')
+
+ @mock_sqs
+ def test_connect_client(self):
+ client = self.s3_utils.connect('Client', 'sqs', self.region)
+
+        # Calling a method unique to a Client should not raise.
+ client.list_queues()
+
+ @mock_sqs
+ def test_connect_resource(self):
+ resource = self.s3_utils.connect('Resource', 'sqs', self.region)
+
+        # Calling a method unique to a Resource should not raise.
+ resource.Queue(url='test')
+
+ @mock_sqs
+ def test_connect_exception_for_invalid_connection_type(self):
+ with self.assertRaises(Exception):
+ self.s3_utils.connect('junk', 'sqs', self.region)
+
+ @mock_s3
+ def test_get_uuid_metadata(self):
+ boto_client = self.s3_utils.connect('resource', 's3', None)
+ s3_key = "csv/file1.csv"
+
+ location = {'LocationConstraint': self.region}
+ boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location)
+ obj_uuid = str(uuid.uuid4())
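+        # Store the object with an 'object-uuid' metadata entry for the lookup to find.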
+ boto_client.Object(self.bucket, s3_key).put(Bucket=self.bucket, Key=s3_key, Body="my_body", Metadata={'object-uuid': obj_uuid})
+
+        self.assertIsNotNone(self.s3_utils.get_uuid_metadata(boto_client, self.bucket, s3_key))
+
+ @mock_s3
+ def test_add_uuid_metadata(self):
+ boto_client = self.s3_utils.connect('resource', 's3', self.region)
+
+ s3_key = "csv/file1.csv"
+
+ location = {'LocationConstraint': self.region}
+ boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location)
+ boto_client.Object(self.bucket, s3_key).put(Bucket=self.bucket, Key=s3_key, Body="my_body")
+
+ self.assertTrue(self.s3_utils.add_uuid_metadata(boto_client, self.bucket, s3_key))
+
+ @mock_s3
+ def test_add_file_s3_overwrite(self):
+ boto_client = self.s3_utils.connect('client', 's3', None)
+ local_file = abspath_from_relative(__file__, "../../data/file4.csv")
+ s3_key = "csv/file4.csv"
+ location = {'LocationConstraint': self.region}
+ boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location)
+
+ self.assertTrue(self.s3_utils.upload_s3(boto_client, local_file, self.bucket, s3_key, True))
+
+ @mock_s3
+ def test_add_file_s3_nooverwrite(self):
+ boto_client = self.s3_utils.connect('client', 's3', None)
+ local_file = abspath_from_relative(__file__, "../../data/file4.csv")
+ s3_key = "csv/file4.csv"
+ location = {'LocationConstraint': self.region}
+ boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location)
+
+ self.assertTrue(self.s3_utils.upload_s3(boto_client, local_file, self.bucket, s3_key, False))
+
+ @mock_s3
+ def test_get_csv_s3(self):
+ boto_session = self.s3_utils.connect('session', None, self.region)
+ s3 = self.s3_utils.connect('client', 's3', self.region)
+ location = {'LocationConstraint': self.region}
+ s3_key = "csv/file1.csv"
+ s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location)
+ s3.put_object(Bucket=self.bucket, Key=s3_key, Body="body")
+
+ sm_open_file = self.s3_utils.get_csv_s3(boto_session, self.bucket, s3_key)
+
+ # print("reading csv:" + line.decode('utf-8'))
+ csv_reader = csv.DictReader(sm_open_file)
+ for row in csv_reader:
+ print(str(row["LON"]))
+
+ @mock_s3
+ def test_read_bytes_s3(self):
+ boto_client = self.s3_utils.connect('client', 's3', None)
+ s3_key = "csv/file1.csv"
+ boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration={'LocationConstraint': self.region})
+ boto_client.put_object(Bucket=self.bucket, Key=s3_key, Body="body")
+
+ self.assertTrue(self.s3_utils.read_bytes_s3(boto_client, self.bucket, s3_key))
+
+ @mock_s3
+ def test_add_files(self):
+ boto_client = self.s3_utils.connect('client', 's3', None)
+ local_files = ["file1_s3.csv", "file2.csv", "file3.csv"]
+ location = {'LocationConstraint': self.region}
+ boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location)
+
+ for file in local_files:
+ local_file = abspath_from_relative(__file__, "../../data/" + file)
+ s3_file = "csv/" + file
+ self.assertTrue(self.s3_utils.upload_s3(boto_client, local_file, self.bucket, s3_file, True))
+
+ @mock_s3
+ @mock_glacier
+ def test_s3_cross_region(self):
+ print('Cross Region Vault Upload ------------- ')
+ key = "csv/file1.csv"
+
+ # makes connection to low level s3 client
+ s3 = self.s3_utils.connect('client', 's3', self.region)
+ location = {'LocationConstraint': self.region}
+ s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location)
+ s3.put_object(Bucket=self.bucket, Key=key, Body="body")
+
+ # Reads object data and stores it into a variable
+ file_data = self.s3_utils.read_bytes_s3(s3, self.bucket, key)
+
+ # Redirecting upload to vault in second region
+ glacier = self.s3_utils.connect('client', 'glacier', self.region2)
+ vault_name = 'archive-vault-new'
+ glacier.create_vault(vaultName=vault_name)
+ print('vault name: ' + str(vault_name))
+ print('region name: ' + str(self.region2))
+ print('-------file data---------')
+ print(file_data)
+ response = self.s3_utils.upload_archive(glacier, vault_name, file_data)
+
+        self.assertIsNotNone(response['archiveId'])
+
+ @mock_s3
+ @mock_glacier
+ def test_s3_to_glacier(self):
+ """
+ Changes the storage class of an object from S3 to Glacier
+ Requires the configure and credential locations as parameters as well as the key of the object
+ """
+
+ print("S3 to Glacier---------")
+ key = "csv/file1_s3.csv"
+
+ # Create boto3 low level api connection
+ s3 = self.s3_utils.connect('client', 's3', self.region)
+ location = {'LocationConstraint': self.region}
+ s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location)
+ s3.put_object(Bucket=self.bucket, Key=key, Body="body")
+
+ # Using the S3 util class invoke the change of storage class
+ response = self.s3_utils.s3_to_glacier(s3, self.bucket, key)
+ print(response['ResponseMetadata']['HTTPHeaders']['x-amz-storage-class'])
+ # Assert 'x-amz-storage-class': 'GLACIER'
+
+ self.assertTrue(response['ResponseMetadata']['HTTPHeaders']['x-amz-storage-class'] == "GLACIER")
+
+ @mock_s3
+ def test_s3_restore(self):
+ """
+ Uses high level api to restore object from glacier to s3
+ """
+
+ key = "csv/file1_s3.csv"
+ days = 3
+
+ # use high level api
+ s3 = self.s3_utils.connect('resource', 's3' , self.region2)
+ location = {'LocationConstraint': self.region2}
+ s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location)
+ s3.Object(self.bucket, key).put(Bucket=self.bucket, Key=key, Body="body")
+
+        self.assertIsNotNone(self.s3_utils.s3_restore(s3, self.bucket, key, days))
+
+ @mock_glacier
+ def test_retrieve_inventory(self):
+ """
+ Initiates job for archive retrieval. Takes 3-5 hours to complete if not mocked.
+ """
+
+ # Using glacier api initiates job and returns archive results
+ # Connect to your glacier vault for retrieval
+ glacier = self.s3_utils.connect('client', 'glacier', self.region2)
+ vault_name = 'archive-vault-new'
+ glacier.create_vault(vaultName=vault_name)
+
+ response = self.s3_utils.retrieve_inventory(glacier, vault_name)
+ print('jobid %s'%response['jobId'])
+        self.assertIsNotNone(response['jobId'])
+
+ @mock_glacier
+ @mock_s3
+ def test_retrieve_inventory_results(self):
+ """
+ Once the job has been completed, use the job id to retrieve archive results
+ """
+
+ # Connect to your glacier vault for retrieval
+ glacier = mock.Mock(spec=Layer1)#self.s3_utils.connect('client', 'glacier', self.region)
+ vault_name = 'archive-vault-new'
+ glacier.create_vault(vaultName=vault_name)
+
+ body_json = {'Body': [{'test':'value'}]}
+ body_encoded = json.dumps(body_json)#.encode("utf-16")
+
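+        # Wrap the payload in a StreamingBody so retrieve_inventory_results can read it like a real Glacier job-output response.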
+ body = StreamingBody(
+ StringIO(str(body_encoded)),
+ len(str(body_encoded))
+ )
+
+ mocked_response = {
+ 'body': body
+ }
+ glacier.get_job_output.return_value = mocked_response
+ with mock.patch('boto.glacier.job.tree_hash_from_str') as t:
+ t.return_value = 'tree_hash'
+ inventory = self.s3_utils.retrieve_inventory_results(vault_name, glacier, 'ASDF78')
+
+ self.assertEqual(body_json, inventory)
+
+ @mock_s3
+ def test_extra_parameters_constructor(self):
+ testParams = {"access_key": "blah",
+ "secret_key": "blah",
+ "log_level": "DEBUG",
+ "extra": "extra value"}
+        self.assertRaises(Exception, S3Utils, **testParams)
+
+if __name__ == '__main__':
+ unittest.main()
\ No newline at end of file
diff --git a/onestop-python-client/test/unit/util/test_SqsConsumer.py b/onestop-python-client/test/unit/util/test_SqsConsumer.py
new file mode 100644
index 0000000..03ee897
--- /dev/null
+++ b/onestop-python-client/test/unit/util/test_SqsConsumer.py
@@ -0,0 +1,178 @@
+import unittest
+import json
+
+from moto import mock_sqs
+from unittest.mock import MagicMock, ANY
+from onestop.util.S3Utils import S3Utils
+from onestop.util.SqsConsumer import SqsConsumer
+
+class SqsConsumerTest(unittest.TestCase):
+ config_dict = {
+ 'access_key': 'test_access_key',
+ 'secret_key': 'test_secret_key',
+ 's3_region': 'us-east-2',
+ 's3_bucket': 'archive-testing-demo',
+ 'sqs_url': 'https://sqs.us-east-2.amazonaws.com/798276211865/cloud-archive-client-sqs',
+ 'metadata_type': 'COLLECTION',
+ 'file_id_prefix': 'gov.noaa.ncei.csb:',
+ 'collection_id': 'fdb56230-87f4-49f2-ab83-104cfd073177',
+ 'registry_base_url': 'http://localhost/onestop/api/registry',
+ 'registry_username': 'admin',
+ 'registry_password': 'whoknows',
+ 'onestop_base_url': 'http://localhost/onestop/api/search/search',
+ 'log_level': 'DEBUG'
+ }
+
+ records = [{"eventVersion":"2.1"}]
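+    # Simulated SNS notification wrapping an S3 event record, as it would arrive from the SQS queue.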
+ message = json.dumps(
+ {"Type": "Notification",
+ "MessageId": "9d0691d2-ae9c-58f9-a9f4-c8dcf05d87be",
+ "TopicArn": "arn:aws:sns:us-east-1:798276211865:archive-testing-demo-backup-use-1",
+ "Subject": "Amazon S3 Notification",
+ "Message": json.dumps({"Records": records}),
+ "Timestamp": "2021-05-06T21:15:45.427Z",
+ "SignatureVersion": "1",
+ "Signature": "Ui5s4uVgcMr5fjGmePCMgmi14Dx9oS8hIpjXXiQo+xZPgsHkUayz7dEeGmMGGt45l8blmZTZEbxJG+HVGfIUmQGRqoimwiLm+mIAaNIN/BV76FVFcQUIkORX8gYN0a4RS3HU8/ElrKFK8Iz0zpxJdjwxa3xPCDwu+dTotiLTJxSouvg8MmkkDnq758a8vZ9WK2PaOlZiZ3m8Mv2ZvLrozZ/DAAz48HSad6Mymhit82RpGCUxy4SDwXVlP/nLB01AS11Gp2HowJR8NXyStrZYzzQEc+PebITaExyikgTMiVhRHkmb7JrtZPpgZu2daQsSooqpwyIzb6pvgwu9W54jkw==",
+ "SigningCertURL": "https://sns.us-east-1.amazonaws.com/SimpleNotificationService-010a507c1833636cd94bdb98bd93083a.pem",
+ "UnsubscribeURL": "https://sns.us-east-1.amazonaws.com/?Action=Unsubscribe&SubscriptionArn=arn:aws:sns:us-east-1:798276211865:archive-testing-demo-backup-use-1:e7a9a9f5-792e-48a6-9ec8-40f7f5a8f600"
+ })
+
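+    # Same notification shape but with an empty inner Message, to exercise the no-Records path.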
+ message_wo_records = json.dumps(
+ {"Type": "Notification",
+ "MessageId": "9d0691d2-ae9c-58f9-a9f4-c8dcf05d87be",
+ "TopicArn": "arn:aws:sns:us-east-1:798276211865:archive-testing-demo-backup-use-1",
+ "Subject": "Amazon S3 Notification",
+ "Message": "{}",
+ "Timestamp": "2021-05-06T21:15:45.427Z",
+ "SignatureVersion": "1",
+ "Signature": "Ui5s4uVgcMr5fjGmePCMgmi14Dx9oS8hIpjXXiQo+xZPgsHkUayz7dEeGmMGGt45l8blmZTZEbxJG+HVGfIUmQGRqoimwiLm+mIAaNIN/BV76FVFcQUIkORX8gYN0a4RS3HU8/ElrKFK8Iz0zpxJdjwxa3xPCDwu+dTotiLTJxSouvg8MmkkDnq758a8vZ9WK2PaOlZiZ3m8Mv2ZvLrozZ/DAAz48HSad6Mymhit82RpGCUxy4SDwXVlP/nLB01AS11Gp2HowJR8NXyStrZYzzQEc+PebITaExyikgTMiVhRHkmb7JrtZPpgZu2daQsSooqpwyIzb6pvgwu9W54jkw==",
+ "SigningCertURL": "https://sns.us-east-1.amazonaws.com/SimpleNotificationService-010a507c1833636cd94bdb98bd93083a.pem",
+ "UnsubscribeURL": "https://sns.us-east-1.amazonaws.com/?Action=Unsubscribe&SubscriptionArn=arn:aws:sns:us-east-1:798276211865:archive-testing-demo-backup-use-1:e7a9a9f5-792e-48a6-9ec8-40f7f5a8f600"
+ })
+
+ @mock_sqs
+ def setUp(self):
+ print("Set it up!")
+
+ self.s3_utils = S3Utils(**self.config_dict)
+ self.sqs_consumer = SqsConsumer(**self.config_dict)
+
+ def tearDown(self):
+ print("Tear it down!")
+
+ @mock_sqs
+ def test_connect(self):
+ queue_name = 'test'
+ sqs_resource = self.s3_utils.connect('resource', 'sqs', self.config_dict['s3_region'])
+ expQueue = sqs_resource.create_queue(QueueName=queue_name)
+ queue = self.sqs_consumer.connect(sqs_resource, queue_name)
+
+ self.assertEqual(expQueue.url, queue.url)
+
+    # Of limited value: every exception is caught internally, so this test cannot fail when it should.
+ @mock_sqs
+ def test_receive_messages_no_records(self):
+ mock_cb = MagicMock()
+
+ # Create the mock queue beforehand and set SqsConsumer's 'sqs_url' to the mock's URL
+ queue_name = 'test_queue'
+ sqs_resource = self.s3_utils.connect('resource', 'sqs', self.config_dict['s3_region'])
+ sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url
+
+ # Send a test message lacking Records field
+ sqs_client = self.s3_utils.connect('client', 'sqs' , self.config_dict['s3_region'])
+ sqs_client.send_message(
+ QueueUrl=sqs_queue_url,
+ MessageBody= self.message_wo_records
+ )
+ queue = sqs_resource.Queue(queue_name)
+
+ self.sqs_consumer.receive_messages(queue, 1, mock_cb)
+
+ # Verify callback function was called once with expected message attributes
+ mock_cb.assert_not_called()
+
+ @mock_sqs
+ def test_receive_messages_fails_invalid_sqs_max_polls(self):
+ with self.assertRaises(ValueError):
+ self.sqs_consumer.receive_messages(MagicMock(), 0, MagicMock())
+
+ @mock_sqs
+ def test_receive_messages_polls_msgs_expected_times(self):
+ mock_cb = MagicMock()
+ queue = MagicMock()
+
+ sqs_max_polls = 2
+ self.sqs_consumer.receive_messages(queue, sqs_max_polls, mock_cb)
+
+ # Verify polling called expected times
+ self.assertEqual(queue.receive_messages.call_count, sqs_max_polls)
+
+ @mock_sqs
+ def test_receive_messages_callback_occurs(self):
+ mock_cb = MagicMock()
+
+ # Create the mock queue beforehand and set SqsConsumer's 'sqs_url' to the mock's URL
+ queue_name = 'test_queue'
+ sqs_resource = self.s3_utils.connect('resource', 'sqs', self.config_dict['s3_region'])
+ sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url
+
+ # Send a test message
+ sqs_client = self.s3_utils.connect('client', 'sqs' , self.config_dict['s3_region'])
+ sqs_client.send_message(
+ QueueUrl=sqs_queue_url,
+ MessageBody= self.message
+ )
+ queue = sqs_resource.Queue(queue_name)
+
+ self.sqs_consumer.receive_messages(queue, 1, mock_cb)
+
+        # Verify callback was called with the expected records
+ mock_cb.assert_called_with(self.records, ANY)
+
+ @mock_sqs
+ def test_happy_path(self):
+ mock_cb = MagicMock()
+
+        # Create the mock queue beforehand via SqsConsumer.connect
+ queue_name = 'test_queue'
+ sqs_resource = self.s3_utils.connect('resource', 'sqs', self.config_dict['s3_region'])
+ queue = self.sqs_consumer.connect(sqs_resource, queue_name) #sqs_resource.create_queue(QueueName=queue_name)
+
+ # Send a test message
+ sqs_client = self.s3_utils.connect('client', 'sqs' , self.config_dict['s3_region'])
+ sqs_client.send_message(
+ QueueUrl=queue.url,
+ MessageBody= self.message
+ )
+
+ self.sqs_consumer.receive_messages(queue, 1, mock_cb)
+
+        # Verify callback was called with the expected records
+ mock_cb.assert_called_with(self.records, ANY)
+
+ # An example using external send/receive methods
+ @unittest.skip
+ @mock_sqs
+ def test_write_message_valid(self):
+ "Test the write_message method with a valid message"
+ sqs_client = self.s3_utils.connect('client', 'sqs' , self.config_dict['s3_region'])
+ sqs = self.s3_utils.connect('resource', 'sqs', self.config_dict['s3_region'])
+ queue = sqs.create_queue(QueueName='test-skype-sender')
+ self.sqs_consumer.sqs_url = queue.url
+ skype_message = 'Testing with a valid message'
+ channel = 'test'
+ expected_message = str({'msg':f'{skype_message}', 'channel':channel})
+ message = str({'msg':f'{skype_message}', 'channel':channel})
+ queue.send_message(MessageBody=(message))
+
+ sqs_messages = queue.receive_messages()
+ print('Message: %s'%sqs_messages)
+ print('Message0: %s'%sqs_messages[0])
+ assert sqs_messages[0].body == expected_message, 'Message in skype-sender does not match expected'
+ print(f'The message in skype-sender SQS matches what we sent')
+ assert len(sqs_messages) == 1, 'Expected exactly one message in SQS'
+ print(f'\nExactly one message in skype-sender SQS')
+
+if __name__ == '__main__':
+ unittest.main()
\ No newline at end of file
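For readers skimming the tests above: they exercise a callback contract in which `receive_messages(queue, sqs_max_polls, cb)` invokes the callback with the parsed `Records` list plus a second argument (matched with `ANY`). Below is a minimal sketch of such a callback, assuming the S3 event record layout used by the handlers later in this change; `example_callback` is hypothetical and not part of onestop-python-client.

```python
def example_callback(records, log_level='INFO'):
    # 'records' is the parsed "Records" list from the SQS/SNS message body.
    # The key layout mirrors the S3 event structure the scripts' handlers read.
    for rec in records:
        bucket = rec['s3']['bucket']['name']
        key = rec['s3']['object']['key']
        print('Received event for s3://%s/%s at log level %s' % (bucket, key, log_level))
```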
diff --git a/onestop-python-client/tests/utils.py b/onestop-python-client/test/utils.py
similarity index 83%
rename from onestop-python-client/tests/utils.py
rename to onestop-python-client/test/utils.py
index 2f1e6d5..fc124fb 100644
--- a/onestop-python-client/tests/utils.py
+++ b/onestop-python-client/test/utils.py
@@ -15,7 +15,8 @@ def create_delete_message(region, bucket, key):
"Message": '''{
"Records": [{
"eventVersion": "2.1", "eventSource": "aws:s3", "awsRegion": "''' + region + '''",
- "eventTime": "2020-12-14T20:56:08.725Z", "eventName": "ObjectRemoved:Delete",
+ "eventTime": "2020-12-14T20:56:08.725Z",
+ "eventName": "ObjectRemoved:Delete",
"userIdentity": {"principalId": "AX8TWPQYA8JEM"},
"requestParameters": {"sourceIPAddress": "65.113.158.185"},
"responseElements": {"x-amz-request-id": "D8059E6A1D53597A",
@@ -25,7 +26,11 @@ def create_delete_message(region, bucket, key):
"bucket": {"name": "''' + bucket + '''",
"ownerIdentity": {"principalId": "AX8TWPQYA8JEM"},
"arn": "arn:aws:s3:::''' + bucket + '''"},
- "object": {"key": "''' + key + '''", "sequencer": "005FD7D1765F04D8BE"}
+ "object": {"key": "''' + key + '''",
+ "sequencer": "005FD7D1765F04D8BE",
+ "eTag": "44d2452e8bc2c8013e9c673086fbab7a",
+ "size": 1385,
+ "versionId": "q6ls_7mhqUbfMsoYiQSiADnHBZQ3Fbzf"}
}
}]
}''',
diff --git a/onestop-python-client/tests/KafkaPublisherTest.py b/onestop-python-client/tests/KafkaPublisherTest.py
deleted file mode 100644
index 7d992ae..0000000
--- a/onestop-python-client/tests/KafkaPublisherTest.py
+++ /dev/null
@@ -1,25 +0,0 @@
-import unittest
-
-import json
-
-from onestop.KafkaPublisher import KafkaPublisher
-
-class KafkaPublisherTest(unittest.TestCase):
- kp = None
-
- def setUp(self):
- print("Set it up!")
- self.kp = KafkaPublisher("../config/kafka-publisher-config-dev.yml")
-
- def tearDown(self):
- print("Tear it down!")
-
- def test_parse_config(self):
- self.assertFalse(self.kp.conf['brokers']==None)
-
- def test_publish_collection(self):
- print("Publish collection")
- # Integration test TBD
-
-if __name__ == '__main__':
- unittest.main()
\ No newline at end of file
diff --git a/onestop-python-client/tests/SqsHandlersTest.py b/onestop-python-client/tests/SqsHandlersTest.py
deleted file mode 100644
index 12323ef..0000000
--- a/onestop-python-client/tests/SqsHandlersTest.py
+++ /dev/null
@@ -1,89 +0,0 @@
-import json
-import unittest
-import boto3
-
-from moto import mock_s3
-from moto import mock_sqs
-from tests.utils import abspath_from_relative, create_delete_message
-from onestop.WebPublisher import WebPublisher
-from onestop.util.S3Utils import S3Utils
-from onestop.util.S3MessageAdapter import S3MessageAdapter
-from onestop.util.SqsConsumer import SqsConsumer
-from onestop.util.SqsHandlers import create_delete_handler
-
-
-class SqsHandlerTest(unittest.TestCase):
- wp = None
- su = None
- s3ma = None
- sqs = None
- wp_config = abspath_from_relative(__file__, "../config/web-publisher-config-local.yml")
- aws_config = abspath_from_relative(__file__, "../config/aws-util-config-dev.yml")
- cred_config = abspath_from_relative(__file__, "../config/credentials-template.yml")
- csb_config = abspath_from_relative(__file__, "../config/csb-data-stream-config.yml")
-
- collection_uuid = '5b58de08-afef-49fb-99a1-9c5d5c003bde'
- payloadDict = {
- "fileInformation": {
- "name": "OR_ABI-L1b-RadF-M6C13_G16_s20192981730367_e20192981740087_c20192981740157.nc",
- "size": 30551050,
- "checksums": [{
- "algorithm": "SHA1",
- "value": "bf4c5b58f8d5f9445f7b277f988e5861184f775a"
- }],
- "format": "NetCDF"
- },
- "relationships": [{
- "type": "COLLECTION",
- "id": collection_uuid
- }],
- "fileLocations": {
- "s3://noaa-goes16/ABI-L1b-RadF/2019/298/17/OR_ABI-L1b-RadF-M6C13_G16_s20192981730367_e20192981740087_c20192981740157.nc": {
- "uri": "s3://noaa-goes16/ABI-L1b-RadF/2019/298/17/OR_ABI-L1b-RadF-M6C13_G16_s20192981730367_e20192981740087_c20192981740157.nc",
- "type": "ACCESS",
- "deleted": "false",
- "restricted": "false",
- "asynchronous": "false",
- "locality": "us-east-2",
- "lastModified": 1572025823000,
- "serviceType": "Amazon:AWS:S3",
- "optionalAttributes": {}
- }
- }
- }
-
- def setUp(self):
- print("Set it up!")
- self.wp = WebPublisher(self.wp_config, self.cred_config)
- self.su = S3Utils(self.aws_config, self.cred_config)
- self.s3ma = S3MessageAdapter(self.csb_config, self.su)
-
- def tearDown(self):
- print("Tear it down!")
-
- @mock_s3
- @mock_sqs
- def init_s3(self):
- bucket = self.su.conf['s3_bucket']
- key = self.su.conf['s3_key']
- boto_client = self.su.connect("s3", None)
- boto_client.create_bucket(Bucket=bucket)
- boto_client.put_object(Bucket=bucket, Key=key, Body="foobar")
-
- sqs_client = boto3.client('sqs', region_name=self.su.conf['s3_region'])
- sqs_queue = sqs_client.create_queue(QueueName=self.su.conf['sqs_name'])
- self.sqs = SqsConsumer(self.aws_config, self.cred_config)
- message = create_delete_message(self.su.conf['s3_region'], bucket, key)
- sqs_client.send_message(QueueUrl=sqs_queue['QueueUrl'], MessageBody=json.dumps(message))
- return sqs_queue['QueueUrl']
-
- def delete_handler_wrapper(self, recs):
- handler = create_delete_handler(self.wp)
- result = handler(recs)
- self.assertTrue(result)
-
- @mock_sqs
- def test_delete_handler(self):
- mock_queue_url = self.init_s3()
- sqs_queue = boto3.resource('sqs', region_name=self.su.conf['s3_region']).Queue(mock_queue_url)
- self.sqs.receive_messages(sqs_queue, self.su.conf['sqs_max_polls'], self.delete_handler_wrapper)
diff --git a/onestop-python-client/tests/extractor/CsbExtractorTest.py b/onestop-python-client/tests/extractor/CsbExtractorTest.py
deleted file mode 100644
index 7dbbc9e..0000000
--- a/onestop-python-client/tests/extractor/CsbExtractorTest.py
+++ /dev/null
@@ -1,95 +0,0 @@
-import unittest
-from onestop.extract.CsbExtractor import CsbExtractor
-from onestop.util.S3Utils import S3Utils
-from tests.utils import abspath_from_relative
-
-
-class CsbExtractorTest(unittest.TestCase):
-
- # def setUp(self):
- # print("Set it up!")
- # file_name = '../data/file4.csv'
- # self.csb_extractor = CsbExtractor(file_name)
-
- def setUp(self):
- print("Set it up!")
- key = "public/NESDIS/CSB/file4.csv"
- self.su = S3Utils( abspath_from_relative( __file__, "../../config/aws-util-config-dev.yml" ),
- abspath_from_relative(__file__, "../../config/credentials.yml") )
- self.csb_extractor = CsbExtractor(self.su, key)
-
- def tearDown(self):
- print("Tear it down!")
-
- def test_is_csv(self):
- csv_str = '.csv'
- self.assertTrue(self.csb_extractor.is_csv(self.csb_extractor.file_name))
-
-
- def test_get_geospatial_temporal_bounds(self):
- bounds_dict = self.csb_extractor.get_spatial_temporal_bounds('LON', 'LAT', 'TIME')
- coords = bounds_dict["geospatial"]
- print(str(coords))
- self.assertEqual(coords[0], -96.847995)
- self.assertEqual(coords[1], 29.373065)
- self.assertEqual(coords[2], -92.747995)
- self.assertEqual(coords[3], 33.373065)
-
- date_rng = bounds_dict["temporal"]
- self.assertEqual(date_rng[0], '2018-04-10T14:00:06.000Z' )
- self.assertEqual(date_rng[1], '2020-04-10T14:00:06.000Z' )
-
-
- def test_get_min_lon(self):
- bounds_dict = self.csb_extractor.get_spatial_temporal_bounds('LON', 'LAT', 'TIME')
- coords = bounds_dict["geospatial"]
- min_lon = coords[0]
- self.assertEqual(min_lon, -96.847995)
-
-
- def test_get_max_datetime(self):
- bounds_dict = self.csb_extractor.get_spatial_temporal_bounds('LON', 'LAT', 'TIME')
- date_rng = bounds_dict["temporal"]
- end_date = date_rng[1]
- self.assertEqual(end_date, '2020-04-10T14:00:06.000Z')
-
-
- def test_get_min_datetime(self):
- bounds_dict = self.csb_extractor.get_spatial_temporal_bounds('LON', 'LAT', 'TIME')
- date_rng = bounds_dict["temporal"]
- begin_date = date_rng[0]
- self.assertEqual(begin_date, '2018-04-10T14:00:06.000Z')
-
-
- def test_extract_coords(self):
- bounds_dict = self.csb_extractor.get_spatial_temporal_bounds('LON', 'LAT', 'TIME')
- coords = bounds_dict["geospatial"]
-
- min_lon = coords[0]
- min_lat = coords[1]
- max_lon = coords[2]
- max_lat = coords[3]
-
- coords = self.csb_extractor.extract_coords(max_lon, max_lat, min_lon, min_lat)
- result = [[
- -94.847995,
- 29.373065
- ],
- [
- -96.847995,
- 29.373065
- ],
- [
- -94.847995,
- 33.373065
- ],
- [
- -92.747995,
- 29.383065
- ]
- ]
- self.assertEqual(coords, result)
-
-
-if __name__ == '__main__':
- unittest.main()
\ No newline at end of file
diff --git a/onestop-python-client/tests/util/IntegrationTest.py b/onestop-python-client/tests/util/IntegrationTest.py
deleted file mode 100644
index 381e4d7..0000000
--- a/onestop-python-client/tests/util/IntegrationTest.py
+++ /dev/null
@@ -1 +0,0 @@
-#TBD
\ No newline at end of file
diff --git a/onestop-python-client/tests/util/S3UtilsTest.py b/onestop-python-client/tests/util/S3UtilsTest.py
deleted file mode 100644
index 34850ad..0000000
--- a/onestop-python-client/tests/util/S3UtilsTest.py
+++ /dev/null
@@ -1,209 +0,0 @@
-import csv
-import unittest
-import uuid
-from moto import mock_s3
-from moto import mock_glacier
-
-from tests.utils import abspath_from_relative
-from onestop.util.S3Utils import S3Utils
-
-class S3UtilsTest(unittest.TestCase):
- su = None
-
- def setUp(self):
- print("Set it up!")
- self.su = S3Utils(abspath_from_relative(__file__, "../../config/aws-util-config-dev.yml"),
- abspath_from_relative(__file__, "../../config/credentials.yml"))
-
- def tearDown(self):
- print("Tear it down!")
- # Remove files from bucket
-
- def test_parse_config(self):
- self.assertFalse(self.su.conf['sqs_url']==None)
-
- @mock_s3
- def test_get_uuid_metadata(self):
- boto_client = self.su.connect("s3_resource", None)
- s3_key = "csv/file1.csv"
- bucket = self.su.conf['s3_bucket']
- region = self.su.conf['s3_region']
- location = {'LocationConstraint': region}
- boto_client.create_bucket(Bucket=bucket, CreateBucketConfiguration=location)
- obj_uuid = str(uuid.uuid4())
- boto_client.Object(bucket, s3_key).put(Bucket=bucket, Key=s3_key, Body="my_body", Metadata={'object-uuid': obj_uuid})
-
- self.assertFalse(self.su.get_uuid_metadata(boto_client, bucket, s3_key) == None)
-
- @mock_s3
- def test_add_uuid_metadata(self):
- region = self.su.conf['s3_region']
- boto_client = self.su.connect("s3_resource", region)
-
- s3_key = "csv/file1.csv"
- bucket = self.su.conf['s3_bucket']
-
- location = {'LocationConstraint': region}
- boto_client.create_bucket(Bucket=bucket, CreateBucketConfiguration=location)
- boto_client.Object(bucket, s3_key).put(Bucket=bucket, Key=s3_key, Body="my_body")
-
- self.assertTrue(self.su.add_uuid_metadata(boto_client, bucket, s3_key))
-
- @mock_s3
- def test_add_file_s3(self):
- boto_client = self.su.connect("s3", None)
- local_file = abspath_from_relative(__file__, "../data/file4.csv")
- s3_key = "csv/file4.csv"
- bucket = self.su.conf['s3_bucket']
- region = self.su.conf['s3_region']
- location = {'LocationConstraint': region}
- boto_client.create_bucket(Bucket=bucket, CreateBucketConfiguration=location)
- overwrite = True
-
- self.assertTrue(self.su.upload_s3(boto_client, local_file, bucket, s3_key, overwrite))
-
- def test_get_csv_s3(self):
- boto_client = self.su.connect("session", None)
- s3_key = "csv/file1.csv"
- bucket = self.su.conf['s3_bucket']
- sm_open_file = self.su.get_csv_s3(boto_client, bucket, s3_key)
-
- # print("reading csv:" + line.decode('utf-8'))
- csv_reader = csv.DictReader(sm_open_file)
- for row in csv_reader:
- print(str(row["LON"]))
-
- def test_read_bytes_s3(self):
- boto_client = self.su.connect("s3", None)
- s3_key = "csv/file1.csv"
- bucket = self.su.conf['s3_bucket']
- self.assertTrue(self.su.read_bytes_s3(boto_client, bucket, s3_key))
-
- @mock_s3
- def test_add_files(self):
- boto_client = self.su.connect("s3", None)
- local_files = ["file1_s3.csv", "file2.csv", "file3.csv"]
- bucket = self.su.conf['s3_bucket']
- region = self.su.conf['s3_region']
- location = {'LocationConstraint': region}
- boto_client.create_bucket(Bucket=bucket, CreateBucketConfiguration=location)
- overwrite = True
- s3_file = None
- for file in local_files:
- local_file = abspath_from_relative(__file__, "../data/" + file)
- s3_file = "csv/" + file
- self.assertTrue(self.su.upload_s3(boto_client, local_file, bucket, s3_file, overwrite))
-
- @mock_s3
- @mock_glacier
- def test_s3_cross_region(self):
- print('Cross Region Vault Upload ------------- ')
- key = "csv/file1.csv"
- # grabs te region and bucket name from the config file
- region = self.su.conf['s3_region']
- bucket = self.su.conf['s3_bucket']
-
- # makes connection to low level s3 client
- s3 = self.su.connect('s3', region)
- location = {'LocationConstraint': region}
- s3.create_bucket(Bucket=bucket, CreateBucketConfiguration=location)
- s3.put_object(Bucket=bucket, Key=key, Body="body")
-
- # Reads object data and stores it into a variable
- file_data = self.su.read_bytes_s3(s3, bucket, key)
-
- # Redirecting upload to vault in second region
- glacier = self.su.connect("glacier", self.su.conf['s3_region2'])
- vault_name = self.su.conf['vault_name']
- glacier.create_vault(vaultName=vault_name)
- print('vault name: ' + str(vault_name))
- print('region name: ' + str(self.su.conf['s3_region2']))
- print('-------file data---------')
- print(file_data)
- response = self.su.upload_archive(glacier, vault_name, file_data)
-
- self.assertTrue(response['archiveId']!=None)
-
- @mock_s3
- @mock_glacier
- def test_s3_to_glacier(self):
- """
- Changes the storage class of an object from S3 to Glacier
- Requires the configure and credential locations as parameters as well as the key of the object
- """
-
- print("S3 to Glacier---------")
- key = "csv/file1_s3.csv"
- # grabs te region and bucket name from the config file
- region = self.su.conf['s3_region']
- bucket = self.su.conf['s3_bucket']
-
- # Create boto3 low level api connection
- s3 = self.su.connect('s3', region)
- location = {'LocationConstraint': region}
- s3.create_bucket(Bucket=bucket, CreateBucketConfiguration=location)
- s3.put_object(Bucket=bucket, Key=key, Body="body")
-
- # Using the S3 util class invoke the change of storage class
- response = self.su.s3_to_glacier(s3, bucket, key)
- print(response['ResponseMetadata']['HTTPHeaders']['x-amz-storage-class'])
- # Assert 'x-amz-storage-class': 'GLACIER'
-
- self.assertTrue(response['ResponseMetadata']['HTTPHeaders']['x-amz-storage-class'] == "GLACIER")
-
- @mock_s3
- def test_s3_restore(self):
- """
- Uses high level api to restore object from glacier to s3
- """
-
- region = self.su.conf['s3_region2']
- bucket = self.su.conf['s3_bucket']
- key = "csv/file1_s3.csv"
- days = 3
-
- # use high level api
- s3 = self.su.connect('s3_resource', region)
- location = {'LocationConstraint': region}
- s3.create_bucket(Bucket=bucket, CreateBucketConfiguration=location)
- s3.Object(bucket, key).put(Bucket=bucket, Key=key, Body="body")
-
- self.assertTrue(self.su.s3_restore(s3, bucket, key, days) != None)
-
- @mock_glacier
- def test_retrieve_inventory(self):
- """
- Initiates job for archive retrieval. Takes 3-5 hours to complete
- """
-
- # Using glacier api initiates job and returns archive results
- # Connect to your glacier vault for retrieval
- glacier = self.su.connect("glacier", self.su.conf['s3_region2'])
- vault_name = self.su.conf['vault_name']
- glacier.create_vault(vaultName=vault_name)
-
-
- response = self.su.retrieve_inventory(glacier, vault_name)
- self.assertTrue(response['jobId']!= None)
-
- '''
- Excluding for now because it's an asynchronous test
- def test_retrieve_inventory_results(self, jobid):
- """
- Once the job has been completed, use the job id to retrieve archive results
- """
-
- # Connect to your glacier vault for retrieval
- glacier = self.su.connect("glacier", self.su.conf['region'])
- vault_name = self.su.conf['vault_name']
-
- # Retrieve the job results
- inventory = self.su.retrieve_inventory_results(vault_name, glacier, jobid)
-
- self.assertTrue(inventory != None)
- '''
-
-
-
-if __name__ == '__main__':
- unittest.main()
\ No newline at end of file
diff --git a/onestop-python-client/tests/util/SqsConsumerTest.py b/onestop-python-client/tests/util/SqsConsumerTest.py
deleted file mode 100644
index 4d6be77..0000000
--- a/onestop-python-client/tests/util/SqsConsumerTest.py
+++ /dev/null
@@ -1,34 +0,0 @@
-import unittest
-import boto3
-from moto import mock_sqs
-from tests.utils import abspath_from_relative
-from onestop.util.SqsConsumer import SqsConsumer
-
-class SqsConsumerTest(unittest.TestCase):
- sc = None
-
- def setUp(self):
- print("Set it up!")
- self.sc = SqsConsumer(abspath_from_relative(__file__, "../../config/aws-util-config-dev.yml"),
- abspath_from_relative(__file__, "../../config/credentials-template.yml"))
-
- def tearDown(self):
- print("Tear it down!")
-
- def test_parse_config(self):
- self.assertFalse(self.sc.conf['sqs_url']==None)
-
- @mock_sqs
- def test_poll_messages(self):
- # Create the mock queue beforehand and set its mock URL as the 'sqs_url' config value for SqsConsumer
- boto_session = boto3.Session(aws_access_key_id=self.sc.cred['sandbox']['access_key'],
- aws_secret_access_key=self.sc.cred['sandbox']['secret_key'])
- sqs_session = boto_session.resource('sqs', region_name=self.sc.conf['s3_region'])
- res = sqs_session.create_queue(QueueName="test_queue")
- self.sc.conf['sqs_url'] = res.url
- queue = self.sc.connect()
- self.sc.receive_messages(queue, self.sc.conf['sqs_max_polls'], lambda *args, **kwargs: None)
-
-
-if __name__ == '__main__':
- unittest.main()
\ No newline at end of file
diff --git a/scripts/README.md b/scripts/README.md
deleted file mode 100644
index e0276cb..0000000
--- a/scripts/README.md
+++ /dev/null
@@ -1,60 +0,0 @@
-#Scripts
-
-## Table of Contents
-* [Quickstart](#quickstart)
-* [Kubectl Pod Verification](#kubectl-pod-verification)
-* [Load Data](#load-data)
-* [Updating Containers](#updating-containers)
-
-This directory contains scripts that use the onestop-python-library to send data to a OneStop.
-
-## Quickstart
-- Install conda (miniconda works).
-- Restart terminal or source files to recognize conda commands.
-- Create a new conda environment and activate it
- - `conda create -n onestop-clients python=3`
- - `conda activate onestop-clients`
- - `pip install setuptools`
-
-- Install any libraries needed by your sme script
- - Ex: `pip install PyYaml`
-
-- Build the latest onestop-python-client
- - `pip uninstall onestop-python-client-cedardevs`
- - `pip install ./onestop-python-client` (run from root of this repository)
-
-- Input credentials for helm in the file `helm/onestop-sqs-consumer/values.yaml`
- - Then:
- - `helm uninstall sme`
- - `helm install sme helm/onestop-sqs-consumer`
-
-## Kubectl Pod Verification
-- Verify onestop-client pod is running, copy the pod name.
- - `kubectl get pods`
-
-- Exec into it
- - `kubectl exec -it -- sh` where the is listed in `kubectl get pods`
-
-- Check logs
- - `kubectl logs `
-
-## Load Data
-There are several repositories to aid in loading data into a OneStop. Please read the appropriate repository's readme for accurate and up to date usage information.
-
-- To load data locally you will need a OneStop running locally. This is an example of how to do that, more info in the OneStop repository.
- - `skaffold dev --status-check false`
-
-- To load test collections from onestop-test-data repository (read the README for more information) to your local OneStop:
- - `./upload.sh demo http://localhost/onestop/api/registry`
-
-- From the osim-deployment repository there is a staging-scripts directory with scripts for loading some data:
- - `./copyS3objects.sh -max_files=5 copy-config/archive-testing-demo-csb.sh`
-
-## Updating Containers
-- If the onestop-python-client code changes then run:
- - `docker build . -t cedardevs/onestop-python-client:latest`
-
-- If just the scripts change
- - `docker build ./scripts/sqs-to-registry -t cedardevs/onestop-s3-handler`
-
-
diff --git a/scripts/archive_client_integration.py b/scripts/archive_client_integration.py
index 2831045..be672f8 100644
--- a/scripts/archive_client_integration.py
+++ b/scripts/archive_client_integration.py
@@ -1,64 +1,74 @@
import argparse
-from onestop.util.S3Utils import S3Utils
-
-
-def handler():
- '''
- Simultaneously upload files to main bucket 'noaa-nccf-dev' in us-east-2 and glacier in cross region bucket 'noaa-nccf-dev-archive' in us-west-2.
-
- :return: str
- Returns response from boto3 indicating if upload was successful.
- '''
- print("Handler...")
+import yaml
+import os
- # config for s3 low level api for us-east-2
- s3 = s3_utils.connect('s3', s3_utils.conf['s3_region'])
- bucket_name = s3_utils.conf['s3_bucket']
-
- # config for s3 low level api cross origin us-west-2
- s3_cross_region = s3_utils.connect('s3', s3_utils.conf['s3_region2'])
- bucket_name_cross_region = s3_utils.conf['s3_bucket2']
-
- overwrite = True
-
- # Add 3 files to bucket
- local_files = ["file1.csv", "file2.csv"]
- s3_file = None
- for file in local_files:
- local_file = "tests/data/" + file
- # changed the key for testing
- s3_file = "public/NESDIS/CSB/" + file
- s3_utils.upload_s3(s3, local_file, bucket_name, s3_file, overwrite)
+from onestop.util.S3Utils import S3Utils
- # Upload file to cross region bucket then transfer to glacier right after
- s3_utils.upload_s3(s3_cross_region, local_file, bucket_name_cross_region, s3_file, overwrite)
- s3_utils.s3_to_glacier(s3_cross_region, bucket_name_cross_region, s3_file)
+config_dict = {}
if __name__ == '__main__':
+ # Example command: python3 archive_client_integration.py -conf /Users/whoever/repo/onestop-clients/scripts/config/combined_template.yml -cred /Users/whoever/repo/onestop-clients/scripts/config/credentials.yml
+ # python3 archive_client_integration.py -cred /Users/whoever/repo/onestop-clients/scripts/config/credentials.yml
parser = argparse.ArgumentParser(description="Launches archive client integration")
- parser.add_argument('-conf', dest="conf", required=True,
+ # Set default config location to the Helm mounted pod configuration location
+ parser.add_argument('-conf', dest="conf", required=False, default='/etc/config/config.yml',
help="AWS config filepath")
-
parser.add_argument('-cred', dest="cred", required=True,
help="Credentials filepath")
args = vars(parser.parse_args())
- # Get configuration file path locations
+ # Generate configuration dictionary
conf_loc = args.pop('conf')
+ with open(conf_loc) as f:
+ config_dict.update(yaml.load(f, Loader=yaml.FullLoader))
+
+ # Get credentials from passed in fully qualified path or ENV.
cred_loc = args.pop('cred')
+ if cred_loc is not None:
+ with open(cred_loc) as f:
+ creds = yaml.load(f, Loader=yaml.FullLoader)
+ registry_username = creds['registry']['username']
+ registry_password = creds['registry']['password']
+ access_key = creds['sandbox']['access_key']
+ access_secret = creds['sandbox']['secret_key']
+ else:
+ print("Using env variables for config parameters")
+ registry_username = os.environ.get("REGISTRY_USERNAME")
+ registry_password = os.environ.get("REGISTRY_PASSWORD")
+ access_key = os.environ.get("ACCESS_KEY")
+ access_secret = os.environ.get("SECRET_KEY")
+
+ config_dict.update({
+ 'registry_username' : registry_username,
+ 'registry_password' : registry_password,
+ 'access_key' : access_key,
+ 'secret_key' : access_secret
+ })
# Upload a test file to s3 bucket
- s3_utils = S3Utils(conf_loc, cred_loc)
-
- handler()
-
-
-
-
+ s3_utils = S3Utils(**config_dict)
-
+ s3 = s3_utils.connect('client', 's3', config_dict['s3_region'])
+    # config for s3 low level api, cross-region us-west-2
+ s3_cross_region = s3_utils.connect('client', 's3', config_dict['s3_region2'])
+ bucket_name_cross_region = config_dict['s3_bucket2']
+ overwrite = True
+ # Files to upload - TODO: User should change these paths.
+ local_files = ["/scripts/data/file1.csv", "/scripts/data/file2.csv"]
+ for file in local_files:
+ print("Uploading file: %s"%file)
+ # changed the key for testing
+ s3_file = "public/NESDIS/CSB/" + file
+ upload = s3_utils.upload_s3(s3, file, config_dict['s3_bucket'], s3_file, overwrite)
+ if not upload:
+            raise Exception("Upload to s3 failed for an unknown reason.")
+ # Upload file to cross region bucket then transfer to glacier right after
+ upload = s3_utils.upload_s3(s3_cross_region, file, bucket_name_cross_region, s3_file, overwrite)
+ if not upload:
+            raise Exception("Upload to cross-region s3 bucket failed for an unknown reason.")
+ s3_utils.s3_to_glacier(s3_cross_region, bucket_name_cross_region, s3_file)
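The configuration/credential bootstrapping block above is repeated nearly verbatim in the other scripts in this change. A minimal sketch of a shared helper capturing that pattern is shown below; `load_config` is hypothetical and not part of the onestop-python-client API.

```python
import os
import yaml

def load_config(conf_loc, cred_loc=None):
    """Merge the YAML config with credentials from a file, falling back to env variables."""
    with open(conf_loc) as f:
        config = yaml.load(f, Loader=yaml.FullLoader)
    if cred_loc is not None:
        with open(cred_loc) as f:
            creds = yaml.load(f, Loader=yaml.FullLoader)
        registry_username = creds['registry']['username']
        registry_password = creds['registry']['password']
        access_key = creds['sandbox']['access_key']
        secret_key = creds['sandbox']['secret_key']
    else:
        # Fall back to environment variables, as the scripts do.
        registry_username = os.environ.get("REGISTRY_USERNAME")
        registry_password = os.environ.get("REGISTRY_PASSWORD")
        access_key = os.environ.get("ACCESS_KEY")
        secret_key = os.environ.get("SECRET_KEY")
    config.update({
        'registry_username': registry_username,
        'registry_password': registry_password,
        'access_key': access_key,
        'secret_key': secret_key,
    })
    return config
```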
diff --git a/scripts/bucket_automation.py b/scripts/bucket_automation.py
index a64f11c..5c922ee 100644
--- a/scripts/bucket_automation.py
+++ b/scripts/bucket_automation.py
@@ -1,7 +1,12 @@
import argparse
import json
+import os
+import yaml
+
from onestop.util.S3Utils import S3Utils
+config_dict = {}
+
def handler():
'''
Creates bucket with defined key paths
@@ -10,43 +15,42 @@ def handler():
Returns boto3 response indicating if bucket creation was successful
'''
# connect to low level api
- s3 = s3_utils.connect("s3", s3_utils.conf['s3_region'])
+ s3 = s3_utils.connect('client', 's3', config_dict['s3_region'])
# use s3_resource api to check if the bucket exists
- s3_resource = s3_utils.connect("s3_resource", s3_utils.conf['s3_region'])
+ s3_resource = s3_utils.connect('resource', 's3', config_dict['s3_region'])
# Create bucket name
bucket_name = "noaa-nccf-dev"
- # checks to see if the bucket is already created, if it isn't create yet then it will create the bucket, set bucket policy, and create key paths
+ # Create bucket policy
+ bucket_policy = {
+ "Version": "2012-10-17",
+ "Id": "noaa-nccf-dev-policy",
+ "Statement": [
+ {
+ "Sid": "PublicRead",
+ "Effect": "Allow",
+ "Principal": "*",
+ "Action": "s3:GetObject",
+ "Resource": f'arn:aws:s3:::{bucket_name}/public/*'
+ }]
+ }
+ # Convert the policy from JSON dict to string
+ bucket_policy_str = json.dumps(bucket_policy)
+
+    # Check whether the bucket already exists; if not, create it, set the bucket policy, and create the key paths
if not s3_resource.Bucket(bucket_name) in s3_resource.buckets.all():
"""
- Create bucket
- need to specify bucket location for every region except us-east-1 -> https://github.com/aws/aws-cli/issues/2603
"""
s3.create_bucket(Bucket=bucket_name,
- CreateBucketConfiguration={'LocationConstraint': s3_utils.conf['s3_region']},
+ CreateBucketConfiguration={'LocationConstraint': config_dict['s3_region']},
ObjectLockEnabledForBucket=True)
- # Create bucket policy
- bucket_policy = {
- "Version": "2012-10-17",
- "Id": "noaa-nccf-dev-policy",
- "Statement": [
- {
- "Sid": "PublicRead",
- "Effect": "Allow",
- "Principal": "*",
- "Action": "s3:GetObject",
- "Resource": f'arn:aws:s3:::{bucket_name}/public/*'
- }]
- }
-
- # Convert the policy from JSON dict to string
- bucket_policy = json.dumps(bucket_policy)
-
# Set new bucket policy
- s3.put_bucket_policy(Bucket=bucket_name, Policy=bucket_policy)
+ s3.put_bucket_policy(Bucket=bucket_name, Policy=bucket_policy_str)
"""
- Create Public Key Paths
@@ -86,6 +90,9 @@ def handler():
s3.put_object(Bucket=bucket_name, Body='', Key='private/OMAO/')
s3.put_object(Bucket=bucket_name, Body='', Key='private/OAR/')
+ else:
+        # Set bucket policy
+ s3.put_bucket_policy(Bucket=bucket_name, Policy=bucket_policy_str)
# Set CORS bucket config
cors_config = {
@@ -109,12 +116,6 @@ def handler():
}
s3.put_bucket_cors(Bucket=bucket_name, CORSConfiguration=cors_config)
- # Convert the policy from JSON dict to string
- bucket_policy = json.dumps(bucket_policy)
-
- #Set new bucket policy
- s3.put_bucket_policy(Bucket=bucket_name, Policy=bucket_policy)
-
"""
- Set ACL for public read
"""
@@ -131,18 +132,42 @@ def handler():
if __name__ == '__main__':
parser = argparse.ArgumentParser(description="Launches e2e test")
- parser.add_argument('-conf', dest="conf", required=True,
+ # Set default config location to the Helm mounted pod configuration location
+ parser.add_argument('-conf', dest="conf", required=False, default='/etc/config/config.yml',
help="AWS config filepath")
-
parser.add_argument('-cred', dest="cred", required=True,
help="Credentials filepath")
args = vars(parser.parse_args())
- # Get configuration file path locations
+ # Generate configuration dictionary
conf_loc = args.pop('conf')
+ with open(conf_loc) as f:
+ config_dict.update(yaml.load(f, Loader=yaml.FullLoader))
+
+ # Get credentials from passed in fully qualified path or ENV.
cred_loc = args.pop('cred')
+ if cred_loc is not None:
+ with open(cred_loc) as f:
+ creds = yaml.load(f, Loader=yaml.FullLoader)
+ registry_username = creds['registry']['username']
+ registry_password = creds['registry']['password']
+ access_key = creds['sandbox']['access_key']
+ access_secret = creds['sandbox']['secret_key']
+ else:
+ print("Using env variables for config parameters")
+ registry_username = os.environ.get("REGISTRY_USERNAME")
+ registry_password = os.environ.get("REGISTRY_PASSWORD")
+ access_key = os.environ.get("ACCESS_KEY")
+ access_secret = os.environ.get("SECRET_KEY")
+
+ config_dict.update({
+ 'registry_username' : registry_username,
+ 'registry_password' : registry_password,
+ 'access_key' : access_key,
+ 'secret_key' : access_secret
+ })
# Create S3Utils instance
- s3_utils = S3Utils(conf_loc, cred_loc)
+ s3_utils = S3Utils(**config_dict)
handler()
\ No newline at end of file
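As a quick sanity check after `handler()` runs, the applied policy can be read back with the standard boto3 client call. This is a sketch only: the region value is a placeholder (the script takes it from `config_dict['s3_region']`) and valid AWS credentials are assumed.

```python
import boto3

# Read back the policy that handler() applied; region_name is a placeholder.
s3_client = boto3.client('s3', region_name='us-east-2')
policy = s3_client.get_bucket_policy(Bucket='noaa-nccf-dev')
print(policy['Policy'])  # JSON string; should contain the "PublicRead" statement
```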
diff --git a/scripts/config/aws-util-config-dev.yml b/scripts/config/aws-util-config-dev.yml
index e054f49..9102be0 100644
--- a/scripts/config/aws-util-config-dev.yml
+++ b/scripts/config/aws-util-config-dev.yml
@@ -1,5 +1,4 @@
# Example config values for osim client
-log_level: INFO
# AWS config values
sqs_url: https://sqs.us-east-2.amazonaws.com/798276211865/cloud-archive-client-sqs
diff --git a/scripts/config/aws-util-config-test.yml b/scripts/config/aws-util-config-test.yml
index 6aac07a..9de4618 100644
--- a/scripts/config/aws-util-config-test.yml
+++ b/scripts/config/aws-util-config-test.yml
@@ -1,5 +1,4 @@
# Example config values for osim client
-log_level: DEBUG
# AWS config values
sqs_url: 'test-queue'
diff --git a/scripts/config/csb-data-stream-config.yml b/scripts/config/csb-data-stream-config.yml
index 1556ab9..f110852 100644
--- a/scripts/config/csb-data-stream-config.yml
+++ b/scripts/config/csb-data-stream-config.yml
@@ -1,12 +1,15 @@
-log_level: INFO
+# COLLECTION or GRANULE
+kafka_consumer_metadata_type: COLLECTION
+kafka_publisher_metadata_type: COLLECTION
+s3_message_adapter_metadata_type: COLLECTION
+
format: csv
headers: UNIQUE_ID,FILE_UUID,LON,LAT,DEPTH,TIME,PLATFORM_NAME,PROVIDER
-type: COLLECTION
collection_id: fdb56230-87f4-49f2-ab83-104cfd073177
-psi_registry_url: https://internal-a683c98a66fb011eaa4230e0d5e5657f-369075387.us-east-1.elb.amazonaws.com
+registry_base_url: https://internal-a683c98a66fb011eaa4230e0d5e5657f-369075387.us-east-1.elb.amazonaws.com
access_bucket: https://archive-testing-demo.s3-us-east-2.amazonaws.com
#access_bucket: https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com
-file_identifier_prefix: "gov.noaa.ncei.csb:"
+file_id_prefix: "gov.noaa.ncei.csb:"
prefixMap:
NESDIS/CSB: 'fdb56230-87f4-49f2-ab83-104cfd073177'
diff --git a/scripts/config/kafka-publisher-config-dev.yml b/scripts/config/kafka-publisher-config-dev.yml
index 85a66f3..8a94bf3 100644
--- a/scripts/config/kafka-publisher-config-dev.yml
+++ b/scripts/config/kafka-publisher-config-dev.yml
@@ -1,5 +1,4 @@
# Example config values for osim client
-log_level: DEBUG
# COLLECTION or GRANULE
metadata_type: GRANULE
@@ -7,8 +6,8 @@ metadata_type: GRANULE
# Kafka config values
brokers: onestop-dev-cp-kafka:9092
schema_registry: http://onestop-dev-cp-schema-registry:8081
-collection_topic_produce: psi-granules-by-collection
-granule_topic_produce: psi-granule-parsed
+collection_topic_publish: psi-granules-by-collection
+granule_topic_publish: psi-granule-parsed
collection_topic_consume: psi-collection-input-unknown
granule_topic_consume: psi-granule-input-unknown
group_id: sme-test
diff --git a/scripts/config/web-publisher-config-dev.yml b/scripts/config/web-publisher-config-dev.yml
index 9b08391..387d252 100644
--- a/scripts/config/web-publisher-config-dev.yml
+++ b/scripts/config/web-publisher-config-dev.yml
@@ -1,5 +1,4 @@
# Example config values for osim client
-log_level: INFO
# COLLECTION or GRANULE
metadata_type: granule
diff --git a/scripts/config/web-publisher-config-local.yml b/scripts/config/web-publisher-config-local.yml
index 32db955..3ce7d88 100644
--- a/scripts/config/web-publisher-config-local.yml
+++ b/scripts/config/web-publisher-config-local.yml
@@ -1,5 +1,4 @@
# Example config values for osim client
-log_level: INFO
# COLLECTION or GRANULE
metadata_type: granule
diff --git a/scripts/launch_delete_handler.py b/scripts/launch_delete_handler.py
index 7bb3983..6d000d4 100644
--- a/scripts/launch_delete_handler.py
+++ b/scripts/launch_delete_handler.py
@@ -1,79 +1,59 @@
-import json
-import boto3
import argparse
-from moto import mock_s3
-from moto import mock_sqs
-from tests.utils import create_delete_message
+import os
+import yaml
+
from onestop.WebPublisher import WebPublisher
from onestop.util.S3Utils import S3Utils
from onestop.util.SqsConsumer import SqsConsumer
from onestop.util.SqsHandlers import create_delete_handler
-
-def mock_init_s3(s3u):
- """ Sets up bucket, object, SQS queue, and delete message.
-
- Assumes there are additional keys passed in via config
-
- :param s3u: S3Utils object
- :return: URL of the mock queue created in SQS
- """
- boto_client = s3u.connect("s3", None)
- bucket = s3u.conf['s3_bucket']
- region = s3u.conf['s3_region']
- key = s3u.conf['s3_key']
- boto_client.create_bucket(Bucket=bucket)
- boto_client.put_object(Bucket=bucket, Key=key, Body="foobar")
-
- sqs_client = boto3.client('sqs', region_name=region)
- sqs_queue = sqs_client.create_queue(QueueName=s3u.conf['sqs_name'])
- message = create_delete_message(region, bucket, key)
- sqs_client.send_message(QueueUrl=sqs_queue['QueueUrl'], MessageBody=json.dumps(message))
- return sqs_queue['QueueUrl']
-
+config_dict = {}
if __name__ == '__main__':
# All command-line arguments have defaults that use test data, with AWS mocking set to true
parser = argparse.ArgumentParser(description="Launches SQS delete test")
- parser.add_argument('--aws-conf', dest="aws_conf", required=False, default="config/aws-util-config-test.yml",
+ # Set default config location to the Helm mounted pod configuration location
+ parser.add_argument('-conf', dest="conf", required=False, default='/etc/config/config.yml',
help="AWS config filepath")
- parser.add_argument('--osim-conf', dest="osim_conf", required=False, default="config/web-publisher-config-local.yml",
- help="OSIM config filepath")
- parser.add_argument('-mock', dest="mock", required=False, default=True, help="Use mock AWS or real values")
-
- parser.add_argument('-cred', dest="cred", required=False, default="config/credentials-template.yml",
+ parser.add_argument('-cred', dest="cred", required=True,
help="Credentials filepath")
args = vars(parser.parse_args())
- wp_config = args.pop('osim_conf')
- aws_config = args.pop('aws_conf')
- cred_config = args.pop('cred')
- use_mocks = args.pop('mock')
-
- web_publisher = WebPublisher(wp_config, cred_config)
- s3_utils = S3Utils(aws_config, cred_config)
- sqs_consumer = SqsConsumer(aws_config, cred_config)
-
- if use_mocks is True:
- mock_1 = mock_s3()
- mock_2 = mock_sqs()
- mock_1.start()
- mock_2.start()
- mock_queue_url = mock_init_s3(s3_utils)
- # Need to override the config value here so that sqs_consumer.connect will use the correct url for the queue
- sqs_consumer.conf['sqs_url'] = mock_queue_url
-
- sqs_max_polls = s3_utils.conf['sqs_max_polls']
+ # Generate configuration dictionary
+ conf_loc = args.pop('conf')
+ with open(conf_loc) as f:
+ config_dict.update(yaml.load(f, Loader=yaml.FullLoader))
+
+ # Get credentials from passed in fully qualified path or ENV.
+ cred_loc = args.pop('cred')
+ if cred_loc is not None:
+ with open(cred_loc) as f:
+ creds = yaml.load(f, Loader=yaml.FullLoader)
+ registry_username = creds['registry']['username']
+ registry_password = creds['registry']['password']
+ access_key = creds['sandbox']['access_key']
+ access_secret = creds['sandbox']['secret_key']
+ else:
+ print("Using env variables for config parameters")
+ registry_username = os.environ.get("REGISTRY_USERNAME")
+ registry_password = os.environ.get("REGISTRY_PASSWORD")
+ access_key = os.environ.get("ACCESS_KEY")
+ access_secret = os.environ.get("SECRET_KEY")
+
+ config_dict.update({
+ 'registry_username' : registry_username,
+ 'registry_password' : registry_password,
+ 'access_key' : access_key,
+ 'secret_key' : access_secret
+ })
+
+ web_publisher = WebPublisher(**config_dict)
+ s3_utils = S3Utils(**config_dict)
+ sqs_consumer = SqsConsumer(**config_dict)
+
+ sqs_max_polls = config_dict['sqs_max_polls']
delete_handler = create_delete_handler(web_publisher)
+    sqs_resource = s3_utils.connect('resource', 'sqs', config_dict['s3_region'])
+    queue = sqs_consumer.connect(sqs_resource, config_dict['sqs_name'])
- queue = sqs_consumer.connect()
- try:
- sqs_consumer.receive_messages(queue, sqs_max_polls, delete_handler)
- if use_mocks is True:
- mock_1.stop()
- mock_2.stop()
- except Exception as e:
- print("Message queue consumption failed: {}".format(e))
- if use_mocks is True:
- mock_1.stop()
- mock_2.stop()
+ sqs_consumer.receive_messages(queue, sqs_max_polls, delete_handler)
diff --git a/scripts/launch_e2e.py b/scripts/launch_e2e.py
index 2d5b79b..820fd22 100644
--- a/scripts/launch_e2e.py
+++ b/scripts/launch_e2e.py
@@ -1,15 +1,18 @@
import argparse
import json
import os
+import yaml
+
from onestop.util.SqsConsumer import SqsConsumer
from onestop.util.S3Utils import S3Utils
from onestop.util.S3MessageAdapter import S3MessageAdapter
from onestop.WebPublisher import WebPublisher
-from onestop.extract.CsbExtractor import CsbExtractor
from onestop.schemas.util.jsonEncoder import EnumEncoder
+from onestop.util.ClientLogger import ClientLogger
+config_dict = {}
-def handler(recs):
+def handler(recs, log_level):
'''
Processes metadata information from sqs message triggered by S3 event and uploads to registry through web publisher (https). Also uploads s3 object to glacier.
@@ -20,50 +23,48 @@ def handler(recs):
IM registry response and boto3 glacier response
'''
- print("Handler...")
+ logger = ClientLogger.get_logger('launch_e2e.handler', log_level, False)
+ logger.info('In Handler')
- # Now get boto client for object-uuid retrieval
- object_uuid = None
- bucket = None
- print(recs)
+    # If a record exists, try to retrieve the object-uuid
+ logger.debug('Records:%s'%recs)
if recs is None:
- print("No records retrieved")
+ logger.info('No records retrieved, doing nothing.')
+ return
+
+ rec = recs[0]
+ logger.debug('Record: %s'%rec)
+ bucket = rec['s3']['bucket']['name']
+ s3_key = rec['s3']['object']['key']
+ logger.info("Getting uuid")
+ s3_resource = s3_utils.connect('resource', 's3', None)
+ object_uuid = s3_utils.get_uuid_metadata(s3_resource, bucket, s3_key)
+ if object_uuid is not None:
+ logger.info('Retrieved object-uuid: %s'% object_uuid)
else:
- rec = recs[0]
- print(rec)
- bucket = rec['s3']['bucket']['name']
- s3_key = rec['s3']['object']['key']
- print("Getting uuid")
- # High-level api
- s3_resource = s3_utils.connect("s3_resource", None)
- object_uuid = s3_utils.get_uuid_metadata(s3_resource, bucket, s3_key)
- if object_uuid is not None:
- print("Retrieved object-uuid: " + object_uuid)
- else:
- print("Adding uuid")
- s3_utils.add_uuid_metadata(s3_resource, bucket, s3_key)
+ logger.info('UUID not found, adding uuid to bucket=%s key=%s'%(bucket, s3_key))
+ s3_utils.add_uuid_metadata(s3_resource, bucket, s3_key)
+ s3ma = S3MessageAdapter(**config_dict)
im_message = s3ma.transform(recs)
-
+ logger.debug('S3MessageAdapter.transform: %s'%im_message)
json_payload = json.dumps(im_message.to_dict(), cls=EnumEncoder)
+ logger.debug('S3MessageAdapter.transform.json dump: %s'%json_payload)
- print(json_payload)
-
-
+ wp = WebPublisher(**config_dict)
registry_response = wp.publish_registry("granule", object_uuid, json_payload, "POST")
- #print(registry_response.json())
+ logger.debug('publish_registry response: %s'%registry_response.json())
# Upload to archive
file_data = s3_utils.read_bytes_s3(s3_client, bucket, s3_key)
- glacier = s3_utils.connect("glacier", s3_utils.conf['s3_region'])
- vault_name = s3_utils.conf['vault_name']
-
+ glacier = s3_utils.connect('client', 'glacier', config_dict['s3_region'])
+ vault_name = config_dict['vault_name']
resp_dict = s3_utils.upload_archive(glacier, vault_name, file_data)
-
- print("archiveLocation: " + resp_dict['location'])
- print("archiveId: " + resp_dict['archiveId'])
- print("sha256: " + resp_dict['checksum'])
+ logger.debug('Upload response: %s'%resp_dict)
+    logger.info('archive location: %s'% resp_dict['location'])
+ logger.info('archiveId: %s'% resp_dict['archiveId'])
+ logger.info('sha256: %s'% resp_dict['checksum'])
addlocPayload = {
"fileLocations": {
@@ -80,97 +81,60 @@ def handler(recs):
json_payload = json.dumps(addlocPayload, indent=2)
# Send patch request next with archive location
registry_response = wp.publish_registry("granule", object_uuid, json_payload, "PATCH")
-
+ logger.debug('publish to registry response: %s'% registry_response)
+ logger.info('Finished publishing to registry.')
if __name__ == '__main__':
- """
parser = argparse.ArgumentParser(description="Launches e2e test")
- parser.add_argument('-conf', dest="conf", required=True,
+ parser.add_argument('-conf', dest="conf", required=False, default='/etc/config/config.yml',
help="AWS config filepath")
parser.add_argument('-cred', dest="cred", required=True,
help="Credentials filepath")
args = vars(parser.parse_args())
- # Get configuration file path locations
- conf_loc = args.pop('conf')
- cred_loc = args.pop('cred')
- # Upload a test file to s3 bucket
- s3_utils = S3Utils(conf_loc, cred_loc)
- # Low-level api ? Can we just use high level revisit me!
- s3 = s3_utils.connect("s3", None)
- registry_user = os.environ.get("REGISTRY_USERNAME")
- registry_pwd = os.environ.get("REGISTRY_PASSWORD")
- print(registry_user)
- access_key = os.environ.get("AWS_ACCESS")
- access_secret = os.environ.get("AWS_SECRET")
- print(access_key)
-
- # High-level api
- s3_resource = s3_utils.connect("s3_resource", None)
- bucket = s3_utils.conf['s3_bucket']
- overwrite = True
- sqs_max_polls = s3_utils.conf['sqs_max_polls']
- # Add 3 files to bucket
- local_files = ["file1.csv", "file4.csv"]
- s3_file = None
- for file in local_files:
- local_file = "tests/data/" + file
- s3_file = "csv/" + file
- s3_utils.upload_s3(s3, local_file, bucket, s3_file, overwrite)
- # Receive s3 message and MVM from SQS queue
- sqs_consumer = SqsConsumer(conf_loc, cred_loc)
- s3ma = S3MessageAdapter("scripts/config/csb-data-stream-config.yml", s3_utils)
- # Retrieve data from s3 object
- #csb_extractor = CsbExtractor()
- wp = WebPublisher("config/web-publisher-config-dev.yml", cred_loc)
- queue = sqs_consumer.connect()
- try:
- debug = False
- sqs_consumer.receive_messages(queue, sqs_max_polls, handler)
- except Exception as e:
- print("Message queue consumption failed: {}".format(e))
- """
- parser = argparse.ArgumentParser(description="Launches e2e test")
- parser.add_argument('-conf', dest="conf", required=True,
- help="AWS config filepath")
-
- parser.add_argument('-cred', dest="cred", required=True,
- help="Credentials filepath")
- args = vars(parser.parse_args())
-
- # Get configuration file path locations
+ # Generate configuration dictionary
conf_loc = args.pop('conf')
- cred_loc = args.pop('cred')
-
- # Upload a test file to s3 bucket
- s3_utils = S3Utils(conf_loc, cred_loc)
+ with open(conf_loc) as f:
+ config_dict.update(yaml.load(f, Loader=yaml.FullLoader))
- # Low-level api ? Can we just use high level revisit me!
- s3_client = s3_utils.connect("s3", None)
-
- bucket = s3_utils.conf['s3_bucket']
-
- sqs_max_polls = s3_utils.conf['sqs_max_polls']
-
- # Add 3 files to bucket
+ # Get credentials from passed in fully qualified path or ENV.
+ cred_loc = args.pop('cred')
+ if cred_loc is not None:
+ with open(cred_loc) as f:
+ creds = yaml.load(f, Loader=yaml.FullLoader)
+ registry_username = creds['registry']['username']
+ registry_password = creds['registry']['password']
+ access_key = creds['sandbox']['access_key']
+ access_secret = creds['sandbox']['secret_key']
+ else:
+ print("Using env variables for config parameters")
+ registry_username = os.environ.get("REGISTRY_USERNAME")
+ registry_password = os.environ.get("REGISTRY_PASSWORD")
+ access_key = os.environ.get("ACCESS_KEY")
+ access_secret = os.environ.get("SECRET_KEY")
+
+ config_dict.update({
+ 'registry_username' : registry_username,
+ 'registry_password' : registry_password,
+ 'access_key' : access_key,
+ 'secret_key' : access_secret
+ })
+
+ s3_utils = S3Utils(**config_dict)
+ s3_client = s3_utils.connect('client', 's3', config_dict['s3_region'])
+
+ # Upload test files to s3 bucket
local_files = ["file1.csv", "file4.csv"]
s3_file = None
for file in local_files:
- local_file = "data/" + file
+ local_file = "scripts/data/" + file
# s3_file = "csv/" + file
- s3_file = "NESDIS/CSB/" + file
- if not s3_utils.upload_s3(s3_client, local_file, bucket, s3_file, True):
+ s3_file = "public/" + file
+ if not s3_utils.upload_s3(s3_client, local_file, config_dict['s3_bucket'], s3_file, True):
exit("Error setting up for e2e: The test files were not uploaded to the s3 bucket therefore the tests cannot continue.")
# Receive s3 message and MVM from SQS queue
- sqs_consumer = SqsConsumer(conf_loc, cred_loc)
- s3ma = S3MessageAdapter("config/csb-data-stream-config.yml", s3_utils)
- wp = WebPublisher("config/web-publisher-config-dev.yml", cred_loc)
-
- queue = sqs_consumer.connect()
- try:
- debug = False
- sqs_consumer.receive_messages(queue, sqs_max_polls, handler)
-
- except Exception as e:
- print("Message queue consumption failed: {}".format(e))
+ sqs_consumer = SqsConsumer(**config_dict)
+ sqs_resource = s3_utils.connect('resource', 'sqs', config_dict['s3_region'])
+ queue = sqs_consumer.connect(sqs_resource, config_dict['sqs_name'])
+ sqs_consumer.receive_messages(queue, config_dict['sqs_max_polls'], handler)
diff --git a/scripts/launch_kafka_publisher.py b/scripts/launch_kafka_publisher.py
index f4a853d..85283c2 100644
--- a/scripts/launch_kafka_publisher.py
+++ b/scripts/launch_kafka_publisher.py
@@ -1,17 +1,21 @@
import argparse
+import yaml
+
from onestop.KafkaPublisher import KafkaPublisher
if __name__ == '__main__':
'''
Uploads collection to Kafka collection topic
'''
- parser = argparse.ArgumentParser(description="Launches KafkaPublisher to publish kafkda topics")
- parser.add_argument('-conf', dest="conf", required=True,
+ parser = argparse.ArgumentParser(description="Launches KafkaPublisher to publish kafka topics")
+ parser.add_argument('-conf', dest="conf", required=False, default='/etc/config/config.yml',
help="Config filepath")
args = vars(parser.parse_args())
conf_loc = args.pop('conf')
+ with open(conf_loc) as f:
+ conf = yaml.load(f, Loader=yaml.FullLoader)
# "discovery":
# {
@@ -22,13 +26,19 @@
# FileIdentifier: gov.noaa.ngdc.mgg.dem:yaquina_bay_p210_30m
collection_uuid = '3ee5976e-789a-41d5-9cae-d51e7b92a247'
content_dict = {'discovery': {'title': 'My Extra New Title!',
- 'fileIdentifier': 'gov.noaa.osim2.mgg.dem:yaquina_bay_p210_30m'
+ 'fileIdentifier': 'gov.noaa.osim2.mgg.dem:yaquina_bay_p210_30m',
+ "links": [
+ {
+ "linkFunction": "download", "linkName": "Amazon S3", "linkProtocol": "HTTPS",
+ "linkUrl": "https://s3.amazonaws.com/nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2"
+ }
+ ]
}
}
# method one of POST, PUT, PATCH, DELETE
method = 'POST' #Update
- kafka_publisher = KafkaPublisher(conf_loc)
+ kafka_publisher = KafkaPublisher(**conf)
metadata_producer = kafka_publisher.connect()
kafka_publisher.publish_collection(metadata_producer, collection_uuid, content_dict, method)
diff --git a/scripts/launch_pyconsumer.py b/scripts/launch_pyconsumer.py
index f9dbcf6..5d22317 100644
--- a/scripts/launch_pyconsumer.py
+++ b/scripts/launch_pyconsumer.py
@@ -1,11 +1,18 @@
+import argparse
import os
+import yaml
+import json
+
from onestop.util.SqsConsumer import SqsConsumer
from onestop.util.S3Utils import S3Utils
from onestop.util.S3MessageAdapter import S3MessageAdapter
from onestop.WebPublisher import WebPublisher
+from onestop.util.ClientLogger import ClientLogger
+from onestop.schemas.util.jsonEncoder import EnumEncoder
+config_dict = {}
-def handler(recs):
+def handler(recs, log_level):
'''
Processes metadata information from sqs message triggered by S3 event and uploads to registry through web publisher (https). Utilizes helm for credentials and conf.
@@ -15,71 +22,83 @@ def handler(recs):
:return: str
IM registry response
'''
- print("Handling message...")
+ logger = ClientLogger.get_logger('launch_pyconsumer.handler', log_level, False)
+ logger.info('In Handler')
# Now get boto client for object-uuid retrieval
object_uuid = None
if recs is None:
- print("No records retrieved")
- else:
- rec = recs[0]
- bucket = rec['s3']['bucket']['name']
- s3_key = rec['s3']['object']['key']
+ logger.info('No records retrieved, doing nothing.')
+ return
- # Fetch the object to get the uuid
- object_uuid = s3_utils.get_uuid_metadata(s3_resource, bucket, s3_key)
+ rec = recs[0]
+ bucket = rec['s3']['bucket']['name']
+ s3_key = rec['s3']['object']['key']
- if object_uuid is not None:
- print("Retrieved object-uuid: " + object_uuid)
- else:
- print("Adding uuid")
- s3_utils.add_uuid_metadata(s3_resource, bucket, s3_key)
+ # Fetch the object to get the uuid
+ logger.info("Getting uuid")
+ s3_resource = s3_utils.connect('resource', 's3', None)
+ object_uuid = s3_utils.get_uuid_metadata(s3_resource, bucket, s3_key)
+
+ if object_uuid is not None:
+ logger.info('Retrieved object-uuid: %s'% object_uuid)
+ else:
+ logger.info('UUID not found, adding uuid to bucket=%s key=%s'%(bucket, s3_key))
+ s3_utils.add_uuid_metadata(s3_resource, bucket, s3_key)
# Convert s3 message to IM message
- s3ma = S3MessageAdapter(conf_loc, s3_utils)
- json_payload = s3ma.transform(recs)
+ s3ma = S3MessageAdapter(**config_dict)
+ im_message = s3ma.transform(recs)
+ logger.debug('S3MessageAdapter.transform: %s'%im_message)
+ json_payload = json.dumps(im_message.to_dict(), cls=EnumEncoder)
+ logger.debug('S3MessageAdapter.transform.json dump: %s'%json_payload)
#Send the message to Onestop
- wp = WebPublisher(conf_loc, cred_loc)
- registry_response = wp.publish_registry("granule", object_uuid, json_payload.serialize(), "POST")
- print("RESPONSE: ")
- print(registry_response.json())
+ wp = WebPublisher(**config_dict)
+ registry_response = wp.publish_registry("granule", object_uuid, json_payload, "POST")
+ logger.debug('publish_registry response: %s'%registry_response.json())
if __name__ == '__main__':
- conf_loc = "/etc/config/config.yml"
- cred_loc = "creds.yml"
-
- registry_user = os.environ.get("REGISTRY_USERNAME")
- registry_pwd = os.environ.get("REGISTRY_PASSWORD")
- access_key = os.environ.get("ACCESS_KEY")
- access_secret = os.environ.get("SECRET_KEY")
-
- f = open(cred_loc, "w+")
-
-#write creds to a file to avoid changing the python library
- s = """sandbox:
- access_key: {key}
- secret_key: {secret}
-
-registry:
- username: {user}
- password: {pw}
- """.format(key=access_key, secret=access_secret, user=registry_user, pw=registry_pwd)
- f.write(s)
- f.close()
- r = open(cred_loc, "r")
-
- # # Receive s3 message and MVM from SQS queue
- s3_utils = S3Utils(conf_loc, cred_loc)
- sqs_max_polls = s3_utils.conf['sqs_max_polls']
- sqs_consumer = SqsConsumer(conf_loc, cred_loc)
- queue = sqs_consumer.connect()
-
- try:
- debug = False
- # # Pass in the handler method
- sqs_consumer.receive_messages(queue, sqs_max_polls, handler)
-
- except Exception as e:
- print("Message queue consumption failed: {}".format(e))
+    parser = argparse.ArgumentParser(description="Launches pyconsumer")
+ parser.add_argument('-conf', dest="conf", required=False, default='/etc/config/config.yml',
+ help="AWS config filepath")
+ parser.add_argument('-cred', dest="cred", required=True,
+ help="Credentials filepath")
+ args = vars(parser.parse_args())
+
+ # Generate configuration dictionary
+ conf_loc = args.pop('conf')
+ with open(conf_loc) as f:
+ config_dict.update(yaml.load(f, Loader=yaml.FullLoader))
+
+ # Get credentials from passed in fully qualified path or ENV.
+ cred_loc = args.pop('cred')
+ if cred_loc is not None:
+ with open(cred_loc) as f:
+ creds = yaml.load(f, Loader=yaml.FullLoader)
+ registry_username = creds['registry']['username']
+ registry_password = creds['registry']['password']
+ access_key = creds['sandbox']['access_key']
+ access_secret = creds['sandbox']['secret_key']
+ else:
+ print("Using env variables for config parameters")
+ registry_username = os.environ.get("REGISTRY_USERNAME")
+ registry_password = os.environ.get("REGISTRY_PASSWORD")
+ access_key = os.environ.get("ACCESS_KEY")
+ access_secret = os.environ.get("SECRET_KEY")
+
+ config_dict.update({
+ 'registry_username' : registry_username,
+ 'registry_password' : registry_password,
+ 'access_key' : access_key,
+ 'secret_key' : access_secret
+ })
+
+ s3_utils = S3Utils(**config_dict)
+
+ # Receive s3 message and MVM from SQS queue
+ sqs_consumer = SqsConsumer(**config_dict)
+ sqs_resource = s3_utils.connect('resource', 'sqs', config_dict['s3_region'])
+ queue = sqs_consumer.connect(sqs_resource, config_dict['sqs_name'])
+ sqs_consumer.receive_messages(queue, config_dict['sqs_max_polls'], handler)
diff --git a/scripts/sme/Dockerfile b/scripts/sme/Dockerfile
index d4b48fa..19051c3 100644
--- a/scripts/sme/Dockerfile
+++ b/scripts/sme/Dockerfile
@@ -1,6 +1,8 @@
+# Expect this to copy the scripts directory over and install onestop-python-client.
FROM cedardevs/onestop-python-client:latest
-COPY . .
+
+# Install additional python libraries needed by scripts
RUN pip install argparse
RUN pip install psycopg2
-#ENTRYPOINT [ "python" ,"scripts/sme/sme.py", "-cmd consume", "-b localhost:9092", "-s http://localhost:8081", "-t psi-collection-extractor-to" , "-g sme-test", "-o earliest" ]
+
CMD tail -f /dev/null
diff --git a/scripts/sme/README.md b/scripts/sme/README.md
deleted file mode 100644
index 72a4c2b..0000000
--- a/scripts/sme/README.md
+++ /dev/null
@@ -1,39 +0,0 @@
-# SME Script
-
-## AWS Credentials
-Populate values for ACCESS_KEY and SECRET_KEY in credentials.yml
-
-## Helm Values
-Update values in onestop-sqs-consumer/vaules.yaml
-
-## Prerequisites
-You will need a kafka broker and a schema-registry running to test this package. To bring up the OneStop stack, see the [OneStop quickstart documentation](https://github.com/cedardevs/onestop/blob/master/docs/developer/quickstart.md#quick-start-kubernetes--helm--skaffold)
-
-### Start up kubernetes clusters using skaffold
-
-``skaffold dev --status-check=false --force=false``
-
-### Load test data to expose Kafka Topics
-```./upload.sh IM COOPS/ localhost/onestop/api/registry```
-
-### Install onestop-python-client repo into directory
-
-``pip install ./onestop-python-client ``
-
-## Usage
-
-### Upload CSB Data to first topic (psi-granule-input-unknown)
-```python launch_e2e.py -conf config/aws-util-config-dev.yml -cred config/credentials-template.yml```
-
-### Start up sme container
-```helm install sme helm/onestop-sqs-consumer```
-
-### Exec into sme container and run extraction code
-
-```kubectl exec -it -- bash```
-
-```python sme.py```
-
-
-### Look at newly added data in parsed-granule-input topic
-```python smeFunc.py```
diff --git a/scripts/sme/sme.py b/scripts/sme/sme.py
index 6509aa3..12f7859 100644
--- a/scripts/sme/sme.py
+++ b/scripts/sme/sme.py
@@ -1,6 +1,7 @@
-import argparse
import json
import os
+import yaml
+import argparse
from onestop.extract.CsbExtractor import CsbExtractor
from onestop.KafkaConsumer import KafkaConsumer
@@ -9,13 +10,16 @@
from onestop.schemas.geojsonSchemaClasses.org.cedar.schemas.avro.geojson.point import Point
from onestop.schemas.geojsonSchemaClasses.point_type import PointType
from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.parsed_record import ParsedRecord
-from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.checksum_algorithm import ChecksumAlgorithm
+#from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.checksum_algorithm import ChecksumAlgorithm
from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.temporal_bounding import TemporalBounding
from onestop.schemas.util.jsonEncoder import EnumEncoder, as_enum, EnumEncoderValue
from onestop.KafkaPublisher import KafkaPublisher
-from spatial import script_generation, postgres_insert
+#from spatial import script_generation, postgres_insert
+from onestop.util.ClientLogger import ClientLogger
-def handler(key, value):
+config_dict = {}
+
+def handler(key, value, log_level = 'INFO'):
'''
Consumes message from psi-input-unknown, extracts geospatial data, uploads new payload to parsed-record topic in kafka, and uploads geospatial data to Postgres
@@ -27,99 +31,120 @@ def handler(key, value):
:return: str
returns response message from kafka
'''
- print('Key:', key)
- print('Value: ' ,value)
# Grabs the contents of the message and turns the dict string into a dictionary using json.loads
- try:
- content_dict = json.loads(value['content'], object_hook=as_enum)
-
- parsed_record = ParsedRecord().from_dict(content_dict)
-
- # Geospatial Extraction
- # Extract the bucket key for csb_extractor object initialization
- bucket_key = content_dict['discovery']['links'][0]['linkUrl'].split('.com/')[1]
-
- csb_extractor = CsbExtractor(su, bucket_key)
- if csb_extractor.is_csv(bucket_key):
- geospatial = csb_extractor.get_spatial_temporal_bounds('LON', 'LAT', 'TIME')
- begin_date, end_date = geospatial['temporal'][0], geospatial['temporal'][1]
- max_lon, max_lat, min_lon, min_lat = geospatial['geospatial'][2], geospatial['geospatial'][3], \
- geospatial['geospatial'][0], geospatial['geospatial'][1]
- coords = csb_extractor.extract_coords(max_lon, max_lat, min_lon, min_lat)
-
- # Create spatial bounding types based on the given coords
- pointType = PointType('Point')
- point = Point(coordinates=coords[0], type=pointType)
-
- # Create temp bounding obj
- tempBounding = TemporalBounding(beginDate=begin_date, endDate=end_date)
-
- # Update parsed record object with geospatial data
- parsed_record.discovery.temporalBounding = tempBounding
- parsed_record.discovery.spatialBounding = point
-
- """
- # Insert data into postgres
- script = script_generation(coords[0], key)
- postgres_insert(script)
- """
-
- # update content dict
- parsed_record.type = value['type']
- content_dict = parsed_record.to_dict()
- # reformat Relationship field
- relationship_type = content_dict['relationships'][0]['type']['type']
- content_dict['relationships'][0]['type'] = relationship_type
-
- # reformat File Locations
- filelocation_type = content_dict['fileLocations']['type']['type']
- content_dict['fileLocations']['type'] = filelocation_type
-
+ logger = ClientLogger.get_logger('sme.handler', log_level, False)
+ logger.info('In Handler')
+    # Fallback example message, used only when the handler is invoked without a real value (e.g. the commented-out test call in __main__).
+    if not value: value = {
+ "type": "granule",
+ "content": "{ \"discovery\": {\n \"fileIdentifier\": \"92ade5dc-946d-11ea-abe4-0242ac120004\",\n \"links\": [\n {\n \"linkFunction\": \"download\",\n \"linkName\": \"Amazon S3\",\n \"linkProtocol\": \"HTTPS\",\n \"linkUrl\": \"https://s3.amazonaws.com/nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\"\n }\n ],\n \"parentIdentifier\": \"0fad03df-0805-434a-86a6-7dc42d68480f\",\n \"spatialBounding\": null,\n \"temporalBounding\": {\n \"beginDate\": \"2020-05-12T16:20:15.158Z\", \n \"endDate\": \"2020-05-12T16:21:51.494Z\"\n },\n \"title\": \"HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\"\n },\n \"fileInformation\": {\n \"checksums\": [{\"algorithm\": \"MD5\",\"value\": \"44d2452e8bc2c8013e9c673086fbab7a\"}]\n, \"optionalAttributes\":{}, \"format\": \"HSD\",\n \"name\": \"HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\",\n \"size\": 208918\n },\n \"fileLocations\": {\n \"optionalAttributes\":{}, \"uri\":\"//nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\", \"asynchronous\": false,\n \"deleted\": false,\n \"lastModified\": 1589300890000,\n \"locality\": \"us-east-1\",\n \"restricted\": false,\n \"serviceType\": \"Amazon:AWS:S3\",\n \"type\": {\"type\":\"ACCESS\"},\n \"uri\": \"s3://nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\"\n },\n \"relationships\": [\n {\n \"id\": \"0fad03df-0805-434a-86a6-7dc42d68480f\",\n \"type\": {\"type\":\"COLLECTION\"}\n }\n ]\n }",
+ "contentType": "application/json",
+ "method": "PUT",
+ "source": "unknown",
+ "operation": "ADD"
+ }
+ logger.debug('content: %s'%value['content'])
+
+ content_dict = json.loads(value['content'], object_hook=as_enum)
+ logger.debug('content_dict: %s'%content_dict)
+ parsed_record = ParsedRecord().from_dict(content_dict)
+
+ # Geospatial Extraction
+ bucket_key = content_dict['discovery']['links'][0]['linkUrl'].split('.com/')[1]
+ logger.info("Bucket key="+bucket_key)
+ if CsbExtractor.is_csv(bucket_key):
+ logger.info('Extracting geospatial information')
+ sm_open_file = su.get_csv_s3(su.connect("session", None), config_dict['s3_bucket'], bucket_key)
+ geospatial = CsbExtractor.get_spatial_temporal_bounds(sm_open_file, 'LON', 'LAT', 'TIME')
+ begin_date, end_date = geospatial['temporal'][0], geospatial['temporal'][1]
+ max_lon, max_lat, min_lon, min_lat = geospatial['geospatial'][2], geospatial['geospatial'][3], \
+ geospatial['geospatial'][0], geospatial['geospatial'][1]
+ coords = CsbExtractor.extract_coords(sm_open_file, max_lon, max_lat, min_lon, min_lat)
+
+ # Create spatial bounding types based on the given coords
+ pointType = PointType('Point')
+ point = Point(coordinates=coords[0], type=pointType)
content_dict['discovery']['spatialBounding']['type'] = pointType.value
- # Transform content_dict to appropiate payload
- # cls=EnumEncoderValue argument looks for instances of Enum classes and extracts only the value of the Enum
- content_dict = json.dumps(content_dict, cls=EnumEncoderValue)
- content_dict = json.loads(content_dict)
-
- # Produce new information to kafka
- kafka_publisher = KafkaPublisher("scripts/config/kafka-publisher-config-dev.yml")
- metadata_producer = kafka_publisher.connect()
- collection_id = parsed_record.relationships[0].id
- kafka_publisher.publish_granule(metadata_producer, collection_id, collection_id, content_dict)
-
- except:
- print('Invalid Format')
-
+ # Create temp bounding obj
+ tempBounding = TemporalBounding(beginDate=begin_date, endDate=end_date)
+
+ # Update parsed record object with geospatial data
+ parsed_record.discovery.temporalBounding = tempBounding
+ parsed_record.discovery.spatialBounding = point
+
+ """
+ # Insert data into postgres
+ script = script_generation(coords[0], key)
+ postgres_insert(script)
+ """
+ else:
+        logger.info('Record is not a CSV - skipping geospatial extraction')
+
+ # update content dict
+ parsed_record.type = value['type']
+ content_dict = parsed_record.to_dict()
+ # reformat Relationship field
+ relationship_type = content_dict['relationships'][0]['type']['type']
+ content_dict['relationships'][0]['type'] = relationship_type
+
+ # reformat File Locations
+ filelocation_type = content_dict['fileLocations']['type']['type']
+ content_dict['fileLocations']['type'] = filelocation_type
+
+    # Transform content_dict to the appropriate payload
+ # cls=EnumEncoderValue argument looks for instances of Enum classes and extracts only the value of the Enum
+ content_dict = json.dumps(content_dict, cls=EnumEncoderValue)
+ content_dict = json.loads(content_dict)
+
+    # Publish the enriched record back to Kafka. TODO: beware of a publish/consume cycle here, since the consumer invokes this handler.
+ kafka_publisher = KafkaPublisher(**config_dict)
+ metadata_producer = kafka_publisher.connect()
+ collection_id = parsed_record.relationships[0].id
+ kafka_publisher.publish_granule(metadata_producer, collection_id, content_dict)
if __name__ == '__main__':
- # This is where helm will mount the config
- conf_loc = "/etc/config/config.yml"
- # this is where we are about to write the cred yaml
- cred_loc = "creds.yml"
-
- registry_user = os.environ.get("REGISTRY_USERNAME")
- registry_pwd = os.environ.get("REGISTRY_PASSWORD")
- access_key = os.environ.get("ACCESS_KEY")
- access_secret = os.environ.get("SECRET_KEY")
-
- f = open(cred_loc, "w+")
-
- # TODO revisit this when we make a standard that all scripts will follow
- # write creds to a file to avoid changing the python library
- s = """
- sandbox:
- access_key: {key}
- secret_key: {secret}
- registry:
- username: {user}
- password: {pw}
- """.format(key=access_key, secret=access_secret, user=registry_user, pw=registry_pwd)
- f.write(s)
- f.close()
- r = open(cred_loc, "r")
-
- su = S3Utils(conf_loc, cred_loc)
- kafka_consumer = KafkaConsumer(conf_loc)
+ # Example command: python3 sme.py -conf /Users/whoever/repo/onestop-clients/scripts/config/combined_template.yml -cred /Users/whoever/repo/onestop-clients/scripts/config/credentials.yml
+    # python3 sme.py -cred /Users/whoever/repo/onestop-clients/scripts/config/credentials.yml
+ parser = argparse.ArgumentParser(description="Launches sme test")
+ # Set default config location to the Helm mounted pod configuration location
+ parser.add_argument('-conf', dest="conf", required=False, default='/etc/config/config.yml',
+ help="AWS config filepath")
+ parser.add_argument('-cred', dest="cred", required=True,
+ help="Credentials filepath")
+ args = vars(parser.parse_args())
+
+ # Generate configuration dictionary
+ conf_loc = args.pop('conf')
+ with open(conf_loc) as f:
+ config_dict.update(yaml.load(f, Loader=yaml.FullLoader))
+
+    # Get credentials from the passed-in credentials file path, or fall back to ENV variables.
+ cred_loc = args.pop('cred')
+ if cred_loc is not None:
+ with open(cred_loc) as f:
+ creds = yaml.load(f, Loader=yaml.FullLoader)
+ registry_username = creds['registry']['username']
+ registry_password = creds['registry']['password']
+ access_key = creds['sandbox']['access_key']
+ access_secret = creds['sandbox']['secret_key']
+ else:
+ print("Using env variables for config parameters")
+ registry_username = os.environ.get("REGISTRY_USERNAME")
+ registry_password = os.environ.get("REGISTRY_PASSWORD")
+ access_key = os.environ.get("ACCESS_KEY")
+ access_secret = os.environ.get("SECRET_KEY")
+
+ config_dict.update({
+ 'registry_username' : registry_username,
+ 'registry_password' : registry_password,
+ 'access_key' : access_key,
+ 'secret_key' : access_secret
+ })
+
+ su = S3Utils(**config_dict)
+
+ kafka_consumer = KafkaConsumer(**config_dict)
metadata_consumer = kafka_consumer.connect()
- kafka_consumer.consume(metadata_consumer, lambda k, v: handler(k, v))
\ No newline at end of file
+# handler('', '', config_dict['log_level']) # For testing purposes
+ kafka_consumer.consume(metadata_consumer, handler)
\ No newline at end of file
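For reference, the geospatial-extraction path that sme.py's handler now follows, pulled out as a standalone sketch. It uses only the CsbExtractor static methods, S3Utils calls, and schema classes that appear in the handler above; the function name is illustrative, and su/bucket are passed in rather than read from config_dict.

from onestop.extract.CsbExtractor import CsbExtractor
from onestop.schemas.geojsonSchemaClasses.org.cedar.schemas.avro.geojson.point import Point
from onestop.schemas.geojsonSchemaClasses.point_type import PointType
from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.temporal_bounding import TemporalBounding

def extract_csb_bounds(su, bucket, bucket_key):
    # Returns a (Point, TemporalBounding) pair for a CSB csv object, or None for non-csv keys.
    if not CsbExtractor.is_csv(bucket_key):
        return None
    sm_open_file = su.get_csv_s3(su.connect("session", None), bucket, bucket_key)
    geospatial = CsbExtractor.get_spatial_temporal_bounds(sm_open_file, 'LON', 'LAT', 'TIME')
    begin_date, end_date = geospatial['temporal'][0], geospatial['temporal'][1]
    min_lon, min_lat = geospatial['geospatial'][0], geospatial['geospatial'][1]
    max_lon, max_lat = geospatial['geospatial'][2], geospatial['geospatial'][3]
    coords = CsbExtractor.extract_coords(sm_open_file, max_lon, max_lat, min_lon, min_lat)
    point = Point(coordinates=coords[0], type=PointType('Point'))
    bounds = TemporalBounding(beginDate=begin_date, endDate=end_date)
    return point, bounds

# Usage inside a handler, with su = S3Utils(**config_dict) as above:
# extracted = extract_csb_bounds(su, config_dict['s3_bucket'], bucket_key)
# if extracted:
#     parsed_record.discovery.spatialBounding, parsed_record.discovery.temporalBounding = extracted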
diff --git a/scripts/sme/smeFunc.py b/scripts/sme/smeFunc.py
index 2e11d51..f07c7b6 100644
--- a/scripts/sme/smeFunc.py
+++ b/scripts/sme/smeFunc.py
@@ -1,8 +1,11 @@
+import yaml
import argparse
-import json
from onestop.KafkaConsumer import KafkaConsumer
+from onestop.util.ClientLogger import ClientLogger
-def handler(key,value):
+config_dict = {}
+
+def handler(key, value, log_level = 'INFO'):
'''
Prints key, value pair of items in topic
@@ -13,9 +16,10 @@ def handler(key,value):
:return: None
'''
- print(key)
- print('VALUE-------')
- print(value)
+ logger = ClientLogger.get_logger('smeFunc.handler', log_level, False)
+ logger.info('In Handler')
+ logger.info('key=%s value=%s'%(key, value))
+
"""
if (value['type'] == 'collection' or not bool(value['fileInformation'])):
print(value['discovery']['fileIdentifier'])
@@ -25,51 +29,20 @@ def handler(key,value):
if __name__ == '__main__':
-
- kafka_consumer = KafkaConsumer("scripts/config/kafka-publisher-config-dev.yml")
- kafka_consumer.granule_topic = 'psi-granule-parsed'
+ # Example command: python3 smeFunc.py -conf /Users/whoever/repo/onestop-clients/scripts/config/combined_template.yml
+ # python3 smeFunc.py
+ parser = argparse.ArgumentParser(description="Launches smeFunc test")
+ # Set default config location to the Helm mounted pod configuration location
+ parser.add_argument('-conf', dest="conf", required=False, default='/etc/config/config.yml',
+ help="AWS config filepath")
+ args = vars(parser.parse_args())
+
+ # Generate configuration dictionary
+ conf_loc = args.pop('conf')
+ with open(conf_loc) as f:
+ config_dict.update(yaml.load(f, Loader=yaml.FullLoader))
+
+ kafka_consumer = KafkaConsumer(**config_dict)
+ kafka_consumer.granule_topic_consume = 'psi-granule-parsed'
metadata_consumer = kafka_consumer.connect()
- kafka_consumer.consume(metadata_consumer, lambda k, v: handler(k, v))
- """
- parser = argparse.ArgumentParser(description="Allows smeFunc to produce or consume messagges from kafkda topics")
- parser.add_argument('-cmd', dest="command", required=True,
- help="Command (produce/consume)")
-
- parser.add_argument('-b', dest="bootstrap.servers", required=True,
- help="Bootstrap broker(s) (host[:port])")
- parser.add_argument('-s', dest="schema.registry.url", required=True,
- help="Schema Registry (http(s)://host[:port]")
- parser.add_argument('-t', dest="topic", required=True,
- help="Topic name")
- parser.add_argument('-g', dest="group.id", required=False,
- help="Consumer group")
- parser.add_argument('-o', dest="auto.offset.reset", required=False,
- help="offset")
-
- config = vars(parser.parse_args())
- topic = config.pop('topic')
- cmd = config.pop('command')
-
- if (cmd=="consume"):
- consume(config, topic, lambda k, v: handler(k, v))
-
-
- if (cmd=="produce"):
-
- #Example content
- value = {
- "type": "collection",
- "content": "Update!",
- "contentType": "application/json",
- "method": "PUT",
- "source": "unknown",
- "operation": "ADD"
- }
-
- key = "3ee5976e-789a-41d5-9cae-d51e7b92a247"
-
- data = {key: value}
-
- produce(config, topic, data)
- """
-
+ kafka_consumer.consume(metadata_consumer, handler)
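A handler only has to accept the (key, value, log_level) signature used above, so variants are easy to swap in. Below is a hedged sketch that revives the filtering idea from the commented-out block in smeFunc.py; the handler name is illustrative and the wiring matches __main__ above.

from onestop.KafkaConsumer import KafkaConsumer
from onestop.util.ClientLogger import ClientLogger

def collection_only_handler(key, value, log_level='INFO'):
    # Log only collection-level messages, as the commented-out filter above intended.
    logger = ClientLogger.get_logger('smeFunc.collection_only_handler', log_level, False)
    if value['type'] == 'collection' or not bool(value['fileInformation']):
        logger.info('key=%s fileIdentifier=%s' % (key, value['discovery']['fileIdentifier']))

# Wiring, as in __main__ above (config_dict built from the -conf YAML):
# kafka_consumer = KafkaConsumer(**config_dict)
# kafka_consumer.granule_topic_consume = 'psi-granule-parsed'
# metadata_consumer = kafka_consumer.connect()
# kafka_consumer.consume(metadata_consumer, collection_only_handler)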
diff --git a/scripts/sqs-to-registry/Dockerfile b/scripts/sqs-to-registry/Dockerfile
index 9db0598..985421d 100644
--- a/scripts/sqs-to-registry/Dockerfile
+++ b/scripts/sqs-to-registry/Dockerfile
@@ -1,10 +1,9 @@
+# The base image already copies the scripts directory and installs onestop-python-client.
FROM cedardevs/onestop-python-client:latest
-COPY . .
+
#required by the sme script, not our library
RUN pip install argparse
-#I should not have to do this, since it is done in the base image
-#RUN pip install -r ./onestop-python-client/requirements.txt
ENTRYPOINT [ "python" ]
CMD [ "s3_notification_handler.py" ]
#CMD tail -f /dev/null
\ No newline at end of file
diff --git a/scripts/sqs-to-registry/config/e2e.yml b/scripts/sqs-to-registry/config/e2e.yml
index 4c2c800..a2bdcfc 100644
--- a/scripts/sqs-to-registry/config/e2e.yml
+++ b/scripts/sqs-to-registry/config/e2e.yml
@@ -14,14 +14,16 @@ s3_bucket2: noaa-nccf-dev-archive
#CSB stream config
format: csv
headers: UNIQUE_ID,FILE_UUID,LON,LAT,DEPTH,TIME,PLATFORM_NAME,PROVIDER
-type: COLLECTION
collection_id: fdb56230-87f4-49f2-ab83-104cfd073177
access_bucket: https://archive-testing-demo.s3-us-east-2.amazonaws.com
#access_bucket: https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com
file_identifier_prefix: "gov.noaa.ncei.csb:"
# COLLECTION or GRANULE
-metadata_type: granule
+kafka_consumer_metadata_type: GRANULE
+kafka_publisher_metadata_type: GRANULE
+s3_message_adapter_metadata_type: COLLECTION
+
registry_base_url: http://onestop-registry:80
onestop_base_url: http://onestop-search:8080
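The single type/metadata_type keys are replaced above by one key per component, so each client can read its own value from the merged config. A minimal sketch of loading this file and checking the new keys (the expected values simply restate the config above; construction with **config_dict happens in s3_notification_handler.py below):

import yaml

config_dict = {}
with open('scripts/sqs-to-registry/config/e2e.yml') as f:
    config_dict.update(yaml.load(f, Loader=yaml.FullLoader))

# Each component now has a dedicated metadata-type key instead of a shared 'type'.
assert config_dict['s3_message_adapter_metadata_type'] == 'COLLECTION'
assert config_dict['kafka_consumer_metadata_type'] == 'GRANULE'
assert config_dict['kafka_publisher_metadata_type'] == 'GRANULE'

# After the credential keys are merged in, the whole dict is splatted into each client, e.g.:
# s3ma = S3MessageAdapter(**config_dict)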
diff --git a/scripts/sqs-to-registry/s3_notification_handler.py b/scripts/sqs-to-registry/s3_notification_handler.py
index 2b26ab5..c2785a0 100644
--- a/scripts/sqs-to-registry/s3_notification_handler.py
+++ b/scripts/sqs-to-registry/s3_notification_handler.py
@@ -1,55 +1,87 @@
import os
import yaml
+import json
+
from onestop.util.SqsConsumer import SqsConsumer
from onestop.util.S3Utils import S3Utils
from onestop.util.S3MessageAdapter import S3MessageAdapter
from onestop.WebPublisher import WebPublisher
from onestop.util.SqsHandlers import create_delete_handler
from onestop.util.SqsHandlers import create_upload_handler
+from onestop.util.ClientLogger import ClientLogger
-from datetime import date
import argparse
+config_dict = {}
+# NOTE: config_dict is empty until __main__ populates it, so the example message below falls back to defaults (us-east-2, archive-testing-demo) matching the values referenced elsewhere in this message.
+test_message = {
+ "Type": "Notification",
+ "MessageId": "e12f0129-0236-529c-aeed-5978d181e92a",
+ "TopicArn": "arn:aws:sns:" + config_dict['s3_region'] + ":798276211865:cloud-archive-client-sns",
+ "Subject": "Amazon S3 Notification",
+ "Message": '''{
+ "Records": [{
+ "eventVersion": "2.1", "eventSource": "aws:s3", "awsRegion": "''' + config_dict['s3_region'] + '''",
+ "eventTime": "2020-12-14T20:56:08.725Z",
+ "eventName": "ObjectRemoved:Delete",
+ "userIdentity": {"principalId": "AX8TWPQYA8JEM"},
+ "requestParameters": {"sourceIPAddress": "65.113.158.185"},
+ "responseElements": {"x-amz-request-id": "D8059E6A1D53597A",
+ "x-amz-id-2": "7DZF7MAaHztZqVMKlsK45Ogrto0945RzXSkMnmArxNCZ+4/jmXeUn9JM1NWOMeKK093vW8g5Cj5KMutID+4R3W1Rx3XDZOio"},
+ "s3": {
+ "s3SchemaVersion": "1.0", "configurationId": "archive-testing-demo-event",
+ "bucket": {"name": "''' + config_dict['s3_bucket'] + '''",
+ "ownerIdentity": {"principalId": "AX8TWPQYA8JEM"},
+ "arn": "arn:aws:s3:::''' + config_dict['s3_bucket'] + '''"},
+ "object": {"key": "123",
+ "sequencer": "005FD7D1765F04D8BE",
+ "eTag": "44d2452e8bc2c8013e9c673086fbab7a",
+ "size": 1385,
+ "versionId": "q6ls_7mhqUbfMsoYiQSiADnHBZQ3Fbzf"}
+ }
+ }]
+ }''',
+ "Timestamp": "2020-12-14T20:56:23.786Z",
+ "SignatureVersion": "1",
+ "Signature": "MB5P0H5R5q3zOFoo05lpL4YuZ5TJy+f2c026wBWBsQ7mbNQiVxAy4VbbK0U1N3YQwOslq5ImVjMpf26t1+zY1hoHoALfvHY9wPtc8RNlYqmupCaZgtwEl3MYQz2pHIXbcma4rt2oh+vp/n+viARCToupyysEWTvw9a9k9AZRuHhTt8NKe4gpphG0s3/C1FdvrpQUvxoSGVizkaX93clU+hAFsB7V+yTlbKP+SNAqP/PaLtai6aPY9Lb8reO2ZjucOl7EgF5IhBVT43HhjBBj4JqYBNbMPcId5vMfBX8qI8ANIVlGGCIjGo1fpU0ROxSHsltuRjkmErpxUEe3YJJM3Q==",
+ "SigningCertURL": "https://sns.us-east-2.amazonaws.com/SimpleNotificationService-010a507c1833636cd94bdb98bd93083a.pem",
+ "UnsubscribeURL": "https://sns.us-east-2.amazonaws.com/?Action=Unsubscribe&SubscriptionArn=arn:aws:sns:us-east-2:798276211865:cloud-archive-client-sns:461222e7-0abf-40c6-acf7-4825cef65cce"
+}
+
+def handler(recs, log_level='INFO'):
+ logger = ClientLogger.get_logger('s3_notification_handler.handler', log_level, False)
+ logger.info('In Handler')
-def handler(recs):
- print("Handling message...")
+ if recs is None:
+ logger.info('No records retrieved, doing nothing.')
+ return
- # Now get boto client for object-uuid retrieval
- object_uuid = None
+ rec = recs[0]
+ logger.info('Record:%s'%rec)
- if recs is None:
- print("No records retrieved" + date.today())
+ if 'ObjectRemoved' in rec['eventName']:
+ delete_handler(recs)
else:
- rec = recs[0]
- print(rec)
- if 'ObjectRemoved' in rec['eventName']:
- print("SME - calling delete handler")
- print(rec['eventName'])
- delete_handler(recs)
- else:
- print("SME - calling upload handler")
- upload_handler(recs)
- #copy_handler(recs)
-
+ upload_handler(recs)
if __name__ == '__main__':
-
- parser = argparse.ArgumentParser(description="Launch SQS to Registry consumer")
- parser.add_argument('-conf', dest="conf", required=False,
- help="Config filepath")
-
- parser.add_argument('-cred', dest="cred", required=False,
+    # Example command: python3 s3_notification_handler.py -conf /Users/whoever/repo/onestop-clients/scripts/config/combined_template.yml -cred /Users/whoever/repo/onestop-clients/scripts/config/credentials.yml
+    # python3 s3_notification_handler.py -cred /Users/whoever/repo/onestop-clients/scripts/config/credentials.yml
+    parser = argparse.ArgumentParser(description="Launches SQS to Registry consumer")
+ # Set default config location to the Helm mounted pod configuration location
+ parser.add_argument('-conf', dest="conf", required=False, default='/etc/config/config.yml',
+ help="AWS config filepath")
+ parser.add_argument('-cred', dest="cred", required=True,
help="Credentials filepath")
-
args = vars(parser.parse_args())
- cred_loc = args.pop('cred')
- #credentials from either file or env
- registry_username = None
- registry_password = None
- access_key = None
- access_secret = None
+ # Generate configuration dictionary
+ conf_loc = args.pop('conf')
+ with open(conf_loc) as f:
+ config_dict.update(yaml.load(f, Loader=yaml.FullLoader))
+    # Get credentials from the passed-in credentials file path, or fall back to ENV variables.
+ cred_loc = args.pop('cred')
if cred_loc is not None:
with open(cred_loc) as f:
creds = yaml.load(f, Loader=yaml.FullLoader)
@@ -64,60 +96,34 @@ def handler(recs):
access_key = os.environ.get("ACCESS_KEY")
access_secret = os.environ.get("SECRET_KEY")
- # default config location mounted in pod
- if args.pop('conf') is None:
- conf_loc = "/etc/config/config.yml"
- else:
- conf_loc = args.pop('conf')
-
- conf = None
- with open(conf_loc) as f:
- conf = yaml.load(f, Loader=yaml.FullLoader)
-
- #TODO organize the config
- #System
- log_level = conf['log_level']
- sqs_max_polls = conf['sqs_max_polls']
-
- #Destination
- registry_base_url = conf['registry_base_url']
- onestop_base_url = conf['onestop_base_url']
-
- #Source
- access_bucket = conf['access_bucket']
- sqs_url = conf['sqs_url']
- s3_region = conf['s3_region']
- s3_bucket2 = conf['s3_bucket2']
- s3_region2 = conf['s3_region2']
-
-
- #Onestop related
- prefix_map = conf['prefixMap']
- file_id_prefix = conf['file_identifier_prefix']
- file_format = conf['format']
- headers = conf['headers']
- type = conf['type']
+ config_dict.update({
+ 'registry_username' : registry_username,
+ 'registry_password' : registry_password,
+ 'access_key' : access_key,
+ 'secret_key' : access_secret
+ })
+ sqs_consumer = SqsConsumer(**config_dict)
- sqs_consumer = SqsConsumer(access_key, access_secret, s3_region, sqs_url, log_level)
+ wp = WebPublisher(**config_dict)
- wp = WebPublisher(registry_base_url=registry_base_url, username=registry_username, password=registry_password,
- onestop_base_url=onestop_base_url, log_level=log_level)
+ s3_utils = S3Utils(**config_dict)
- s3_utils = S3Utils(access_key, access_secret, log_level)
- s3ma = S3MessageAdapter(access_bucket, prefix_map, format, headers, type, file_id_prefix, log_level)
+ s3ma = S3MessageAdapter(**config_dict)
delete_handler = create_delete_handler(wp)
upload_handler = create_upload_handler(wp, s3_utils, s3ma)
- queue = sqs_consumer.connect()
+    sqs_resource = s3_utils.connect('resource', 'sqs', config_dict['s3_region'])
+    queue = sqs_consumer.connect(sqs_resource, config_dict['sqs_name'])
- try:
- debug = False
- # # Pass in the handler method
- #Hack to make this stay up forever
- #TODO add feature to client library for polling indefinitely
- while True:
- sqs_consumer.receive_messages(queue, sqs_max_polls, handler)
+ # Send a test message
+# sqs_client = s3_utils.connect('client', 'sqs' , config_dict['s3_region'])
+# sqs_client.send_message(
+# QueueUrl='https://sqs.us-east-2.amazonaws.com/798276211865/cloud-archive-client-sqs',
+# MessageBody=json.dumps(test_message)
+# )
- except Exception as e:
- print("Message queue consumption failed: {}".format(e))
+    # Hack to keep the consumer polling indefinitely
+    # TODO: add indefinite-polling support to the client library
+ while True:
+ sqs_consumer.receive_messages(queue, config_dict['sqs_max_polls'], handler)
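Until the client library grows an indefinite-polling option, one way to make the bare while True loop above a little safer is to guard each receive_messages pass so a single failure does not kill the pod. A minimal sketch under that assumption, reusing the error message from the code this diff removes; the function name and backoff are illustrative.

import time
from onestop.util.ClientLogger import ClientLogger

def poll_forever(sqs_consumer, queue, max_polls, handler, log_level='INFO', backoff_secs=5):
    # Re-enter receive_messages indefinitely, logging and backing off on failure.
    logger = ClientLogger.get_logger('s3_notification_handler.poll_forever', log_level, False)
    while True:
        try:
            sqs_consumer.receive_messages(queue, max_polls, handler)
        except Exception as e:
            logger.error("Message queue consumption failed: {}".format(e))
            time.sleep(backoff_secs)

# Usage, replacing the loop above:
# poll_forever(sqs_consumer, queue, config_dict['sqs_max_polls'], handler, config_dict['log_level'])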
diff --git a/serverless/conf.py b/serverless/conf.py
index b41eb0b..26ef3cd 100644
--- a/serverless/conf.py
+++ b/serverless/conf.py
@@ -3,6 +3,6 @@
HEADERS = 'UNIQUE_ID,FILE_UUID,LON,LAT,DEPTH,TIME,PLATFORM_NAME,PROVIDER'
TYPE = 'COLLECTION'
COLLECTION_ID = 'fdb56230-87f4-49f2-ab83-104cfd073177'
-PSI_REGISTRY_URL = 'http://internal-a683c98a66fb011eaa4230e0d5e5657f-369075387.us-east-1.elb.amazonaws.com'
+REGISTRY_BASE_URL = 'http://internal-a683c98a66fb011eaa4230e0d5e5657f-369075387.us-east-1.elb.amazonaws.com'
ACCESS_BUCKET = 'https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com'
FILE_IDENTIFIER_PREFIX = 'gov.noaa.ncei.csb:'
diff --git a/serverless/lambda_function.py b/serverless/lambda_function.py
index abe8fb7..3b6cd97 100644
--- a/serverless/lambda_function.py
+++ b/serverless/lambda_function.py
@@ -9,7 +9,7 @@
def lambda_handler(event, context):
- registry_url = conf.PSI_REGISTRY_URL + "/metadata/granule"
+ registry_url = conf.REGISTRY_BASE_URL + "/metadata/granule"
for rec in event['Records']: