Put inspection results in an S3 bucket.

Katherine Black · Katherine Black · commit 01c722e50607 · 2021-10-21T10:17:07.000-07:00
diff --git a/README.md b/README.md
@@ -57,23 +57,11 @@ Finally, if you need to install a dev only dependency, use:
 
 ### Running
 
-Before running, you must have set and exported the following environment variables so houndigrade can talk to Amazon SQS to share its results:
+Before running, you must have set and exported the following environment variable so houndigrade can talk to Amazon S3 to share its results:
 
-    - `QUEUE_CONNECTION_URL`
-    - `AWS_SQS_QUEUE_NAME_PREFIX`
+    - `RESULTS_BUCKET_NAME`
 
-`AWS_SQS_QUEUE_NAME_PREFIX` should match what you use when running cloudigrade, and that is probably `${USER}-`.
-
-`QUEUE_CONNECTION_URL` must be a well-formed SQS URL that includes your Amazon SQS access key and secret key. Many Amazon keys have URL-unfriendly characters. You may want to use a small helper script like this to generate a valid URL:
-
-```python
-from os import environ
-from urllib.parse import quote
-print('sqs://{}:{}@'.format(
-    quote(environ['AWS_SQS_ACCESS_KEY_ID'], safe=''),
-    quote(environ['AWS_SQS_SECRET_ACCESS_KEY'], safe='')
-))
-```
+`RESULTS_BUCKET_NAME` should be the name of the S3 bucket in which you want your results stored. The AWS credentials themselves are gathered from the environment.
 
 To run houndigrade locally against minimal test disk images, follow these steps:
 
@@ -131,34 +119,19 @@ If you wish to run a higher-level suite of integration tests, see
 
 ### Manually running in AWS
 
-If you want to manually run houndigrade in AWS so that you can watch its output in real-time, you can *simulate* how the cloudigrade ECS task runs houndigrade by SSH-ing to an EC2 instance (running an ECS AMI) and running Docker with the arguments that would be used in the ECS task definition. For example:
+If you want to manually run houndigrade in AWS so that you can watch its output in real-time, you can *simulate* how the cloudigrade CloudInit task runs houndigrade by SSH-ing to an EC2 instance (running an ECS AMI) and running Docker with the arguments that would be used in the CloudInit task definition. For example:
 
     docker run \
         --mount type=bind,source=/dev,target=/dev \
         --privileged --rm -i -t \
-        -e AWS_ACCESS_KEY_ID=AWS_SQS_ACCESS_KEY_ID \
-        -e AWS_DEFAULT_REGION="us-east-1" \
-        -e AWS_SECRET_ACCESS_KEY="AWS_SQS_SECRET_ACCESS_KEY" \
-        -e EXCHANGE_NAME="" \
-        -e QUEUE_CONNECTION_URL="sqs://AWS_SQS_ACCESS_KEY_ID:AWS_SQS_SECRET_ACCESS_KEY@" \
-        -e RESULTS_QUEUE_NAME="HOUNDIGRADE_RESULTS_QUEUE_NAME" \
+        -e RESULTS_BUCKET_NAME=RESULTS_BUCKET_NAME \
         --name houndi \
         "registry.gitlab.com/cloudigrade/houndigrade:latest" \
         -c aws \
-        -t ami-13469000000000000 /dev/sdf \
-        -t ami-12345678900000000 /dev/sdg
+        -t ami-13469000000000000 /dev/sdf
 
 You will need to set appropriate values for the `-e` variables passed into the environment, each of the `-t` arguments that define the inspection targets, and the specific version of the houndigrade image you wish to use. When you attach volumes in AWS, you can define the device paths they'll use, and they should match your target arguments here. Alternatively, you can describe the running EC2 instance to get the device paths.
 
 # Releasing Houndigrade
 
-Releasing houndigrade is a simple process of tagging a new version in GitHub. 
-
-1. Navigate to the [releases page](https://github.com/cloudigrade/houndigrade/releases)
-2. Draft a new release
-3. Check the [Pull Requests page](https://github.com/cloudigrade/houndigrade/pulls) to see all the new changes since the last release
-4. For Tag Version we use [Semantic Versioning](https://semver.org/)
-5. For the main release body, please include merged PRs that will be part of this release, ideally linking to the PR itself.
-6. Press button, receive release. The [tag github actions workflow](https://github.com/cloudigrade/houndigrade/blob/master/.github/workflows/tag.yml) will test, build, tag, and get the image copied to quay.
-
-You will find your image in both the [Github Container Registry](https://github.com/orgs/cloudigrade/packages/container/package/houndigrade) and in [Quay.io](https://quay.io/repository/cloudservices/houndigrade)
+Please refer to the [wiki](https://github.com/cloudigrade/houndigrade/wiki/Releasing-Houndigrade).
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -5,12 +5,7 @@ services:
     build: .
     entrypoint: ./entrypoint.sh
     environment:
-      - AWS_DEFAULT_REGION
-      - AWS_ACCESS_KEY_ID
-      - AWS_SECRET_ACCESS_KEY
-      - EXCHANGE_NAME
-      - QUEUE_CONNECTION_URL
-      - RESULTS_QUEUE_NAME=${AWS_SQS_QUEUE_NAME_PREFIX}inspection_results
+      - RESULTS_BUCKET_NAME=${CLOUDIGRADE_ENVIRONMENT}-cloudigrade-inspections
     volumes:
       - ./docker/dev-entrypoint.sh:/opt/houndigrade/entrypoint.sh
       - ./test-data:/test-data:ro
diff --git a/houndigrade/cli.py b/houndigrade/cli.py
@@ -6,14 +6,17 @@
 import os
 import subprocess
 import sys
+from base64 import b64encode
 from contextlib import contextmanager
+from datetime import datetime
 from gettext import gettext as _
+from hashlib import md5
+from uuid import uuid4
 
 import boto3
 import click
 import jsonpickle
 import sh
-from botocore.exceptions import ClientError
 from sentry_sdk import init
 
 INSPECT_PATH = "/mnt/inspect"
@@ -322,46 +325,42 @@ def mount(partition, inspect_path):
     click.echo(_("UnMounting result {}.").format(unmount_result.exit_code))
 
 
-def _get_sqs_queue_url(queue_name):
+def generate_results_key():
     """
-    Get the SQS queue URL for the given queue name.
-
-    This has the side-effect on ensuring that the queue exists.
-
-    Note: This function was copied verbatim from `cloudigrade`.
-
-    FIXME: Move this function to a shared library.
-
-    Args:
-        queue_name (str): the name of the target SQS queue
+    Generate the key at which the results object will be placed.
 
     Returns:
-        str: the queue's URL.
+        (str): String representation of the object key.
 
     """
-    sqs = boto3.client("sqs")
-    try:
-        return sqs.get_queue_url(QueueName=queue_name)["QueueUrl"]
-    except ClientError as e:
-        if e.response["Error"]["Code"].endswith(".NonExistentQueue"):
-            return sqs.create_queue(QueueName=queue_name)["QueueUrl"]
-        raise
+    now = datetime.now()
+    time_path = now.strftime("%Y-%m/%d/%H.%M.%S")
+
+    return f"InspectionResults/{time_path}-{uuid4()}.json"
 
 
 def report_results(results):
     """
-    Places the results on a queue.
+    Places results in the bucket.
 
     Args:
-        results (dict): The results of the finished inspection.
+        results (dict): The results of the finished inspection to upload.
 
     """
-    message_body = jsonpickle.encode(results)
-    queue_name = os.getenv("RESULTS_QUEUE_NAME")
-    queue_url = _get_sqs_queue_url(queue_name)
+    json_results = jsonpickle.encode(results)
+    encoded_results = json_results.encode()
+    results_md5 = b64encode(md5(encoded_results).digest()).decode()
+
+    bucket_name = os.getenv("RESULTS_BUCKET_NAME")
+
+    s3 = boto3.resource("s3")
+    bucket = s3.Bucket(bucket_name)
+
+    results = bucket.put_object(
+        Body=encoded_results, ContentMD5=results_md5, Key=generate_results_key()
+    )
 
-    sqs = boto3.client("sqs")
-    sqs.send_message(QueueUrl=queue_url, MessageBody=message_body)
+    return results
 
 
 def check_for_rhel_certs(partition, results):
diff --git a/houndigrade/tests/test_get_sqs_queue_url.py b/houndigrade/tests/test_get_sqs_queue_url.py
diff --git a/houndigrade/tests/test_report_results.py b/houndigrade/tests/test_report_results.py
@@ -0,0 +1,61 @@
+"""Collection of tests for ``cli.report_results`` function."""
+from datetime import datetime
+from unittest import TestCase
+from unittest.mock import patch
+from uuid import uuid4
+
+from cli import generate_results_key, report_results
+
+
+class TestGenerateResultsKey(TestCase):
+    """Test suite for houndigrade CLI's "generate_results_key" function."""
+
+    def test_generate_results_key(self):
+        """Test generating the key."""
+        test_now = datetime.now()
+        test_uuid = uuid4()
+        test_time_path = test_now.strftime("%Y-%m/%d/%H.%M.%S")
+
+        test_key = f"InspectionResults/{test_time_path}-{test_uuid}.json"
+
+        with patch("cli.datetime") as mock_datetime, patch("cli.uuid4") as mock_uuid4:
+            mock_datetime.now.return_value = test_now
+            mock_uuid4.return_value = test_uuid
+
+            result_key = generate_results_key()
+
+        self.assertEqual(test_key, result_key)
+
+    @patch("cli.boto3")
+    @patch("cli.b64encode")
+    @patch("cli.md5")
+    @patch("cli.jsonpickle")
+    def test_report_results(
+        self, mock_jsonpickle, mock_md5, mock_b64encode, mock_boto3
+    ):
+        """Verify we correctly report results."""
+        mock_results = {"test": "results"}
+        mock_results_bucket_name = "TestBucket"
+        mock_json_encode = mock_jsonpickle.encode
+        mock_utf_json = mock_json_encode.return_value.encode
+        mock_md5_digest = mock_md5.return_value.digest
+        mock_b64_decode = mock_b64encode.return_value.decode
+        mock_resource = mock_boto3.resource
+        mock_bucket = mock_resource.return_value.Bucket
+        mock_put_object = mock_bucket.return_value.put_object
+
+        with patch.dict(
+            "os.environ", {"RESULTS_BUCKET_NAME": mock_results_bucket_name}
+        ):
+            report_results(mock_results)
+
+        mock_json_encode.assert_called_once_with(mock_results)
+        mock_utf_json.assert_called_once()
+        mock_md5.assert_called_once_with(mock_utf_json.return_value)
+        mock_md5_digest.assert_called_once()
+        mock_b64encode.assert_called_once_with(mock_md5_digest.return_value)
+        mock_b64_decode.assert_called_once()
+
+        mock_resource.assert_called_once_with("s3")
+        mock_bucket.assert_called_once_with(mock_results_bucket_name)
+        mock_put_object.assert_called_once()