From 4e12e1dd793df67b88fef38a24a84d5e0f12cd04 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 13 Apr 2021 13:21:42 -0600 Subject: [PATCH 001/100] Changed S3Utils, S3MessageAdapter classes' constructors(adjusted documentation) to take dictionary with extra parameters allowed as well as methods within this class not to reference config but the variable that was set. Adjusted effected tests. --- .../config/aws-util-config-dev.yml | 2 + .../onestop/util/S3MessageAdapter.py | 110 ++++++------- onestop-python-client/onestop/util/S3Utils.py | 113 +++++++------ .../tests/SqsHandlersTest.py | 40 +++-- .../tests/util/S3MessageAdapterTest.py | 41 ++++- .../tests/util/S3UtilsTest.py | 151 +++++++++--------- scripts/launch_e2e.py | 45 ++++-- scripts/launch_pyconsumer.py | 12 +- 8 files changed, 291 insertions(+), 223 deletions(-) diff --git a/onestop-python-client/config/aws-util-config-dev.yml b/onestop-python-client/config/aws-util-config-dev.yml index ee1ad95..c30683e 100644 --- a/onestop-python-client/config/aws-util-config-dev.yml +++ b/onestop-python-client/config/aws-util-config-dev.yml @@ -3,9 +3,11 @@ log_level: INFO # AWS config values sqs_url: https://sqs.us-east-2.amazonaws.com/798276211865/cloud-archive-client-sqs +sqs_name: 'foobar' sqs_max_polls: 2 s3_region: "us-east-2" s3_bucket: archive-testing-demo +s3_key: 'ABI-L1b-RadF/2019/298/15/OR_ABI-L1b-RadF-M6C15_G16_s20192981500369_e20192981510082_c20192981510166.nc' #AWS config values for 2nd vault in different region vault_name: archive-vault-new diff --git a/onestop-python-client/onestop/util/S3MessageAdapter.py b/onestop-python-client/onestop/util/S3MessageAdapter.py index d640b77..1dda78c 100644 --- a/onestop-python-client/onestop/util/S3MessageAdapter.py +++ b/onestop-python-client/onestop/util/S3MessageAdapter.py @@ -1,10 +1,4 @@ -import yaml from onestop.util.ClientLogger import ClientLogger -""" -from onestop.info.ImMessage import ImMessage -from onestop.info.FileMessage import FileMessage -from onestop.info.Link import Link -""" from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.parsed_record import ParsedRecord, Publishing, ErrorEvent from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.file_location import FileLocation,FileLocationType @@ -14,81 +8,67 @@ from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.discovery import Discovery, Link - class S3MessageAdapter: """ A class used to extract information from sqs messages that have been triggered by s3 events and transform it into correct format for publishing to IM Registry Attributes ---------- - conf: yaml file - csb-data-stream-config.yml - s3_utils: S3Utils object - used to access objects inside of s3 buckets - logger: ClientLogger object - utilizes python logger library and creates logging for our specific needs - logger.info: ClientLogger object - logging statement that occurs when the class is instantiated - prefix_mapping: Dict - contains mapping of various line offices and their associated collection id + access_bucket: str + Cloud bucket to put in the links field when transformed. + type: str + COLLECTION or GRANULE + file_id_prefix: str + File prefix returned as fileIdentifier + collection_id: str + Collection this data belongs to. Returned as parent identifier. 
+ log_level: str + The log level to use for this class (Defaults to 'INFO') - Methods - ------- - collection_id_map(s3_key) - given an s3 key that contains one of the NESDIS line offices in its path, it will provide the corresponding collection id - - transform(recs) - transforms sqs message triggered by s3 event to correct format for publishing to IM registry - """ - def __init__(self, conf_loc, s3_utils): - """ - - :param conf_loc: yaml file - csb-data-stream-config.yml - :param s3_utils: S3Utils object - used to access objects inside of s3 buckets - - Other Attributes - ---------------- logger: ClientLogger object utilizes python logger library and creates logging for our specific needs logger.info: ClientLogger object logging statement that occurs when the class is instantiated - prefix_mapping: Dict - contains mapping of various line offices and their associated collection id - - """ - with open(conf_loc) as f: - self.conf = yaml.load(f, Loader=yaml.FullLoader) - - self.logger = ClientLogger.get_logger(self.__class__.__name__, self.conf['log_level'], False) - self.logger.info("Initializing " + self.__class__.__name__) - self.s3_utils = s3_utils - self.prefix_mapping = self.conf['prefixMap'] - - def collection_id_map(self, s3_key): + Methods + ------- + transform(recs) + transforms sqs message triggered by s3 event to correct format for publishing to IM registry + """ + def __init__(self, access_bucket, type, file_id_prefix, collection_id, log_level = 'INFO', **wildargs): """ - Given an s3 key that contains one of the NESDIS line offices in its path, it will provide the corresponding collection id + Parameters + ---------- + access_bucket: str + access bucket to put in the links field when transformed. + type: str + COLLECTION or GRANULE + file_id_prefix: str + File prefix returned as fileIdentifier + collection_id: str + Collection this data belongs to. Returned as parent identifier. + log_level: str + Log level for when logging in class. 
- :param s3_key: str - key path of object in s3 bucket - - :return: str - associated line office collection id """ - # Looks through our prefix map and returns appropriate collection id - for key in self.prefix_mapping: - if key in s3_key: - return self.prefix_mapping[key] + self.access_bucket = access_bucket + self.type = type + self.file_id_prefix = file_id_prefix + self.collection_id = collection_id + self.logger = ClientLogger.get_logger(self.__class__.__name__, log_level, False) + self.logger.info("Initializing " + self.__class__.__name__) + if wildargs: + self.logger.error("There were extra constructor arguments: " + str(wildargs)) def transform(self, recs): """ Transforms sqs message triggered by s3 event to correct format for publishing to IM registry - :param recs: dict - sqs event message + Parameters: + ---------- + recs: dict + sqs event message to transform :return: ParsedRecord Object The Parsed Record class is an avro schema generated class @@ -111,8 +91,8 @@ def transform(self, recs): fileInformation = FileInformation(name=file_name, size=file_size, checksums=[checkSum], optionalAttributes={}) # Relationship - relationshipType = RelationshipType(type=self.conf['type']) - relationship = Relationship(id=self.conf['collection_id'], type=relationshipType) + relationshipType = RelationshipType(type=self.type) + relationship = Relationship(id=self.collection_id, type=relationshipType) # File Location fileLocationType = FileLocationType(type='ARCHIVE') @@ -127,12 +107,12 @@ def transform(self, recs): publishing = Publishing(isPrivate=True) # Discovery - access_obj_uri = self.conf['access_bucket'] + "/" + s3_key + access_obj_uri = self.access_bucket + "/" + s3_key link1 = Link(linkName="Amazon S3", linkUrl=access_obj_uri, linkProtocol="HTTPS", linkFunction="download") link2 = Link(linkName="Amazon S3", linkUrl=s3_obj_uri, linkProtocol="Amazon:AWS:S3", linkFunction="download") # To Change? Come back to this later - parent_identifier = self.conf['collection_id'] - file_identifier = self.conf['file_identifier_prefix'] + file_name[:-4] + parent_identifier = self.collection_id + file_identifier = self.file_id_prefix + file_name[:-4] # Initializing most fields to their default values in the avro schema so that it doesn't cause an error in Kafka discovery = Discovery(links=[link1, link2], title=file_name, parentIdentifier=parent_identifier, diff --git a/onestop-python-client/onestop/util/S3Utils.py b/onestop-python-client/onestop/util/S3Utils.py index 7bb0fbe..60fb876 100644 --- a/onestop-python-client/onestop/util/S3Utils.py +++ b/onestop-python-client/onestop/util/S3Utils.py @@ -1,5 +1,5 @@ import logging -import yaml + import uuid import boto3 import botocore @@ -15,69 +15,70 @@ class S3Utils: Attributes ---------- - conf: yaml file - aws-util-config-dev.yml - cred: yaml file - credentials.yml - logger: ClientLogger object - utilizes python logger library and creates logging for our specific needs - logger.info: ClientLogger object - logging statement that occurs when the class is instantiated + access_key: str + Cloud access key + + secret_key: str + Cloud secret key + + log_level: str + The log level to use for this class (Defaults to 'INFO') + + logger: ClientLogger object + Creates logging for us to log to. 
Methods ------- - connect(client_type, region) - connects to a boto3 client + connect(client_type, region) + connects to a boto3 client - objectkey_exists(bucket, s3_key) - checks to see if a s3 key path exists in a particular bucket + objectkey_exists(bucket, s3_key) + checks to see if a s3 key path exists in a particular bucket - get_uuid_metadata(boto_client, bucket, s3_key) - returns metadata uuid of an s3 object if it has one, otherwise prints that one does not exist + get_uuid_metadata(boto_client, bucket, s3_key) + returns metadata uuid of an s3 object if it has one, otherwise prints that one does not exist - add_uuid_metadata(boto_client, bucket, s3_key) - adds metadata uuid to an s3 object + add_uuid_metadata(boto_client, bucket, s3_key) + adds metadata uuid to an s3 object - upload_s3(boto_client, local_file, bucket, s3_key, overwrite) - uploads a file to s3 bucket + upload_s3(boto_client, local_file, bucket, s3_key, overwrite) + uploads a file to s3 bucket - get_csv_s3(boto_client, bucket, key) - gets a csv file from s3 bucket using smart open library + get_csv_s3(boto_client, bucket, key) + gets a csv file from s3 bucket using smart open library - read_bytes_s3(boto_client, bucket, key) - returns raw information of s3 object + read_bytes_s3(boto_client, bucket, key) + returns raw information of s3 object - upload_archive(boto_client, vault_name, src_data) - Add an archive to an Amazon S3 Glacier vault. The upload occurs synchronously. + upload_archive(boto_client, vault_name, src_data) + Add an archive to an Amazon S3 Glacier vault. The upload occurs synchronously. - s3_to_glacier(boto_client, bucket_name, key) - Changes storage class of s3 object from s3 -> glacier. Utilizes s3 client type + s3_to_glacier(boto_client, bucket_name, key) + Changes storage class of s3 object from s3 -> glacier. Utilizes s3 client type - s3_to_glacier_object_lock(boto_client, bucket_name, key, object_lock_mode, object_lock_retention) - Changes storage class of s3 object from s3 -> glacier and places it in object lock mode. Utilizes s3 client type + s3_to_glacier_object_lock(boto_client, bucket_name, key, object_lock_mode, object_lock_retention) + Changes storage class of s3 object from s3 -> glacier and places it in object lock mode. 
Utilizes s3 client type - s3_restore(boto_client, bucket_name, key, days) - Restores an object in S3 glacier back to S3 for specified amount of days + s3_restore(boto_client, bucket_name, key, days) + Restores an object in S3 glacier back to S3 for specified amount of days - retrieve_inventory(boto_client, vault_name) - Initiate an Amazon Glacier inventory-retrieval job + retrieve_inventory(boto_client, vault_name) + Initiate an Amazon Glacier inventory-retrieval job - retrieve_inventory_results(vault_name, boto_client, job_id) - Retrieve the results of an Amazon Glacier inventory-retrieval job + retrieve_inventory_results(vault_name, boto_client, job_id) + Retrieve the results of an Amazon Glacier inventory-retrieval job """ conf = None - def __init__(self, conf_loc, cred_loc): - - with open(conf_loc) as f: - self.conf = yaml.load(f, Loader=yaml.FullLoader) - - with open(cred_loc) as f: - self.cred = yaml.load(f, Loader=yaml.FullLoader) - - self.logger = ClientLogger.get_logger(self.__class__.__name__, self.conf['log_level'], False) + def __init__(self, access_key, secret_key, log_level = 'INFO', **wildargs): + self.access_key = access_key + self.secret_key = secret_key + self.logger = ClientLogger.get_logger(self.__class__.__name__, log_level, False) self.logger.info("Initializing " + self.__class__.__name__) + if wildargs: + self.logger.error("There were extra constructor arguments: " + str(wildargs)) + def connect(self, client_type, region): """ Connects to a boto3 client @@ -92,21 +93,29 @@ def connect(self, client_type, region): """ if client_type == "s3": - boto = boto3.client("s3", aws_access_key_id=self.cred['sandbox']['access_key'], - aws_secret_access_key=self.cred['sandbox']['secret_key'], region_name=region) + boto = boto3.client( + "s3", + aws_access_key_id=self.access_key, + aws_secret_access_key=self.secret_key, + region_name=region) if client_type == "s3_resource": - boto = boto3.resource("s3", region_name=region, aws_access_key_id=self.cred['sandbox']['access_key'], - aws_secret_access_key=self.cred['sandbox']['secret_key'] ) + boto = boto3.resource( + "s3", + region_name=region, + aws_access_key_id=self.access_key, + aws_secret_access_key=self.secret_key) if client_type == "glacier": - boto = boto3.client("glacier", region_name=region, aws_access_key_id=self.cred['sandbox']['access_key'], - aws_secret_access_key=self.cred['sandbox']['secret_key']) + boto = boto3.client( + "glacier", + region_name=region,aws_access_key_id=self.access_key, + aws_secret_access_key=self.secret_key) if client_type == "session": boto = boto3.Session( - aws_access_key_id=self.cred['sandbox']['access_key'], - aws_secret_access_key=self.cred['sandbox']['secret_key'], + aws_access_key_id=self.access_key, + aws_secret_access_key=self.secret_key, ) return boto diff --git a/onestop-python-client/tests/SqsHandlersTest.py b/onestop-python-client/tests/SqsHandlersTest.py index 12323ef..bbe4210 100644 --- a/onestop-python-client/tests/SqsHandlersTest.py +++ b/onestop-python-client/tests/SqsHandlersTest.py @@ -1,7 +1,7 @@ import json import unittest import boto3 - +import yaml from moto import mock_s3 from moto import mock_sqs from tests.utils import abspath_from_relative, create_delete_message @@ -54,9 +54,22 @@ class SqsHandlerTest(unittest.TestCase): def setUp(self): print("Set it up!") + + with open(abspath_from_relative(__file__, "../config/csb-data-stream-config-template.yml")) as f: + self.stream_conf = yaml.load(f, Loader=yaml.FullLoader) + with open(abspath_from_relative(__file__, 
"../config/aws-util-config-dev.yml")) as f: + self.cloud_conf = yaml.load(f, Loader=yaml.FullLoader) + with open(abspath_from_relative(__file__, "../config/credentials-template.yml")) as f: + self.cred = yaml.load(f, Loader=yaml.FullLoader) + self.wp = WebPublisher(self.wp_config, self.cred_config) - self.su = S3Utils(self.aws_config, self.cred_config) - self.s3ma = S3MessageAdapter(self.csb_config, self.su) + self.su = S3Utils(self.cred['sandbox']['access_key'], + self.cred['sandbox']['secret_key'], + "DEBUG") + self.s3ma = S3MessageAdapter(self.stream_conf['access_bucket'], + self.stream_conf['type'], + self.stream_conf['file_identifier_prefix'], + self.stream_conf['collection_id']) def tearDown(self): print("Tear it down!") @@ -64,19 +77,21 @@ def tearDown(self): @mock_s3 @mock_sqs def init_s3(self): - bucket = self.su.conf['s3_bucket'] - key = self.su.conf['s3_key'] + bucket = self.cloud_conf['s3_bucket'] + key = self.cloud_conf['s3_key'] boto_client = self.su.connect("s3", None) boto_client.create_bucket(Bucket=bucket) boto_client.put_object(Bucket=bucket, Key=key, Body="foobar") - sqs_client = boto3.client('sqs', region_name=self.su.conf['s3_region']) - sqs_queue = sqs_client.create_queue(QueueName=self.su.conf['sqs_name']) + sqs_client = boto3.client('sqs', region_name=self.cloud_conf['s3_region']) + sqs_queue = sqs_client.create_queue(QueueName=self.cloud_conf['sqs_name']) self.sqs = SqsConsumer(self.aws_config, self.cred_config) - message = create_delete_message(self.su.conf['s3_region'], bucket, key) + message = create_delete_message(self.cloud_conf['s3_region'], bucket, key) sqs_client.send_message(QueueUrl=sqs_queue['QueueUrl'], MessageBody=json.dumps(message)) - return sqs_queue['QueueUrl'] + sqs_queue['QueueUrl'] + @mock_s3 + @mock_sqs def delete_handler_wrapper(self, recs): handler = create_delete_handler(self.wp) result = handler(recs) @@ -85,5 +100,8 @@ def delete_handler_wrapper(self, recs): @mock_sqs def test_delete_handler(self): mock_queue_url = self.init_s3() - sqs_queue = boto3.resource('sqs', region_name=self.su.conf['s3_region']).Queue(mock_queue_url) - self.sqs.receive_messages(sqs_queue, self.su.conf['sqs_max_polls'], self.delete_handler_wrapper) + sqs_queue = boto3.resource('sqs', region_name=self.stream_conf['s3_region']).Queue(mock_queue_url) + self.sqs.receive_messages(sqs_queue, self.stream_conf['sqs_max_polls'], self.delete_handler_wrapper) + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/onestop-python-client/tests/util/S3MessageAdapterTest.py b/onestop-python-client/tests/util/S3MessageAdapterTest.py index 41a8f9d..a960737 100644 --- a/onestop-python-client/tests/util/S3MessageAdapterTest.py +++ b/onestop-python-client/tests/util/S3MessageAdapterTest.py @@ -1,4 +1,6 @@ import unittest +import yaml + from moto import mock_s3 from tests.utils import abspath_from_relative from onestop.util.S3Utils import S3Utils @@ -51,22 +53,35 @@ class S3MessageAdapterTest(unittest.TestCase): def setUp(self): print("Set it up!") - self.s3_utils = S3Utils(abspath_from_relative(__file__, "../../config/aws-util-config-dev.yml"), - abspath_from_relative(__file__, "../../config/credentials-template.yml")) - self.s3ma = S3MessageAdapter(abspath_from_relative(__file__, "../../config/csb-data-stream-config-template.yml"), - self.s3_utils) + + with open(abspath_from_relative(__file__, "../../config/csb-data-stream-config-template.yml")) as f: + self.stream_conf = yaml.load(f, Loader=yaml.FullLoader) + with open(abspath_from_relative(__file__, 
"../../config/aws-util-config-dev.yml")) as f: + self.cloud_conf = yaml.load(f, Loader=yaml.FullLoader) + with open(abspath_from_relative(__file__, "../../config/credentials-template.yml")) as f: + self.cred = yaml.load(f, Loader=yaml.FullLoader) + + self.s3_utils = S3Utils(self.cred['sandbox']['access_key'], + self.cred['sandbox']['secret_key'], + "DEBUG") + self.s3ma = S3MessageAdapter(self.stream_conf['access_bucket'], + self.stream_conf['type'], + self.stream_conf['file_identifier_prefix'], + self.stream_conf['collection_id']) + + self.region = self.cloud_conf['s3_region'] + self.bucket = self.cloud_conf['s3_bucket'] def tearDown(self): print("Tear it down!") def test_parse_config(self): - self.assertFalse(self.s3ma.conf['collection_id']==None) - + self.assertFalse(self.stream_conf['collection_id'] == None) @mock_s3 def test_transform(self): - s3 = self.s3_utils.connect('s3', self.s3_utils.conf['s3_region']) - location = {'LocationConstraint': self.s3_utils.conf['s3_region']} + s3 = self.s3_utils.connect('s3', self.region) + location = {'LocationConstraint': self.region} bucket = 'nesdis-ncei-csb-dev' key = 'csv/file1.csv' key2 = 'csv/file2.csv' @@ -81,4 +96,14 @@ def test_transform(self): print(payload) self.assertTrue(payload!=None) + @mock_s3 + def test_extra_parameters_constructor(self): + testParams = {"access_bucket": "blah1", + "type": "blah2", + "file_id_prefix": "blah3", + "collection_id": "blah4", + "extra": "extra value"} + self.assertRaises(Exception, S3MessageAdapter(**testParams)) +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/onestop-python-client/tests/util/S3UtilsTest.py b/onestop-python-client/tests/util/S3UtilsTest.py index 34850ad..acb0af4 100644 --- a/onestop-python-client/tests/util/S3UtilsTest.py +++ b/onestop-python-client/tests/util/S3UtilsTest.py @@ -1,126 +1,130 @@ import csv import unittest import uuid +import yaml + from moto import mock_s3 from moto import mock_glacier - from tests.utils import abspath_from_relative from onestop.util.S3Utils import S3Utils class S3UtilsTest(unittest.TestCase): - su = None def setUp(self): print("Set it up!") - self.su = S3Utils(abspath_from_relative(__file__, "../../config/aws-util-config-dev.yml"), - abspath_from_relative(__file__, "../../config/credentials.yml")) - def tearDown(self): - print("Tear it down!") - # Remove files from bucket + with open(abspath_from_relative(__file__, "../../config/csb-data-stream-config-template.yml")) as f: + self.stream_conf = yaml.load(f, Loader=yaml.FullLoader) + with open(abspath_from_relative(__file__, "../../config/aws-util-config-dev.yml")) as f: + self.cloud_conf = yaml.load(f, Loader=yaml.FullLoader) + with open(abspath_from_relative(__file__, "../../config/credentials-template.yml")) as f: + self.cred = yaml.load(f, Loader=yaml.FullLoader) - def test_parse_config(self): - self.assertFalse(self.su.conf['sqs_url']==None) + self.s3_utils = S3Utils(self.cred['sandbox']['access_key'], + self.cred['sandbox']['secret_key'], + "DEBUG") + + self.region = self.cloud_conf['s3_region'] + self.region2 = self.region + self.bucket = self.cloud_conf['s3_bucket'] @mock_s3 def test_get_uuid_metadata(self): - boto_client = self.su.connect("s3_resource", None) + boto_client = self.s3_utils.connect("s3_resource", None) s3_key = "csv/file1.csv" - bucket = self.su.conf['s3_bucket'] - region = self.su.conf['s3_region'] - location = {'LocationConstraint': region} - boto_client.create_bucket(Bucket=bucket, CreateBucketConfiguration=location) + + location = 
{'LocationConstraint': self.region} + boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) obj_uuid = str(uuid.uuid4()) - boto_client.Object(bucket, s3_key).put(Bucket=bucket, Key=s3_key, Body="my_body", Metadata={'object-uuid': obj_uuid}) + boto_client.Object(self.bucket, s3_key).put(Bucket=self.bucket, Key=s3_key, Body="my_body", Metadata={'object-uuid': obj_uuid}) - self.assertFalse(self.su.get_uuid_metadata(boto_client, bucket, s3_key) == None) + self.assertFalse(self.s3_utils.get_uuid_metadata(boto_client, self.bucket, s3_key) == None) @mock_s3 def test_add_uuid_metadata(self): - region = self.su.conf['s3_region'] - boto_client = self.su.connect("s3_resource", region) + boto_client = self.s3_utils.connect("s3_resource", self.region) s3_key = "csv/file1.csv" - bucket = self.su.conf['s3_bucket'] - location = {'LocationConstraint': region} - boto_client.create_bucket(Bucket=bucket, CreateBucketConfiguration=location) - boto_client.Object(bucket, s3_key).put(Bucket=bucket, Key=s3_key, Body="my_body") + location = {'LocationConstraint': self.region} + boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) + boto_client.Object(self.bucket, s3_key).put(Bucket=self.bucket, Key=s3_key, Body="my_body") - self.assertTrue(self.su.add_uuid_metadata(boto_client, bucket, s3_key)) + self.assertTrue(self.s3_utils.add_uuid_metadata(boto_client, self.bucket, s3_key)) @mock_s3 def test_add_file_s3(self): - boto_client = self.su.connect("s3", None) + boto_client = self.s3_utils.connect("s3", None) local_file = abspath_from_relative(__file__, "../data/file4.csv") s3_key = "csv/file4.csv" - bucket = self.su.conf['s3_bucket'] - region = self.su.conf['s3_region'] - location = {'LocationConstraint': region} - boto_client.create_bucket(Bucket=bucket, CreateBucketConfiguration=location) + location = {'LocationConstraint': self.region} + boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) overwrite = True - self.assertTrue(self.su.upload_s3(boto_client, local_file, bucket, s3_key, overwrite)) + self.assertTrue(self.s3_utils.upload_s3(boto_client, local_file, self.bucket, s3_key, overwrite)) + @mock_s3 def test_get_csv_s3(self): - boto_client = self.su.connect("session", None) + boto_session = self.s3_utils.connect("session", None) + s3 = self.s3_utils.connect('s3', self.cloud_conf['s3_region']) + location = {'LocationConstraint': self.region} s3_key = "csv/file1.csv" - bucket = self.su.conf['s3_bucket'] - sm_open_file = self.su.get_csv_s3(boto_client, bucket, s3_key) + s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) + s3.put_object(Bucket=self.bucket, Key=s3_key, Body="body") + + sm_open_file = self.s3_utils.get_csv_s3(boto_session, self.bucket, s3_key) # print("reading csv:" + line.decode('utf-8')) csv_reader = csv.DictReader(sm_open_file) for row in csv_reader: print(str(row["LON"])) + @mock_s3 def test_read_bytes_s3(self): - boto_client = self.su.connect("s3", None) + boto_client = self.s3_utils.connect("s3", None) s3_key = "csv/file1.csv" - bucket = self.su.conf['s3_bucket'] - self.assertTrue(self.su.read_bytes_s3(boto_client, bucket, s3_key)) + boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration={'LocationConstraint': self.region}) + boto_client.put_object(Bucket=self.bucket, Key=s3_key, Body="body") + + self.assertTrue(self.s3_utils.read_bytes_s3(boto_client, self.bucket, s3_key)) @mock_s3 def test_add_files(self): - boto_client = self.su.connect("s3", None) + boto_client = 
self.s3_utils.connect("s3", None) local_files = ["file1_s3.csv", "file2.csv", "file3.csv"] - bucket = self.su.conf['s3_bucket'] - region = self.su.conf['s3_region'] - location = {'LocationConstraint': region} - boto_client.create_bucket(Bucket=bucket, CreateBucketConfiguration=location) + location = {'LocationConstraint': self.region} + boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) overwrite = True - s3_file = None + for file in local_files: local_file = abspath_from_relative(__file__, "../data/" + file) s3_file = "csv/" + file - self.assertTrue(self.su.upload_s3(boto_client, local_file, bucket, s3_file, overwrite)) + self.assertTrue(self.s3_utils.upload_s3(boto_client, local_file, self.bucket, s3_file, overwrite)) @mock_s3 @mock_glacier def test_s3_cross_region(self): print('Cross Region Vault Upload ------------- ') key = "csv/file1.csv" - # grabs te region and bucket name from the config file - region = self.su.conf['s3_region'] - bucket = self.su.conf['s3_bucket'] # makes connection to low level s3 client - s3 = self.su.connect('s3', region) - location = {'LocationConstraint': region} - s3.create_bucket(Bucket=bucket, CreateBucketConfiguration=location) - s3.put_object(Bucket=bucket, Key=key, Body="body") + s3 = self.s3_utils.connect('s3', self.region) + location = {'LocationConstraint': self.region} + s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) + s3.put_object(Bucket=self.bucket, Key=key, Body="body") # Reads object data and stores it into a variable - file_data = self.su.read_bytes_s3(s3, bucket, key) + file_data = self.s3_utils.read_bytes_s3(s3, self.bucket, key) # Redirecting upload to vault in second region - glacier = self.su.connect("glacier", self.su.conf['s3_region2']) - vault_name = self.su.conf['vault_name'] + glacier = self.s3_utils.connect("glacier", self.region2) + vault_name = self.cloud_conf['vault_name'] glacier.create_vault(vaultName=vault_name) print('vault name: ' + str(vault_name)) - print('region name: ' + str(self.su.conf['s3_region2'])) + print('region name: ' + str(self.region2)) print('-------file data---------') print(file_data) - response = self.su.upload_archive(glacier, vault_name, file_data) + response = self.s3_utils.upload_archive(glacier, vault_name, file_data) self.assertTrue(response['archiveId']!=None) @@ -134,18 +138,15 @@ def test_s3_to_glacier(self): print("S3 to Glacier---------") key = "csv/file1_s3.csv" - # grabs te region and bucket name from the config file - region = self.su.conf['s3_region'] - bucket = self.su.conf['s3_bucket'] # Create boto3 low level api connection - s3 = self.su.connect('s3', region) - location = {'LocationConstraint': region} - s3.create_bucket(Bucket=bucket, CreateBucketConfiguration=location) - s3.put_object(Bucket=bucket, Key=key, Body="body") + s3 = self.s3_utils.connect('s3', self.region) + location = {'LocationConstraint': self.region} + s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) + s3.put_object(Bucket=self.bucket, Key=key, Body="body") # Using the S3 util class invoke the change of storage class - response = self.su.s3_to_glacier(s3, bucket, key) + response = self.s3_utils.s3_to_glacier(s3, self.bucket, key) print(response['ResponseMetadata']['HTTPHeaders']['x-amz-storage-class']) # Assert 'x-amz-storage-class': 'GLACIER' @@ -157,18 +158,16 @@ def test_s3_restore(self): Uses high level api to restore object from glacier to s3 """ - region = self.su.conf['s3_region2'] - bucket = self.su.conf['s3_bucket'] key = 
"csv/file1_s3.csv" days = 3 # use high level api - s3 = self.su.connect('s3_resource', region) - location = {'LocationConstraint': region} - s3.create_bucket(Bucket=bucket, CreateBucketConfiguration=location) - s3.Object(bucket, key).put(Bucket=bucket, Key=key, Body="body") + s3 = self.s3_utils.connect('s3_resource', self.region2) + location = {'LocationConstraint': self.region2} + s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) + s3.Object(self.bucket, key).put(Bucket=self.bucket, Key=key, Body="body") - self.assertTrue(self.su.s3_restore(s3, bucket, key, days) != None) + self.assertTrue(self.s3_utils.s3_restore(s3, self.bucket, key, days) != None) @mock_glacier def test_retrieve_inventory(self): @@ -178,12 +177,12 @@ def test_retrieve_inventory(self): # Using glacier api initiates job and returns archive results # Connect to your glacier vault for retrieval - glacier = self.su.connect("glacier", self.su.conf['s3_region2']) - vault_name = self.su.conf['vault_name'] + glacier = self.s3_utils.connect("glacier", self.region2) + vault_name = self.cloud_conf['vault_name'] glacier.create_vault(vaultName=vault_name) - response = self.su.retrieve_inventory(glacier, vault_name) + response = self.s3_utils.retrieve_inventory(glacier, vault_name) self.assertTrue(response['jobId']!= None) ''' @@ -203,7 +202,13 @@ def test_retrieve_inventory_results(self, jobid): self.assertTrue(inventory != None) ''' - + @mock_s3 + def test_extra_parameters_constructor(self): + testParams = {"access_key": "blah", + "secret_key": "blah", + "log_level": "DEBUG", + "extra": "extra value"} + self.assertRaises(Exception, S3Utils(**testParams)) if __name__ == '__main__': unittest.main() \ No newline at end of file diff --git a/scripts/launch_e2e.py b/scripts/launch_e2e.py index 2d5b79b..6d60b2c 100644 --- a/scripts/launch_e2e.py +++ b/scripts/launch_e2e.py @@ -1,6 +1,8 @@ import argparse import json import os +import yaml + from onestop.util.SqsConsumer import SqsConsumer from onestop.util.S3Utils import S3Utils from onestop.util.S3MessageAdapter import S3MessageAdapter @@ -55,8 +57,8 @@ def handler(recs): # Upload to archive file_data = s3_utils.read_bytes_s3(s3_client, bucket, s3_key) - glacier = s3_utils.connect("glacier", s3_utils.conf['s3_region']) - vault_name = s3_utils.conf['vault_name'] + glacier = s3_utils.connect("glacier", cloud_conf['s3_region']) + vault_name = cloud_conf['vault_name'] resp_dict = s3_utils.upload_archive(glacier, vault_name, file_data) @@ -106,9 +108,9 @@ def handler(recs): # High-level api s3_resource = s3_utils.connect("s3_resource", None) - bucket = s3_utils.conf['s3_bucket'] + bucket = cloud_conf['s3_bucket'] overwrite = True - sqs_max_polls = s3_utils.conf['sqs_max_polls'] + sqs_max_polls = cloud_conf['sqs_max_polls'] # Add 3 files to bucket local_files = ["file1.csv", "file4.csv"] s3_file = None @@ -141,18 +143,35 @@ def handler(recs): # Get configuration file path locations conf_loc = args.pop('conf') cred_loc = args.pop('cred') + stream_conf_loc = args.pop('cred') - # Upload a test file to s3 bucket - s3_utils = S3Utils(conf_loc, cred_loc) + with open(os.path.abspath(os.path.join(os.path.dirname(__file__), cred_loc))) as f: + cred = yaml.load(f, Loader=yaml.FullLoader) + with open(os.path.abspath(os.path.join(os.path.dirname(__file__), conf_loc))) as f: + cloud_conf = yaml.load(f, Loader=yaml.FullLoader) + with open(os.path.abspath(os.path.join(os.path.dirname(__file__), stream_conf_loc))) as f: + stream_conf = yaml.load(f, Loader=yaml.FullLoader) - # Low-level 
api ? Can we just use high level revisit me! - s3_client = s3_utils.connect("s3", None) + s3_utils = S3Utils(cred['sandbox']['access_key'], + cred['sandbox']['secret_key'], + "DEBUG") - bucket = s3_utils.conf['s3_bucket'] + bucket = cloud_conf['s3_bucket'] + sqs_max_polls = cloud_conf['sqs_max_polls'] - sqs_max_polls = s3_utils.conf['sqs_max_polls'] + #Source + access_bucket = stream_conf['access_bucket'] - # Add 3 files to bucket + #Onestop related + file_id_prefix = stream_conf['file_identifier_prefix'] + file_format = stream_conf['format'] + headers = stream_conf['headers'] + type = stream_conf['type'] + + # Low-level api ? Can we just use high level revisit me! + s3_client = s3_utils.connect("s3", None) + + # Upload test files to s3 bucket local_files = ["file1.csv", "file4.csv"] s3_file = None for file in local_files: @@ -162,9 +181,11 @@ def handler(recs): if not s3_utils.upload_s3(s3_client, local_file, bucket, s3_file, True): exit("Error setting up for e2e: The test files were not uploaded to the s3 bucket therefore the tests cannot continue.") + + # Receive s3 message and MVM from SQS queue sqs_consumer = SqsConsumer(conf_loc, cred_loc) - s3ma = S3MessageAdapter("config/csb-data-stream-config.yml", s3_utils) + s3ma = S3MessageAdapter(access_bucket, headers, type, file_id_prefix, "DEBUG") wp = WebPublisher("config/web-publisher-config-dev.yml", cred_loc) queue = sqs_consumer.connect() diff --git a/scripts/launch_pyconsumer.py b/scripts/launch_pyconsumer.py index f9dbcf6..7850f38 100644 --- a/scripts/launch_pyconsumer.py +++ b/scripts/launch_pyconsumer.py @@ -1,4 +1,6 @@ import os +import yaml + from onestop.util.SqsConsumer import SqsConsumer from onestop.util.S3Utils import S3Utils from onestop.util.S3MessageAdapter import S3MessageAdapter @@ -49,6 +51,10 @@ def handler(recs): if __name__ == '__main__': conf_loc = "/etc/config/config.yml" cred_loc = "creds.yml" + with open(os.path.abspath(os.path.join(os.path.dirname(__file__), "creds.yml"))) as f: + cred = yaml.load(f, Loader=yaml.FullLoader) + with open(os.path.abspath(os.path.join(os.path.dirname(__file__), "/etc/config/config.yml"))) as f: + conf = yaml.load(f, Loader=yaml.FullLoader) registry_user = os.environ.get("REGISTRY_USERNAME") registry_pwd = os.environ.get("REGISTRY_PASSWORD") @@ -71,8 +77,10 @@ def handler(recs): r = open(cred_loc, "r") # # Receive s3 message and MVM from SQS queue - s3_utils = S3Utils(conf_loc, cred_loc) - sqs_max_polls = s3_utils.conf['sqs_max_polls'] + s3_utils = S3Utils(cred['sandbox']['access_key'], + cred['sandbox']['secret_key'], + "DEBUG") + sqs_max_polls = conf['sqs_max_polls'] sqs_consumer = SqsConsumer(conf_loc, cred_loc) queue = sqs_consumer.connect() From 47d9d335752ac6169849547d941e75958f94ddc7 Mon Sep 17 00:00:00 2001 From: Erin Date: Thu, 15 Apr 2021 17:05:13 -0600 Subject: [PATCH 002/100] 1500-WebPublisher adjusted some documentation wording and added test_WebPublisher_unit as unit test until create folder structure for integration vs unit tests. 
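
For context, a minimal usage sketch of the constructor signature the new unit test exercises. The values and argument order are taken from test_WebPublisher_unit.py's setUp and fixtures, so treat this as illustrative rather than canonical API documentation:

    import json
    from onestop.WebPublisher import WebPublisher

    wp = WebPublisher("https://localhost/onestop/api/registry",  # registry_base_url
                      "admin",                                   # registry_username
                      "a_password",                              # registry_password
                      "https://localhost/onestop/api/search",    # onestop_base_url
                      "DEBUG")                                   # log level
    payload = json.dumps(payload_dict)  # payload_dict as in the test fixture
    wp.publish_registry("granule", uuid, payload, "POST")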
--- onestop-python-client/onestop/WebPublisher.py | 28 ++-- .../tests/test_WebPublisher_unit.py | 145 ++++++++++++++++++ 2 files changed, 159 insertions(+), 14 deletions(-) create mode 100644 onestop-python-client/tests/test_WebPublisher_unit.py diff --git a/onestop-python-client/onestop/WebPublisher.py b/onestop-python-client/onestop/WebPublisher.py index 55ca06c..d944f8f 100644 --- a/onestop-python-client/onestop/WebPublisher.py +++ b/onestop-python-client/onestop/WebPublisher.py @@ -8,28 +8,28 @@ class WebPublisher: Attributes ---------- registry_base_url: str - url for registry endpoint + URL for registry endpoint registry_username: str - username for posting metadata to registry + Registry username where credentials needed registry_password: str - password for posting metadata to registry + Registry password where credentials needed onestop_base_url: str - url for onestop endpoint + URL for OneStop endpoint logger.info: str logging level Methods ------- publish_registry(metadata_type, uuid, payload, method) - Publish to registry with either POST,PUT, OR PATCH methods + Publish an item to registry with either POST, PUT, OR PATCH methods delete_registry(metadata_type, uuid) - Deletes item from registry + Delete an item from registry search_registry(metadata_type, uuid) - Searches for an item in registry given its metadata type and uuid + Search for an item in registry given its metadata type and uuid search_onestop(metadata_type, payload) - Acquires the item, collection or granule, from OneStop + Search for an item in OneStop given its metadata type and payload search criteria get_granules_onestop(self, uuid) - Acquires granules from OneStop given the uuid + Search for a granule in OneStop given its uuid """ conf = None @@ -84,12 +84,12 @@ def publish_registry(self, metadata_type, uuid, payload, method): def delete_registry(self, metadata_type, uuid): """ - Deletes item from registry + Delete an item from registry :param metadata_type: str metadata type (GRANULE/COLLECTION) :param uuid: str - uuid you want to publish with + uuid you want to delete :return: str response message indicating if delete was successful @@ -105,7 +105,7 @@ def delete_registry(self, metadata_type, uuid): def search_registry(self, metadata_type, uuid): """ - Searches for an item in registry given its metadata type and uuid + Search for an item in registry given its metadata type and uuid :param metadata_type: str metadata type (GRANULE/COLLECTION) @@ -126,7 +126,7 @@ def search_registry(self, metadata_type, uuid): def search_onestop(self, metadata_type, payload): """ - Searches for an item in OneStop given its metadata type and payload search criteria. + Search for an item in OneStop given its metadata type and payload search criteria. 
:param metadata_type: str metadata type (GRANULE/COLLECTION) @@ -147,7 +147,7 @@ def search_onestop(self, metadata_type, payload): def get_granules_onestop(self, uuid): """ - Searches for a granule in OneStop given its uuid + Search for a granule in OneStop given its uuid :param uuid: str uuid you want search for diff --git a/onestop-python-client/tests/test_WebPublisher_unit.py b/onestop-python-client/tests/test_WebPublisher_unit.py new file mode 100644 index 0000000..3e987fb --- /dev/null +++ b/onestop-python-client/tests/test_WebPublisher_unit.py @@ -0,0 +1,145 @@ +import json +import unittest + +from unittest.mock import ANY +from unittest import mock +from moto import mock_s3 +from onestop.WebPublisher import WebPublisher + +class WebPublisherTest(unittest.TestCase): + username="admin" + password="a_password" + uuid = "9f0a5ff2-fcc0-5bcb-a225-024b669c9bba" + registry_base_url = "https://localhost/onestop/api/registry" + registry_full_url_granule = registry_base_url + "/metadata/granule/" + uuid + registry_full_url_collection = registry_base_url + "/metadata/collection/" + uuid + onestop_base_url = "https://localhost/onestop/api/search" + + payloadDict = { + "fileInformation": { + "name": "file2.csv", + "size": 1385, + "checksums": [{ + "algorithm": "MD5", + "value": "44d2452e8bc2c8013e9c673086fbab7a" + }] + }, + "relationships": [ + {"type": "COLLECTION", + "id": "fdb56230-87f4-49f2-ab83-104cfd073177" + } + ], + "fileLocations": { + "nesdis-ncei-csb-dev/csv/file2.csv": { + "uri": "https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com/csv/file2.csv", + "type": "ACCESS", + "restricted": False, + "serviceType": "HTTPS", + "asynchronous": False + } + }, + "discovery": { + "title": "file2.csv", + "parentIdentifier": "fdb56230-87f4-49f2-ab83-104cfd073177", + "fileIdentifier": "gov.noaa.ncei.csb:file2" + } + } + + addlocDict = { + "fileLocations": { + "Crt3a-Hq2SGUp8n8QSRNpFIf59kmMONqaKlJ_7-Igd8ijMM62deLdtVkiYwlaePbC4JNCsfeg5i-DWDmwxLIx9V-OGgiQp_CZ0rEFXIZxM_ZPyGu7TTv8wwos5SvAI6xDURhzoCH-w": { + "uri": "/282856304593/vaults/noaa-nesdis-ncei-vault-test/archives/Crt3a-Hq2SGUp8n8QSRNpFIf59kmMONqaKlJ_7-Igd8ijMM62deLdtVkiYwlaePbC4JNCsfeg5i-DWDmwxLIx9V-OGgiQp_CZ0rEFXIZxM_ZPyGu7TTv8wwos5SvAI6xDURhzoCH-w", + "type": "ACCESS", + "restricted": True, + "serviceType": "Amazon:AWS:Glacier", + "asynchronous": True + } + } + } + + + def setUp(self): + print("Set it up!") + + self.wp = WebPublisher(self.registry_base_url, + self.username, + self.password, + self.onestop_base_url, + 'DEBUG') + + def tearDown(self): + print("Tear it down!") + + def mocked_requests_patch(*args, **kwargs): + class MockResponse: + def __init__(self, json_data, status_code): + self.json_data = json_data + self.status_code = status_code + + def json(self): + return self.json_data + + print ("args: "+str(args)+" kwargs: "+str(kwargs)) + + return MockResponse({"key1":"value1"}, 200) + + @mock_s3 + @mock.patch('requests.post', side_effect=mocked_requests_patch) + def test_publish(self, mock_get): + payload = json.dumps(self.payloadDict) + self.wp.publish_registry("granule", self.uuid, payload, "POST") + + mock_get.assert_called_with(url = self.registry_full_url_granule, auth = ANY, data = ANY, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = (self.username, self.password), data = ANY, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, data = payload, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, data = ANY, verify = False, headers = 
ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, data = ANY, verify = ANY, headers = {'Content-Type': 'application/json'}) + + @mock_s3 + @mock.patch('requests.put', side_effect=mocked_requests_patch) + def test_publish(self, mock_get): + payload = json.dumps(self.payloadDict) + self.wp.publish_registry("granule", self.uuid, payload, "PUT") + + mock_get.assert_called_with(url = self.registry_full_url_granule, auth = ANY, data = ANY, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = (self.username, self.password), data = ANY, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, data = payload, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, data = ANY, verify = False, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, data = ANY, verify = ANY, headers = {'Content-Type': 'application/json'}) + + @mock_s3 + @mock.patch('requests.patch', side_effect=mocked_requests_patch) + def test_add_glacier_location(self, mock_get): + payload = json.dumps(self.addlocDict) + self.wp.publish_registry("granule", self.uuid, payload, "PATCH") + + mock_get.assert_called_with(url = self.registry_full_url_granule, auth = ANY, data = ANY, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = (self.username, self.password), data = ANY, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, data = payload, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, data = ANY, verify = False, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, data = ANY, verify = ANY, headers = {'Content-Type': 'application/json'}) + + @mock_s3 + @mock.patch('requests.delete', side_effect=mocked_requests_patch) + def test_delete_registry_granule(self, mock_get): + self.wp.delete_registry("granule", self.uuid) + + mock_get.assert_called_with(url = self.registry_full_url_granule, headers = ANY, auth = ANY, verify = ANY) + mock_get.assert_called_with(url = ANY, auth = (self.username, self.password), verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, verify = False, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, verify = ANY, headers = {'Content-Type': 'application/json'}) + + @mock_s3 + @mock.patch('requests.delete', side_effect=mocked_requests_patch) + def test_delete_registry_collection(self, mock_get): + self.wp.delete_registry("collection", self.uuid) + + mock_get.assert_called_with(url = self.registry_full_url_collection, headers = ANY, auth = ANY, verify = ANY) + mock_get.assert_called_with(url = ANY, auth = (self.username, self.password), verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, verify = False, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, verify = ANY, headers = {'Content-Type': 'application/json'}) + +if __name__ == '__main__': + unittest.main() \ No newline at end of file From 85a9096d5415bd606934c9d00c7a69b0722f764d Mon Sep 17 00:00:00 2001 From: Erin Date: Thu, 15 Apr 2021 17:17:05 -0600 Subject: [PATCH 003/100] 1500-Adjusted documentation indentation in WebPublisher --- onestop-python-client/onestop/WebPublisher.py | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/onestop-python-client/onestop/WebPublisher.py 
b/onestop-python-client/onestop/WebPublisher.py index d944f8f..75ee99f 100644 --- a/onestop-python-client/onestop/WebPublisher.py +++ b/onestop-python-client/onestop/WebPublisher.py @@ -7,29 +7,29 @@ class WebPublisher: Attributes ---------- - registry_base_url: str - URL for registry endpoint - registry_username: str - Registry username where credentials needed - registry_password: str - Registry password where credentials needed - onestop_base_url: str - URL for OneStop endpoint - logger.info: str - logging level + registry_base_url: str + URL for registry endpoint + registry_username: str + Registry username where credentials needed + registry_password: str + Registry password where credentials needed + onestop_base_url: str + URL for OneStop endpoint + logger.info: str + logging level Methods ------- - publish_registry(metadata_type, uuid, payload, method) - Publish an item to registry with either POST, PUT, OR PATCH methods - delete_registry(metadata_type, uuid) - Delete an item from registry - search_registry(metadata_type, uuid) - Search for an item in registry given its metadata type and uuid - search_onestop(metadata_type, payload) - Search for an item in OneStop given its metadata type and payload search criteria - get_granules_onestop(self, uuid) - Search for a granule in OneStop given its uuid + publish_registry(metadata_type, uuid, payload, method) + Publish an item to registry with either POST, PUT, OR PATCH methods + delete_registry(metadata_type, uuid) + Delete an item from registry + search_registry(metadata_type, uuid) + Search for an item in registry given its metadata type and uuid + search_onestop(metadata_type, payload) + Search for an item in OneStop given its metadata type and payload search criteria + get_granules_onestop(self, uuid) + Search for a granule in OneStop given its uuid """ conf = None From 85ada2290c305218bbe93cae1cccb3e7c622b7e2 Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 16 Apr 2021 15:52:18 -0600 Subject: [PATCH 004/100] 1500-Changed CsbExtractor class constructor(adjusted documentation) to take dictionary with extra parameters allowed as well as methods within this class not to reference config but the variable that was set. Adjusted effected tests. 
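
The extraction methods are now static and operate on an already-open file object instead of an S3Utils instance and key held by a constructor. A minimal sketch of the new call pattern, assuming placeholder credentials and a bucket/key that actually exist (the S3Utils calls are the same ones the updated tests use):

    from onestop.util.S3Utils import S3Utils
    from onestop.extract.CsbExtractor import CsbExtractor

    s3_utils = S3Utils(access_key, secret_key, "DEBUG")  # credentials are placeholders
    sm_open_file = s3_utils.get_csv_s3(s3_utils.connect("session", None), bucket, key)
    bounds = CsbExtractor.get_spatial_temporal_bounds(sm_open_file, 'LON', 'LAT', 'TIME')
    min_lon, min_lat, max_lon, max_lat = bounds["geospatial"]
    coords = CsbExtractor.extract_coords(sm_open_file, max_lon, max_lat, min_lon, min_lat)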
--- .../onestop/extract/CsbExtractor.py | 127 ++++++------------ .../tests/extractor/CsbExtractorTest.py | 85 +++++++----- 2 files changed, 98 insertions(+), 114 deletions(-) diff --git a/onestop-python-client/onestop/extract/CsbExtractor.py b/onestop-python-client/onestop/extract/CsbExtractor.py index e79cddc..b1006cb 100644 --- a/onestop-python-client/onestop/extract/CsbExtractor.py +++ b/onestop-python-client/onestop/extract/CsbExtractor.py @@ -2,61 +2,33 @@ from datetime import datetime class CsbExtractor: + """ A class used to extract geospatial data from csv files in an s3 bucket - Attributes - ---------- - su : S3 Utils object - an instance of the s3 utils class used to connect to the corresponding s3 bucket to get access to the csv file for extraction - boto_client: boto3 client - specific boto3 client type (s3, s3_resource, glacier, session) used to access aws resources - bucket: str - the name of the s3 bucket in which you want to access - key: str - the name of key path for the specific item you want to access in the bucket - - Methods ------- is_csv(file_name) - checks to see if the given file is of type csv + Verifies a file name ends with '.csv' get_spatial_temporal_bounds(lon_column_name, lat_column_name, date_column_name) - extracts min/max longitude and latitude values as well as beginning and ending dates from specified csv file + Gets the spacial bounding box for the open file. This seeks to the start of the file at start and the end. extract_coords(max_lon, max_lat, min_lon, min_lat) - extracts specific coordinates corresponding to min/max longitude and latitude values given from get_spatial_temporal_bounds(....) method + Given the max/min lon and lat, the function will parse the csv file to extract the coordinates within the given bounding box. """ - def __init__(self, su, key): - """ - :param su: S3 Utils object - an instance of the s3 utils class used to connect to the corresponding s3 bucket to get access to the csv file for extraction - :param key: str - the name of key path for the specific item you want to access in the bucket - - Other Attributes - ________________ - boto_client: boto3 client - specific boto3 client type (s3, s3_resource, glacier, session) used to access aws resources - bucket: str - the name of the s3 bucket in which you want to access + @staticmethod + def is_csv(file_name): """ - self.su = su - boto_client = self.su.connect("session", None) - bucket = self.su.conf['s3_bucket'] - self.key = key - - def is_csv(self, file_name): - """ - Checks to see if the given file is of type csv + Verifies a file name ends with '.csv' :param file_name: str - the name of the file in the s3 bucket i.e. file1.csv + File name with extension on the end. - :return: boolean - True if the file name contains .csv and False otherwise + :return: str + True if ends with csv + False if doesn't end with csv """ csv_str = '.csv' if file_name.endswith(csv_str): @@ -64,28 +36,22 @@ def is_csv(self, file_name): return False - # def smart_open_read(self, key): - # boto_client = self.su.connect("session", None) - # bucket = self.su.conf['s3_bucket'] - # self.su.read_csv_s3(boto_client, bucket, key) - - - def get_spatial_temporal_bounds(self, lon_column_name, lat_column_name, date_column_name): + @staticmethod + def get_spatial_temporal_bounds(sm_open_file, lon_column_name, lat_column_name, date_column_name): """ - Extracts min/max longitude and latitude values as well as beginning and ending dates from specified csv file + Gets the spacial bounding box for the open file. 
This seeks to the start of the file at start and the end. + :param sm_open_file: file-like object + A file-like object that is open, say from smart_open's sm_open. :param lon_column_name: str - name of longitude column in the csv file + Longitude column name :param lat_column_name: str - name of the latitude column in the csv file + Latitude column name :param date_column_name: str - name of the date column in the csv file + Date column name :return: dict - Key : Value - geospatial (str) -> List[float] containing min/max longitude and latitude values - temporal (str) -> List[str] containing beginning and end dates - + geospatial and temporal fields of the bounding box for given constraints. """ lon_min_val = None lon_max_val = None @@ -99,9 +65,7 @@ def get_spatial_temporal_bounds(self, lon_column_name, lat_column_name, date_col # variable to be returned in string format begin_date_str = '' - boto_client = self.su.connect("session", None) - bucket = self.su.conf['s3_bucket'] - sm_open_file = self.su.get_csv_s3(boto_client, bucket, self.key) + sm_open_file.seek(0) csv_reader = csv.DictReader(sm_open_file) for row in csv_reader: @@ -151,43 +115,40 @@ def get_spatial_temporal_bounds(self, lon_column_name, lat_column_name, date_col "temporal": [begin_date_str, end_date_str] } + sm_open_file.seek(0) return geospatial_temporal_bounds - - def extract_coords(self, max_lon, max_lat, min_lon, min_lat): + @staticmethod + def extract_coords(sm_open_file, max_lon, max_lat, min_lon, min_lat): """ - Extracts specific coordinates corresponding to min/max longitude and latitude values given from get_spatial_temporal_bounds(....) method - - :param max_lon: float - maximum longitude value - :param max_lat: float - maximum latitude value - :param min_lon: float - minimum longitude value - :param min_lat: float - minimum latitude value - - :return: List[ List[Float] ] - Returns a list of lists. Each list contains floats (longitude and latitude ) value pairs corresponding to - one of the min/max latitude and longitude values that were extracted previously from get_spatial_temporal_bounds (...) + Given the max/min lon and lat, the function will parse the csv file to extract the coordinates within the given bounding box. + + :param sm_open_file: file-like object + A file-like object that is open, say from smart_open's sm_open. + :param max_lon: str + Maximum longitude + :param max_lat: str + Maximum latitude + :param min_lon: str + Minimum longitude + :param min_lat: str + Minimum latitude + + :return: list + List of the the coordinates (no duplicates) within the file that are within the given bounding box. 
""" - # Keeps track of all coordinates that needs to be added to json payload coords = [] - boto_client = self.su.connect("session", None) - bucket = self.su.conf['s3_bucket'] - sm_open_file = self.su.get_csv_s3(boto_client, bucket, self.key) + sm_open_file.seek(0) csv_reader = csv.DictReader(sm_open_file) - for row in csv_reader: - if float( row['LAT'] ) == min_lat or float( row['LAT'] ) == max_lat or float( - row['LON'] ) == min_lon or float( row['LON'] ) == max_lon: + if float( row['LAT'] ) == min_lat or float( row['LAT'] ) == max_lat or \ + float( row['LON'] ) == min_lon or float( row['LON'] ) == max_lon: coord = [float( row['LON'] ), float( row['LAT'] )] - - # check to see if that coordinate has already been appended to the list that is keeping track of our coordinates + # if this coordinate has already been appended to the list to return (no duplicates) if coord not in coords: coords.append( coord ) + sm_open_file.seek(0) return coords - diff --git a/onestop-python-client/tests/extractor/CsbExtractorTest.py b/onestop-python-client/tests/extractor/CsbExtractorTest.py index 7dbbc9e..72bdbcc 100644 --- a/onestop-python-client/tests/extractor/CsbExtractorTest.py +++ b/onestop-python-client/tests/extractor/CsbExtractorTest.py @@ -1,35 +1,53 @@ import unittest +import os + +from moto import mock_s3 from onestop.extract.CsbExtractor import CsbExtractor from onestop.util.S3Utils import S3Utils -from tests.utils import abspath_from_relative - class CsbExtractorTest(unittest.TestCase): - # def setUp(self): - # print("Set it up!") - # file_name = '../data/file4.csv' - # self.csb_extractor = CsbExtractor(file_name) - def setUp(self): print("Set it up!") - key = "public/NESDIS/CSB/file4.csv" - self.su = S3Utils( abspath_from_relative( __file__, "../../config/aws-util-config-dev.yml" ), - abspath_from_relative(__file__, "../../config/credentials.yml") ) - self.csb_extractor = CsbExtractor(self.su, key) + self.root_proj_path = os.getcwd() + self.assertIsNotNone(self.root_proj_path) + self.key = "tests/data/file4.csv" + # Use open instead of our methodfor simplicity and reliability, plus not testing our code here. + self.file_obj = open(self.root_proj_path + '/' + self.key) + + config_dict = { + "access_key": "test_access_key", + "secret_key": "test_secret_key", + "log_level": "DEBUG" + } + + self.s3_utils = S3Utils(**config_dict) + self.bucket = "bucket" + self.region = "region" def tearDown(self): print("Tear it down!") + self.file_obj.close() def test_is_csv(self): - csv_str = '.csv' - self.assertTrue(self.csb_extractor.is_csv(self.csb_extractor.file_name)) + self.assertTrue(CsbExtractor.is_csv("test/blah/file.csv"), "Failed to determine a csv file name was a csv file.") + def test_is_not_csv(self): + self.assertFalse(CsbExtractor.is_csv("test/blah/file.txt"), "Failed to determine a csv file name was not a csv file.") - def test_get_geospatial_temporal_bounds(self): - bounds_dict = self.csb_extractor.get_spatial_temporal_bounds('LON', 'LAT', 'TIME') + @mock_s3 + def test_csb_SME_user_path(self): + # Setup bucket and file to read + s3 = self.s3_utils.connect('s3', self.region) + s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration={'LocationConstraint': self.region}) + self.s3_utils.upload_s3(s3, self.root_proj_path + '/' + self.key, self.bucket, self.key, True) + self.assertTrue(self.s3_utils.read_bytes_s3(s3, self.bucket, self.key)) + + # This is how we would expect an external user to get the file. 
+ sm_open_file = self.s3_utils.get_csv_s3(self.s3_utils.connect("session", None), self.bucket, self.key) + + bounds_dict = CsbExtractor.get_spatial_temporal_bounds(sm_open_file, 'LON', 'LAT', 'TIME') coords = bounds_dict["geospatial"] - print(str(coords)) self.assertEqual(coords[0], -96.847995) self.assertEqual(coords[1], 29.373065) self.assertEqual(coords[2], -92.747995) @@ -39,38 +57,43 @@ def test_get_geospatial_temporal_bounds(self): self.assertEqual(date_rng[0], '2018-04-10T14:00:06.000Z' ) self.assertEqual(date_rng[1], '2020-04-10T14:00:06.000Z' ) + def test_get_geospatial_temporal_bounds(self): + bounds_dict = CsbExtractor.get_spatial_temporal_bounds(self.file_obj, 'LON', 'LAT', 'TIME') + + coords = bounds_dict["geospatial"] + self.assertEqual(coords[0], -96.847995) + self.assertEqual(coords[1], 29.373065) + self.assertEqual(coords[2], -92.747995) + self.assertEqual(coords[3], 33.373065) + + date_rng = bounds_dict["temporal"] + self.assertEqual(date_rng[0], '2018-04-10T14:00:06.000Z' ) + self.assertEqual(date_rng[1], '2020-04-10T14:00:06.000Z' ) def test_get_min_lon(self): - bounds_dict = self.csb_extractor.get_spatial_temporal_bounds('LON', 'LAT', 'TIME') + bounds_dict = CsbExtractor.get_spatial_temporal_bounds(self.file_obj, 'LON', 'LAT', 'TIME') + coords = bounds_dict["geospatial"] min_lon = coords[0] self.assertEqual(min_lon, -96.847995) - def test_get_max_datetime(self): - bounds_dict = self.csb_extractor.get_spatial_temporal_bounds('LON', 'LAT', 'TIME') + + bounds_dict = CsbExtractor.get_spatial_temporal_bounds(self.file_obj, 'LON', 'LAT', 'TIME') + date_rng = bounds_dict["temporal"] end_date = date_rng[1] self.assertEqual(end_date, '2020-04-10T14:00:06.000Z') - def test_get_min_datetime(self): - bounds_dict = self.csb_extractor.get_spatial_temporal_bounds('LON', 'LAT', 'TIME') + bounds_dict = CsbExtractor.get_spatial_temporal_bounds(self.file_obj, 'LON', 'LAT', 'TIME') + date_rng = bounds_dict["temporal"] begin_date = date_rng[0] self.assertEqual(begin_date, '2018-04-10T14:00:06.000Z') - def test_extract_coords(self): - bounds_dict = self.csb_extractor.get_spatial_temporal_bounds('LON', 'LAT', 'TIME') - coords = bounds_dict["geospatial"] - - min_lon = coords[0] - min_lat = coords[1] - max_lon = coords[2] - max_lat = coords[3] - - coords = self.csb_extractor.extract_coords(max_lon, max_lat, min_lon, min_lat) + coords = CsbExtractor.extract_coords(self.file_obj, -92.747995, 33.373065, -96.847995, 29.373065) result = [[ -94.847995, 29.373065 From 15cfaa33a1e1a3fe3b0e8360de01f5483f5bec7d Mon Sep 17 00:00:00 2001 From: Erin Date: Mon, 19 Apr 2021 13:28:40 -0600 Subject: [PATCH 005/100] 1500-Added unit tests for WebPublisher. 
Made sure to use autospec=True.
---
 onestop-python-client/tests/test_WebPublisher_unit.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/onestop-python-client/tests/test_WebPublisher_unit.py b/onestop-python-client/tests/test_WebPublisher_unit.py
index 3e987fb..4a97f80 100644
--- a/onestop-python-client/tests/test_WebPublisher_unit.py
+++ b/onestop-python-client/tests/test_WebPublisher_unit.py
@@ -84,7 +84,7 @@ def json(self):
         return MockResponse({"key1":"value1"}, 200)

     @mock_s3
-    @mock.patch('requests.post', side_effect=mocked_requests_patch)
+    @mock.patch('requests.post', side_effect=mocked_requests_patch, autospec=True)
     def test_publish(self, mock_get):
         payload = json.dumps(self.payloadDict)
         self.wp.publish_registry("granule", self.uuid, payload, "POST")
@@ -96,7 +96,7 @@ def test_publish(self, mock_get):
         mock_get.assert_called_with(url = ANY, auth = ANY, data = ANY, verify = ANY, headers = {'Content-Type': 'application/json'})

     @mock_s3
-    @mock.patch('requests.put', side_effect=mocked_requests_patch)
+    @mock.patch('requests.put', side_effect=mocked_requests_patch, autospec=True)
     def test_publish(self, mock_get):
         payload = json.dumps(self.payloadDict)
         self.wp.publish_registry("granule", self.uuid, payload, "PUT")
@@ -108,7 +108,7 @@ def test_publish(self, mock_get):
         mock_get.assert_called_with(url = ANY, auth = ANY, data = ANY, verify = ANY, headers = {'Content-Type': 'application/json'})

     @mock_s3
-    @mock.patch('requests.patch', side_effect=mocked_requests_patch)
+    @mock.patch('requests.patch', side_effect=mocked_requests_patch, autospec=True)
     def test_add_glacier_location(self, mock_get):
         payload = json.dumps(self.addlocDict)
         self.wp.publish_registry("granule", self.uuid, payload, "PATCH")
@@ -120,7 +120,7 @@ def test_add_glacier_location(self, mock_get):
         mock_get.assert_called_with(url = ANY, auth = ANY, data = ANY, verify = ANY, headers = {'Content-Type': 'application/json'})

     @mock_s3
-    @mock.patch('requests.delete', side_effect=mocked_requests_patch)
+    @mock.patch('requests.delete', side_effect=mocked_requests_patch, autospec=True)
     def test_delete_registry_granule(self, mock_get):
         self.wp.delete_registry("granule", self.uuid)

@@ -131,7 +131,7 @@ def test_delete_registry_granule(self, mock_get):
         mock_get.assert_called_with(url = ANY, auth = ANY, verify = ANY, headers = {'Content-Type': 'application/json'})

     @mock_s3
-    @mock.patch('requests.delete', side_effect=mocked_requests_patch)
+    @mock.patch('requests.delete', side_effect=mocked_requests_patch, autospec=True)
     def test_delete_registry_collection(self, mock_get):
         self.wp.delete_registry("collection", self.uuid)


From 1e629ab31e343e52c3fc93f839b26dc566e710a0 Mon Sep 17 00:00:00 2001
From: Erin
Date: Wed, 28 Apr 2021 10:08:53 -0600
Subject: [PATCH 006/100] 1500-Changed the KafkaConsumer class constructor
 (and adjusted its documentation) to take a dictionary, with extra parameters
 allowed, and changed methods within this class to reference the variables
 that were set rather than the config. Adjusted affected tests. Removed the
 get_logger method since it was unused and a different logger is used now.
 Added checks for when security is not enabled.
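
For illustration, a minimal usage sketch of the new constructor (assuming the caller loads
kafka-publisher-config-dev.yml into a dict whose keys match the constructor arguments; any
extra keys are swallowed by **wildargs and only logged):

    import yaml
    from onestop.KafkaConsumer import KafkaConsumer

    with open("scripts/config/kafka-publisher-config-dev.yml") as f:
        conf = yaml.load(f, Loader=yaml.FullLoader)

    kafka_consumer = KafkaConsumer(**conf)
    metadata_consumer = kafka_consumer.connect()
    kafka_consumer.consume(metadata_consumer, lambda k, v: print(k, v))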
--- .../onestop/KafkaConsumer.py | 182 ++++++------ .../tests/KafkaConsumerTest.py | 264 ++++++++++++++++++ scripts/sme/smeFunc.py | 2 +- 3 files changed, 349 insertions(+), 99 deletions(-) create mode 100644 onestop-python-client/tests/KafkaConsumerTest.py diff --git a/onestop-python-client/onestop/KafkaConsumer.py b/onestop-python-client/onestop/KafkaConsumer.py index e45d6cc..a3d1e95 100644 --- a/onestop-python-client/onestop/KafkaConsumer.py +++ b/onestop-python-client/onestop/KafkaConsumer.py @@ -1,11 +1,9 @@ -import logging -import yaml - from confluent_kafka.schema_registry import SchemaRegistryClient from confluent_kafka.error import KafkaError from confluent_kafka import DeserializingConsumer from confluent_kafka.schema_registry.avro import AvroDeserializer from confluent_kafka.serialization import StringDeserializer +from onestop.util.ClientLogger import ClientLogger class KafkaConsumer: """ @@ -13,109 +11,97 @@ class KafkaConsumer: Attributes ---------- - conf: yaml file - kafka-publisher-config-dev.yml - logger: Logger object - utilizes python logger library and creates logging for our specific needs - logger.info: Logger object - logging statement that occurs when the class is instantiated - metadata_type: str - type of metadata (COLLECTION or GRANULE) - brokers: str - brokers (kubernetes service) - group_id: str - Client group id string. All clients sharing the same group.id belong to the same group - auto_offset_reset: str - Action to take when there is no initial offset in offset store or the desired offset is out of range (smallest, earliest, beginning, largest, latest, end, error) - schema_registry: str - schema registry (kubernetes service) - security: boolean - defines if security is in place - collection_topic: str - collection topic you want to consume - granule_topic: str - granule topic you want to consume + metadata_type: str + type of metadata (COLLECTION or GRANULE) + brokers: str + brokers (kubernetes service) + group_id: str + Client group id string. All clients sharing the same group.id belong to the same group + auto_offset_reset: str + Action to take when there is no initial offset in offset store or the desired offset is out of range (smallest, earliest, beginning, largest, latest, end, error) + schema_registry: str + schema registry (kubernetes service) + security_enabled: boolean + Whether to use security for the kafka schema registry client. + security_caLoc: str + Kafka schema registry certification authority (CA) file location. + security_keyLoc: str + Kafka schema registry client's private key file location. + security_certLoc: str + Kafka schema registry client's public key file location. 
+ collection_topic_consume: str + collection topic you want to consume + granule_topic_consume: str + granule topic you want to consume + logger: Logger object + utilizes python logger library and creates logging for our specific needs Methods ------- - get_logger(log_name, create_file) - creates logger file - - register_client() - registers to schema registry client based on configs + register_client() + registers to schema registry client based on configs - create_consumer(registry_client) - subscribes to topic defined in configs and creates a consumer to deserialize messages from topic + connect() + utilizes register_client() and create_consumer(registry_client) to connect to schema registry and allow for consumption of topics - connect() - utilizes register_client() and create_consumer(registry_client) to connect to schema registry and allow for consumption of topics + create_consumer(registry_client) + subscribes to topic defined in configs and creates a consumer to deserialize messages from topic - consume(metadata_consumer, handler) - asynchronously polls for messages in the connected topic, results vary depending on the handler function that is passed into it + consume(metadata_consumer, handler) + asynchronously polls for messages in the connected topic, results vary depending on the handler function that is passed into it """ - conf = None - - def __init__(self, conf_loc): - with open(conf_loc) as f: - self.conf = yaml.load(f, Loader=yaml.FullLoader) - - self.logger = self.get_logger(self.__class__.__name__, False) - self.logger.info("Initializing " + self.__class__.__name__) - self.metadata_type = self.conf['metadata_type'] - self.brokers = self.conf['brokers'] - self.group_id = self.conf['group_id'] - self.auto_offset_reset = self.conf['auto_offset_reset'] - self.schema_registry = self.conf['schema_registry'] - self.security = self.conf['security']['enabled'] - - self.collection_topic = self.conf['collection_topic_consume'] - self.granule_topic = self.conf['granule_topic_consume'] - if self.metadata_type not in ['COLLECTION', 'GRANULE']: - raise ValueError("metadata_type must be 'COLLECTION' or 'GRANULE'") - - def get_logger(self, log_name, create_file): + def __init__(self, metadata_type, brokers, group_id, auto_offset_reset, schema_registry, security, collection_topic_consume, granule_topic_consume, log_level = 'INFO', **wildargs): """ - Utilizes python logger library and creates logging - - :param log_name: str - name of log to be created - :param create_file: boolean - defines whether of not you want a logger file to be created - - :return: Logger object + Attributes + ---------- + metadata_type: str + type of metadata (COLLECTION or GRANULE) + brokers: str + brokers (kubernetes service) + group_id: str + Client group id string. All clients sharing the same group.id belong to the same group + auto_offset_reset: str + Action to take when there is no initial offset in offset store or the desired offset is out of range (smallest, earliest, beginning, largest, latest, end, error) + schema_registry: str + schema registry (kubernetes service) URL + security: dict + enabled boolean: Whether to use security for kafka schema registry client. + caLoc str: Kafka schema registry certification authority (CA) file location. + keyLoc str: Kafka schema registry client's private key file location. + certLoc str: Kafka schema registry client's public key file location. 
+ + collection_topic_consume: str + collection topic you want to consume + granule_topic_consume: str + granule topic you want to consume + log_level: str + What log level to use for this class """ - # create logger - log = logging.getLogger() - - # create formatter and add it to the handlers - formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + self.metadata_type = metadata_type + self.brokers = brokers + self.group_id = group_id + self.auto_offset_reset = auto_offset_reset + self.schema_registry = schema_registry + self.security_enabled = security['enabled'] - if self.conf['log_level'] == "DEBUG": - log.setLevel(level=logging.DEBUG) - else: - if self.conf['log_level'] == "INFO": - log.setLevel(level=logging.INFO) - else: - log.setLevel(level=logging.ERROR) + if self.security_enabled: + self.security_caLoc = security['caLoc'] + self.security_keyLoc = security['keyLoc'] + self.security_certLoc = security['certLoc'] - fh = None - if create_file: - # create file handler for logger. - fh = logging.FileHandler(log_name) - fh.setFormatter(formatter) + self.collection_topic_consume = collection_topic_consume + self.granule_topic_consume = granule_topic_consume - # create console handler for logger. - ch = logging.StreamHandler() - ch.setFormatter(formatter) + if self.metadata_type not in ['COLLECTION', 'GRANULE']: + raise ValueError("metadata_type must be 'COLLECTION' or 'GRANULE'") - # add handlers to logger. - if create_file: - log.addHandler(fh) + self.logger = ClientLogger.get_logger(self.__class__.__name__, log_level, False) + self.logger.info("Initializing " + self.__class__.__name__) - log.addHandler(ch) - return log + if wildargs: + self.logger.error("There were extra constructor arguments: " + str(wildargs)) def register_client(self): """ @@ -125,10 +111,10 @@ def register_client(self): """ reg_conf = {'url': self.schema_registry} - if self.security: - reg_conf['ssl.ca.location'] = self.conf['security']['caLoc'] - reg_conf['ssl.key.location'] = self.conf['security']['keyLoc'] - reg_conf['ssl.certificate.location'] = self.conf['security']['certLoc'] + if self.security_enabled: + reg_conf['ssl.ca.location'] = self.security_caLoc + reg_conf['ssl.key.location'] = self.security_keyLoc + reg_conf['ssl.certificate.location'] = self.security_certLoc registry_client = SchemaRegistryClient(reg_conf) return registry_client @@ -166,11 +152,11 @@ def create_consumer(self, registry_client): consumer_conf = {'bootstrap.servers': self.brokers} - if self.security: + if self.security_enabled: consumer_conf['security.protocol'] = 'SSL' - consumer_conf['ssl.ca.location'] = self.conf['security']['caLoc'] - consumer_conf['ssl.key.location'] = self.conf['security']['keyLoc'] - consumer_conf['ssl.certificate.location'] = self.conf['security']['certLoc'] + consumer_conf['ssl.ca.location'] = self.security_caLoc + consumer_conf['ssl.key.location'] = self.security_keyLoc + consumer_conf['ssl.certificate.location'] = self.security_certLoc meta_consumer_conf = consumer_conf meta_consumer_conf['key.deserializer'] = StringDeserializer('utf-8') diff --git a/onestop-python-client/tests/KafkaConsumerTest.py b/onestop-python-client/tests/KafkaConsumerTest.py new file mode 100644 index 0000000..e7c3f08 --- /dev/null +++ b/onestop-python-client/tests/KafkaConsumerTest.py @@ -0,0 +1,264 @@ +import unittest + +from unittest.mock import ANY, patch, MagicMock, call +from onestop.KafkaConsumer import KafkaConsumer +from confluent_kafka.schema_registry import SchemaRegistryClient +from 
confluent_kafka.serialization import StringDeserializer + +class KafkaConsumerTest(unittest.TestCase): + kp = None + conf_w_security = None + conf_wo_security = None + + @classmethod + def setUp(cls): + print("Set it up!") + cls.conf_w_security = { + "metadata_type" : "GRANULE", + "brokers" : "onestop-dev-cp-kafka:9092", + "group_id" : "sme-test", + "auto_offset_reset" : "earliest", + "schema_registry" : "http://onestop-dev-cp-schema-registry:8081", + "security" : { + "enabled" : True, + "caLoc" : "/etc/pki/tls/cert.pem", + "keyLoc" : "/etc/pki/tls/private/kafka-user.key", + "certLoc" : "/etc/pki/tls/certs/kafka-user.crt" + }, + "collection_topic_consume" : "psi-collection-input-unknown", + "granule_topic_consume" : "psi-granule-input-unknown", + "log_level" : "DEBUG" + } + cls.conf_wo_security = dict(cls.conf_w_security) + # Remove security credential section. + cls.conf_wo_security['security'] = { + "enabled":False + } + + @classmethod + def tearDown(self): + print("Tear it down!") + + def test_init_happy_nonconditional_params(self): + consumer = KafkaConsumer(**self.conf_w_security) + + self.assertEqual(consumer.metadata_type, self.conf_w_security['metadata_type']) + self.assertEqual(consumer.brokers, self.conf_w_security['brokers']) + self.assertEqual(consumer.group_id, self.conf_w_security['group_id']) + self.assertEqual(consumer.auto_offset_reset, self.conf_w_security['auto_offset_reset']) + self.assertEqual(consumer.schema_registry, self.conf_w_security['schema_registry']) + self.assertEqual(consumer.security_enabled, self.conf_w_security['security']['enabled']) + self.assertEqual(consumer.collection_topic_consume, self.conf_w_security['collection_topic_consume']) + self.assertEqual(consumer.granule_topic_consume, self.conf_w_security['granule_topic_consume']) + + def test_init_security_enabled(self): + consumer = KafkaConsumer(**self.conf_w_security) + + self.assertEqual(consumer.security_caLoc, self.conf_w_security['security']['caLoc']) + self.assertEqual(consumer.security_keyLoc, self.conf_w_security['security']['keyLoc']) + self.assertEqual(consumer.security_certLoc, self.conf_w_security['security']['certLoc']) + + def test_init_security_disabled(self): + consumer = KafkaConsumer(**self.conf_wo_security) + + self.assertRaises(AttributeError, getattr, consumer, "security_caLoc") + self.assertRaises(AttributeError, getattr, consumer, "security_keyLoc") + self.assertRaises(AttributeError, getattr, consumer, "security_certLoc") + + def test_init_metadata_type_valid(self): + consumer = KafkaConsumer(**self.conf_w_security) + + self.assertEqual(consumer.metadata_type, self.conf_w_security['metadata_type']) + + def test_init_metadata_type_invalid(self): + wrong_metadata_type_config = dict(self.conf_w_security) + wrong_metadata_type_config['metadata_type'] = "invalid_type" + + self.assertRaises(ValueError, KafkaConsumer, **wrong_metadata_type_config) + + @patch.object(SchemaRegistryClient, '__init__', autospec=True) + def test_register_client_w_security(self, mock_client): + schema_conf = { + 'url':self.conf_w_security['schema_registry'], + 'ssl.ca.location': self.conf_w_security['security']['caLoc'], + 'ssl.key.location': self.conf_w_security['security']['keyLoc'], + 'ssl.certificate.location': self.conf_w_security['security']['certLoc'] + } + mock_client.return_value = None + + consumer = KafkaConsumer(**self.conf_w_security) + consumer.register_client() + + mock_client.assert_called() + mock_client.assert_called_with(ANY, schema_conf) + + @patch.object(SchemaRegistryClient, 
'__init__', autospec=True) + def test_register_client_wo_security(self, mock_client): + schema_conf = { + 'url' : self.conf_wo_security['schema_registry'] + } + mock_client.return_value = None + + consumer = KafkaConsumer(**self.conf_wo_security) + consumer.register_client() + + mock_client.assert_called_with(ANY, schema_conf) + + @patch('onestop.KafkaConsumer.AvroDeserializer') + @patch('onestop.KafkaConsumer.DeserializingConsumer') + def test_create_consumer_calls_AvroDeserializer(self, mock_deserializing_consumer, mock_avro_deserializer): + conf_w_security_collection = dict(self.conf_w_security) + conf_w_security_collection['metadata_type'] = "COLLECTION" + + consumer = KafkaConsumer(**conf_w_security_collection) + reg_client = consumer.register_client() + reg_client.get_latest_version = MagicMock() + consumer.create_consumer(reg_client) + + # Verify AvroDeserializer called with expected registry client + mock_avro_deserializer.assert_called_with(ANY, reg_client) + + @patch('onestop.KafkaConsumer.AvroDeserializer') + @patch('onestop.KafkaConsumer.DeserializingConsumer') + def test_create_consumer_collection_w_security(self, mock_deserializing_consumer, mock_avro_deserializer): + conf_w_security_collection = dict(self.conf_w_security) + topic = conf_w_security_collection['collection_topic_consume'] + conf_w_security_collection['metadata_type'] = 'COLLECTION' + + consumer = KafkaConsumer(**conf_w_security_collection) + reg_client = MagicMock() + consumer.create_consumer(reg_client) + + # Verify metadata type was taken into consideration for getting topic information + reg_client.get_latest_version.assert_called_with(topic + '-value') + + # Verify security passed into DeserializingConsumer + mock_deserializing_consumer.assert_called_with({'bootstrap.servers': conf_w_security_collection['brokers'], + 'security.protocol': 'SSL', + 'ssl.ca.location': conf_w_security_collection['security']['caLoc'], + 'ssl.key.location': conf_w_security_collection['security']['keyLoc'], + 'ssl.certificate.location': conf_w_security_collection['security']['certLoc'], + 'key.deserializer': ANY, + 'value.deserializer': ANY, + 'group.id': conf_w_security_collection['group_id'], + 'auto.offset.reset': conf_w_security_collection['auto_offset_reset'] + }) + mock_deserializing_consumer.return_value.subscribe.assert_called_with([topic]) + + @patch('onestop.KafkaConsumer.AvroDeserializer') + @patch('onestop.KafkaConsumer.DeserializingConsumer') + def test_create_consumer_collection_wo_security(self, mock_deserializing_consumer, mock_avro_deserializer): + conf_wo_security_collection = dict(self.conf_wo_security) + topic = conf_wo_security_collection['collection_topic_consume'] + conf_wo_security_collection['metadata_type'] = 'COLLECTION' + + consumer = KafkaConsumer(**conf_wo_security_collection) + reg_client = MagicMock() + consumer.create_consumer(reg_client) + + # Verify metadata type was taken into consideration for getting topic information + reg_client.get_latest_version.assert_called_with(topic + '-value') + + # Verify no security passed into DeserializingConsumer + mock_deserializing_consumer.assert_called_with({'bootstrap.servers': conf_wo_security_collection['brokers'], + 'key.deserializer': ANY, + 'value.deserializer': ANY, + 'group.id': conf_wo_security_collection['group_id'], + 'auto.offset.reset': conf_wo_security_collection['auto_offset_reset'] + }) + mock_deserializing_consumer.return_value.subscribe.assert_called_with([topic]) + + @patch('onestop.KafkaConsumer.AvroDeserializer') + 
@patch('onestop.KafkaConsumer.DeserializingConsumer') + def test_create_consumer_granule_w_security(self, mock_deserializing_consumer, mock_avro_deserializer): + conf_w_security_granule = dict(self.conf_w_security) + topic = conf_w_security_granule['granule_topic_consume'] + conf_w_security_granule['metadata_type'] = 'GRANULE' + + consumer = KafkaConsumer(**conf_w_security_granule) + reg_client = MagicMock() + consumer.create_consumer(reg_client) + + # Verify metadata type was taken into consideration for getting topic information + reg_client.get_latest_version.assert_called_with(topic + '-value') + + # Verify security passed into DeserializingConsumer + mock_deserializing_consumer.assert_called_with({'bootstrap.servers': conf_w_security_granule['brokers'], + 'security.protocol': 'SSL', + 'ssl.ca.location': conf_w_security_granule['security']['caLoc'], + 'ssl.key.location': conf_w_security_granule['security']['keyLoc'], + 'ssl.certificate.location': conf_w_security_granule['security']['certLoc'], + 'key.deserializer': ANY, + 'value.deserializer': ANY, + 'group.id': conf_w_security_granule['group_id'], + 'auto.offset.reset': conf_w_security_granule['auto_offset_reset'] + }) + mock_deserializing_consumer.return_value.subscribe.assert_called_with([topic]) + + @patch('onestop.KafkaConsumer.AvroDeserializer') + @patch('onestop.KafkaConsumer.DeserializingConsumer') + def test_create_consumer_granule_wo_security(self, mock_deserializing_consumer, mock_avro_deserializer): + conf_wo_security_granule = dict(self.conf_wo_security) + topic = conf_wo_security_granule['granule_topic_consume'] + conf_wo_security_granule['metadata_type'] = 'GRANULE' + + # Verify security taken into consideration + meta_consumer_conf = {'bootstrap.servers': conf_wo_security_granule['brokers'], + 'key.deserializer': StringDeserializer('utf-8'), + 'value.deserializer': mock_avro_deserializer, + 'group.id': conf_wo_security_granule['group_id'], + 'auto.offset.reset': conf_wo_security_granule['auto_offset_reset'] + } + + consumer = KafkaConsumer(**conf_wo_security_granule) + reg_client = MagicMock() + consumer.create_consumer(reg_client) + + # Verify metadata type was taken into consideration for getting topic information + reg_client.get_latest_version.assert_called_with(topic + '-value') + + # Verify no security passed into DeserializingConsumer called with expected configuration + meta_consumer_conf['key.deserializer'] = ANY + meta_consumer_conf['value.deserializer'] = ANY + mock_deserializing_consumer.assert_called_with(meta_consumer_conf) + mock_deserializing_consumer.return_value.subscribe.assert_called_with([topic]) + + def test_connect(self): + mock_client = MagicMock() + + consumer = KafkaConsumer(**self.conf_w_security) + consumer.register_client = MagicMock(return_value=mock_client) + consumer.create_consumer = MagicMock(return_value=MagicMock(mock_client)) + consumer.connect() + + consumer.register_client.assert_called_once() + consumer.create_consumer.assert_called_with(mock_client) + + @patch('confluent_kafka.cimpl.Message') + @patch('onestop.KafkaConsumer.DeserializingConsumer') + def test_consume(self, mock_metadata_consumer, mock_message): + mock_message_key = 'key1' + mock_message_value = 'value1' + consumer = KafkaConsumer(**self.conf_w_security) + consumer.register_client = MagicMock(return_value=MagicMock()) + mock_message.key.return_value = mock_message_key + mock_message.value.return_value = mock_message_value + mock_metadata_consumer.poll.side_effect = [None, mock_message, Exception] + mock_handler = 
MagicMock() + + # Would have liked not having the try/catch but it wasn't ignoring the exception. Just need to not fail due to end of loop. + try: + self.assertRaises(Exception, consumer.consume(mock_metadata_consumer, mock_handler)) + except Exception as e: + print("Ignoring exception: {}".format(e)) + + # Verify kafka consumer poll called expected number of times + self.assertTrue(mock_metadata_consumer.poll.call_count == 3) + mock_metadata_consumer.poll.assert_has_calls([call(10), call(10), call(10)]) + + # Verify callback function was called once with expected message attributes + mock_handler.assert_called_once() + mock_handler.assert_called_with(mock_message_key, mock_message_value) + + if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/scripts/sme/smeFunc.py b/scripts/sme/smeFunc.py index 2e11d51..084e15b 100644 --- a/scripts/sme/smeFunc.py +++ b/scripts/sme/smeFunc.py @@ -27,7 +27,7 @@ def handler(key,value): if __name__ == '__main__': kafka_consumer = KafkaConsumer("scripts/config/kafka-publisher-config-dev.yml") - kafka_consumer.granule_topic = 'psi-granule-parsed' + kafka_consumer.granule_topic_consume = 'psi-granule-parsed' metadata_consumer = kafka_consumer.connect() kafka_consumer.consume(metadata_consumer, lambda k, v: handler(k, v)) """ From b63bfe868d48e3e574e3a5f0b75b1d6dff5897ef Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 28 Apr 2021 10:11:07 -0600 Subject: [PATCH 007/100] 1500-Adjusted KafkaConsumer create_consumer to not do duplicate code, using a topic variable instead. Removed changing name of the consumer_conf to meta_consumer_conf. Added additional logging. --- .../onestop/KafkaConsumer.py | 37 +++++++++++-------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/onestop-python-client/onestop/KafkaConsumer.py b/onestop-python-client/onestop/KafkaConsumer.py index a3d1e95..54744cb 100644 --- a/onestop-python-client/onestop/KafkaConsumer.py +++ b/onestop-python-client/onestop/KafkaConsumer.py @@ -108,7 +108,7 @@ def register_client(self): Registers to schema registry client based on configs :return: SchemaRegistryClient (confluent kafka library) - """ + """ reg_conf = {'url': self.schema_registry} if self.security_enabled: @@ -116,6 +116,7 @@ def register_client(self): reg_conf['ssl.key.location'] = self.security_keyLoc reg_conf['ssl.certificate.location'] = self.security_certLoc + self.logger.info("Creating SchemaRegistryClient with configuration:"+str(reg_conf)) registry_client = SchemaRegistryClient(reg_conf) return registry_client @@ -138,18 +139,21 @@ def create_consumer(self, registry_client): :return: DeserializingConsumer object """ - metadata_schema = None topic = None if self.metadata_type == "COLLECTION": - metadata_schema = registry_client.get_latest_version(self.collection_topic + '-value').schema.schema_str - topic = self.collection_topic + topic = self.collection_topic_consume if self.metadata_type == "GRANULE": - metadata_schema = registry_client.get_latest_version(self.granule_topic + '-value').schema.schema_str - topic = self.granule_topic + topic = self.granule_topic_consume - metadata_deserializer = AvroDeserializer(metadata_schema, registry_client) + self.logger.debug("topic: "+str(topic)) + + # This topic naming scheme is how OneStop creates the topics. 
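+        # For example, with the granule topic from the dev config, 'psi-granule-input-unknown',
+        # the value schema is looked up under the subject 'psi-granule-input-unknown-value'.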
+ latest_schema = registry_client.get_latest_version(topic + '-value') + metadata_schema = latest_schema.schema.schema_str + self.logger.debug("metadata_schema: "+metadata_schema) + metadata_deserializer = AvroDeserializer(metadata_schema, registry_client) consumer_conf = {'bootstrap.servers': self.brokers} if self.security_enabled: @@ -158,13 +162,14 @@ def create_consumer(self, registry_client): consumer_conf['ssl.key.location'] = self.security_keyLoc consumer_conf['ssl.certificate.location'] = self.security_certLoc - meta_consumer_conf = consumer_conf - meta_consumer_conf['key.deserializer'] = StringDeserializer('utf-8') - meta_consumer_conf['value.deserializer'] = metadata_deserializer - meta_consumer_conf['group.id'] = self.group_id - meta_consumer_conf['auto.offset.reset'] = self.auto_offset_reset + consumer_conf['key.deserializer'] = StringDeserializer('utf-8') + consumer_conf['value.deserializer'] = metadata_deserializer + consumer_conf['group.id'] = self.group_id + consumer_conf['auto.offset.reset'] = self.auto_offset_reset - metadata_consumer = DeserializingConsumer(meta_consumer_conf) + self.logger.debug("meta_consumer_conf: "+str(consumer_conf)) + metadata_consumer = DeserializingConsumer(consumer_conf) + self.logger.debug("topic: "+str(topic)) metadata_consumer.subscribe([topic]) return metadata_consumer @@ -183,15 +188,16 @@ def consume(self, metadata_consumer, handler): while True: try: msg = metadata_consumer.poll(10) + self.logger.debug("Message received: "+str(msg)) if msg is None: - print('No Messages') + self.logger.info('No Messages') continue + self.logger.debug("Message key="+str(msg.key())+" value="+str(msg.value())) key = msg.key() value = msg.value() - except KafkaError: raise try: @@ -199,4 +205,5 @@ def consume(self, metadata_consumer, handler): except Exception as e: self.logger.error("Message handler failed: {}".format(e)) break + self.logger.debug("Closing metadata_consumer") metadata_consumer.close() From 74e7f6f5c03c5943d6b1cdd98dc04bdcc68d9fc9 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 28 Apr 2021 10:19:42 -0600 Subject: [PATCH 008/100] 1500-Added __init__.py to tests directory so was discoverable/module. --- onestop-python-client/tests/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 onestop-python-client/tests/__init__.py diff --git a/onestop-python-client/tests/__init__.py b/onestop-python-client/tests/__init__.py new file mode 100644 index 0000000..e69de29 From a07e642ee13f34b6baa5d3679292a5403cdfed8c Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 28 Apr 2021 12:47:34 -0600 Subject: [PATCH 009/100] 1500-in KafkaConsumer renamed variables so tad more generic. Makes it clearer how similar code is to KafkaPublisher class. 
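
Note that only the attribute names change; the constructor keywords (and therefore the config
keys) keep the *_consume suffix. A small sketch, assuming a config dict shaped like the one in
KafkaConsumerTest:

    consumer = KafkaConsumer(**conf)
    # constructor key is still 'granule_topic_consume'; the attribute is now the generic name
    assert consumer.granule_topic == conf['granule_topic_consume']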
--- .../onestop/KafkaConsumer.py | 46 +++++++++---------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/onestop-python-client/onestop/KafkaConsumer.py b/onestop-python-client/onestop/KafkaConsumer.py index 54744cb..18c489e 100644 --- a/onestop-python-client/onestop/KafkaConsumer.py +++ b/onestop-python-client/onestop/KafkaConsumer.py @@ -91,8 +91,8 @@ def __init__(self, metadata_type, brokers, group_id, auto_offset_reset, schema_r self.security_keyLoc = security['keyLoc'] self.security_certLoc = security['certLoc'] - self.collection_topic_consume = collection_topic_consume - self.granule_topic_consume = granule_topic_consume + self.collection_topic = collection_topic_consume + self.granule_topic = granule_topic_consume if self.metadata_type not in ['COLLECTION', 'GRANULE']: raise ValueError("metadata_type must be 'COLLECTION' or 'GRANULE'") @@ -109,15 +109,15 @@ def register_client(self): :return: SchemaRegistryClient (confluent kafka library) """ - reg_conf = {'url': self.schema_registry} + conf = {'url': self.schema_registry} if self.security_enabled: - reg_conf['ssl.ca.location'] = self.security_caLoc - reg_conf['ssl.key.location'] = self.security_keyLoc - reg_conf['ssl.certificate.location'] = self.security_certLoc + conf['ssl.ca.location'] = self.security_caLoc + conf['ssl.key.location'] = self.security_keyLoc + conf['ssl.certificate.location'] = self.security_certLoc - self.logger.info("Creating SchemaRegistryClient with configuration:"+str(reg_conf)) - registry_client = SchemaRegistryClient(reg_conf) + self.logger.info("Creating SchemaRegistryClient with configuration:"+str(conf)) + registry_client = SchemaRegistryClient(conf) return registry_client def connect(self): @@ -141,10 +141,10 @@ def create_consumer(self, registry_client): """ topic = None if self.metadata_type == "COLLECTION": - topic = self.collection_topic_consume + topic = self.collection_topic if self.metadata_type == "GRANULE": - topic = self.granule_topic_consume + topic = self.granule_topic self.logger.debug("topic: "+str(topic)) @@ -154,21 +154,21 @@ def create_consumer(self, registry_client): metadata_schema = latest_schema.schema.schema_str self.logger.debug("metadata_schema: "+metadata_schema) metadata_deserializer = AvroDeserializer(metadata_schema, registry_client) - consumer_conf = {'bootstrap.servers': self.brokers} + conf = {'bootstrap.servers': self.brokers} if self.security_enabled: - consumer_conf['security.protocol'] = 'SSL' - consumer_conf['ssl.ca.location'] = self.security_caLoc - consumer_conf['ssl.key.location'] = self.security_keyLoc - consumer_conf['ssl.certificate.location'] = self.security_certLoc - - consumer_conf['key.deserializer'] = StringDeserializer('utf-8') - consumer_conf['value.deserializer'] = metadata_deserializer - consumer_conf['group.id'] = self.group_id - consumer_conf['auto.offset.reset'] = self.auto_offset_reset - - self.logger.debug("meta_consumer_conf: "+str(consumer_conf)) - metadata_consumer = DeserializingConsumer(consumer_conf) + conf['security.protocol'] = 'SSL' + conf['ssl.ca.location'] = self.security_caLoc + conf['ssl.key.location'] = self.security_keyLoc + conf['ssl.certificate.location'] = self.security_certLoc + + conf['key.deserializer'] = StringDeserializer('utf-8') + conf['value.deserializer'] = metadata_deserializer + conf['group.id'] = self.group_id + conf['auto.offset.reset'] = self.auto_offset_reset + + self.logger.debug("conf: "+str(conf)) + metadata_consumer = DeserializingConsumer(conf) self.logger.debug("topic: "+str(topic)) 
metadata_consumer.subscribe([topic]) return metadata_consumer From da934c40324310b255dfee857ef356b21cfd7fe8 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 28 Apr 2021 12:59:07 -0600 Subject: [PATCH 010/100] 1500-Fixed KafkaConsumerTest (thought intellij refactor of var name would notice this usage). --- onestop-python-client/tests/KafkaConsumerTest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onestop-python-client/tests/KafkaConsumerTest.py b/onestop-python-client/tests/KafkaConsumerTest.py index e7c3f08..776e21a 100644 --- a/onestop-python-client/tests/KafkaConsumerTest.py +++ b/onestop-python-client/tests/KafkaConsumerTest.py @@ -48,8 +48,8 @@ def test_init_happy_nonconditional_params(self): self.assertEqual(consumer.auto_offset_reset, self.conf_w_security['auto_offset_reset']) self.assertEqual(consumer.schema_registry, self.conf_w_security['schema_registry']) self.assertEqual(consumer.security_enabled, self.conf_w_security['security']['enabled']) - self.assertEqual(consumer.collection_topic_consume, self.conf_w_security['collection_topic_consume']) - self.assertEqual(consumer.granule_topic_consume, self.conf_w_security['granule_topic_consume']) + self.assertEqual(consumer.collection_topic, self.conf_w_security['collection_topic_consume']) + self.assertEqual(consumer.granule_topic, self.conf_w_security['granule_topic_consume']) def test_init_security_enabled(self): consumer = KafkaConsumer(**self.conf_w_security) From 92e12c9dc1253ecd0acb3434936c0d25f2276b72 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 28 Apr 2021 12:59:42 -0600 Subject: [PATCH 011/100] 1500-KafkaConsumer consolidated config for deserializer. --- onestop-python-client/onestop/KafkaConsumer.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/onestop-python-client/onestop/KafkaConsumer.py b/onestop-python-client/onestop/KafkaConsumer.py index 18c489e..76078cc 100644 --- a/onestop-python-client/onestop/KafkaConsumer.py +++ b/onestop-python-client/onestop/KafkaConsumer.py @@ -154,7 +154,13 @@ def create_consumer(self, registry_client): metadata_schema = latest_schema.schema.schema_str self.logger.debug("metadata_schema: "+metadata_schema) metadata_deserializer = AvroDeserializer(metadata_schema, registry_client) - conf = {'bootstrap.servers': self.brokers} + conf = { + 'bootstrap.servers': self.brokers, + 'key.deserializer': StringDeserializer('utf-8'), + 'value.deserializer': metadata_deserializer, + 'group.id': self.group_id, + 'auto.offset.reset': self.auto_offset_reset + } if self.security_enabled: conf['security.protocol'] = 'SSL' @@ -162,11 +168,6 @@ def create_consumer(self, registry_client): conf['ssl.key.location'] = self.security_keyLoc conf['ssl.certificate.location'] = self.security_certLoc - conf['key.deserializer'] = StringDeserializer('utf-8') - conf['value.deserializer'] = metadata_deserializer - conf['group.id'] = self.group_id - conf['auto.offset.reset'] = self.auto_offset_reset - self.logger.debug("conf: "+str(conf)) metadata_consumer = DeserializingConsumer(conf) self.logger.debug("topic: "+str(topic)) From ba1740838c3ef9c6c488374228b37eb45155bf30 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 28 Apr 2021 17:50:02 -0600 Subject: [PATCH 012/100] 1500-Changed KafkaPublisher class constructors(adjusted documentation) to take dictionary with extra parameters allowed as well as methods within this class not to reference config but the variable that was set. Removed logging import and changed to ClientLogger. 
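
As with the consumer, a minimal construction sketch (values borrowed from the test
configuration; when SSL is enabled, security also carries caLoc, keyLoc and certLoc entries):

    from onestop.KafkaPublisher import KafkaPublisher

    publisher = KafkaPublisher(
        metadata_type='COLLECTION',
        brokers='onestop-dev-cp-kafka:9092',
        schema_registry='http://onestop-dev-cp-schema-registry:8081',
        security={'enabled': False},
        collection_topic_publish='psi-collection-input-unknown',
        granule_topic_publish='psi-granule-input-unknown',
        log_level='INFO')
    metadata_producer = publisher.connect()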
--- .../onestop/KafkaPublisher.py | 182 ++++++++---------- 1 file changed, 82 insertions(+), 100 deletions(-) diff --git a/onestop-python-client/onestop/KafkaPublisher.py b/onestop-python-client/onestop/KafkaPublisher.py index d357de8..125174b 100644 --- a/onestop-python-client/onestop/KafkaPublisher.py +++ b/onestop-python-client/onestop/KafkaPublisher.py @@ -1,13 +1,11 @@ -import logging -from uuid import UUID import json -import yaml +from uuid import UUID from confluent_kafka.schema_registry import SchemaRegistryClient from confluent_kafka.error import KafkaError from confluent_kafka import SerializingProducer from confluent_kafka.schema_registry.avro import AvroSerializer - +from onestop.util.ClientLogger import ClientLogger class KafkaPublisher: """ @@ -15,114 +13,98 @@ class KafkaPublisher: Attributes ---------- - conf: yaml file - config/kafka-publisher-config-dev.yml - logger: Logger object - utilizes python logger library and creates logging for our specific needs - logger.info: Logger object - logging statement that occurs when the class is instantiated - metadata_type: str - type of metadata (COLLECTION or GRANULE) - brokers: str - brokers (kubernetes service) - schema_registry: str - schema registry (kubernetes service) - security: boolean - defines if security is in place - collection_topic: str - collection topic you want to consume - granule_topic: str - granule topic you want to consume + metadata_type: str + type of metadata (COLLECTION or GRANULE) + brokers: str + brokers (kubernetes service) + schema_registry: str + schema registry (kubernetes service) + security_enabled: boolean + defines if security is in place + security_caLoc: str + Kafka schema registry certification authority (CA) file location. + security_keyLoc: str + Kafka schema registry client's private key file location. + security_certLoc: str + Kafka schema registry client's public key file location. 
+ collection_topic: str + collection topic you want to produce to + granule_topic: str + granule topic you want to produce to + logger: Logger object + utilizes python logger library and creates logging for our specific needs Methods ------- - get_logger(log_name, create_file) - creates logger file + register_client() + registers to schema registry client based on configs - register_client() - registers to schema registry client based on configs + create_producer(registry_client) + creates a SerializingProducer object to produce to kafka topic - create_producer(registry_client) - creates a SerializingProducer object to produce to kafka topic + connect() + utilizes register_client() and create_producer(registry_client) to connect to schema registry and allow for producing to kafka topics - connect() - utilizes register_client() and create_producer(registry_client) to connect to schema registry and allow for producing to kafka topics + publish_collection(collection_producer, collection_uuid, content_dict, method) + Publish collection to collection topic - publish_collection(collection_producer, collection_uuid, content_dict, method) - Publish collection to collection topic - - publish_granule(granule_producer, record_uuid, collection_uuid, content_dict) - Publish granule to granule topic + publish_granule(granule_producer, record_uuid, collection_uuid, content_dict) + Publish granule to granule topic """ - conf = None - - def __init__(self, conf_loc): - with open(conf_loc) as f: - self.conf = yaml.load(f, Loader=yaml.FullLoader) - - self.logger = self.get_logger(self.__class__.__name__, False) - self.logger.info("Initializing " + self.__class__.__name__) - self.metadata_type = self.conf['metadata_type'] - self.brokers = self.conf['brokers'] - self.schema_registry = self.conf['schema_registry'] - self.security = self.conf['security']['enabled'] - - self.collection_topic = self.conf['collection_topic_produce'] - self.granule_topic = self.conf['granule_topic_produce'] - - if self.metadata_type not in ['COLLECTION', 'GRANULE']: - raise ValueError("metadata_type must be 'COLLECTION' or 'GRANULE'") - - def get_logger(self, log_name, create_file): + def __init__(self, metadata_type, brokers, schema_registry, security, collection_topic_publish, granule_topic_publish, log_level='INFO', **wildargs): """ - Utilizes python logger library and creates logging - - :param log_name: str - name of log to be created - :param create_file: boolean - defines whether of not you want a logger file to be created - - :return: Logger object + Attributes + ---------- + metadata_type: str + type of metadata (COLLECTION or GRANULE) + brokers: str + brokers (kubernetes service) + group_id: str + Client group id string. All clients sharing the same group.id belong to the same group + auto_offset_reset: str + Action to take when there is no initial offset in offset store or the desired offset is out of range (smallest, earliest, beginning, largest, latest, end, error) + schema_registry: str + schema registry (kubernetes service) URL + security: dict + enabled boolean: Whether to use security for kafka schema registry client. + caLoc str: Kafka schema registry certification authority (CA) file location. + keyLoc str: Kafka schema registry client's private key file location. + certLoc str: Kafka schema registry client's public key file location. 
+ + collection_topic: str + collection topic you want to produce to + granule_topic: str + granule topic you want to produce to """ + self.metadata_type = metadata_type + self.brokers = brokers + self.schema_registry = schema_registry + self.security_enabled = security['enabled'] - # create logger - log = logging.getLogger() - - # create formatter and add it to the handlers - formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + if self.security_enabled: + self.security_caLoc = security['caLoc'] + self.security_keyLoc = security['keyLoc'] + self.security_certLoc = security['certLoc'] - if self.conf['log_level'] == "DEBUG": - log.setLevel(level=logging.DEBUG) - else: - if self.conf['log_level'] == "INFO": - log.setLevel(level=logging.INFO) - else: - log.setLevel(level=logging.ERROR) - - fh = None - if create_file: - # create file handler for logger. - fh = logging.FileHandler(log_name) - fh.setFormatter(formatter) + self.collection_topic = collection_topic_publish + self.granule_topic = granule_topic_publish - # create console handler for logger. - ch = logging.StreamHandler() - ch.setFormatter(formatter) + if self.metadata_type not in ['COLLECTION', 'GRANULE']: + raise ValueError("metadata_type must be 'COLLECTION' or 'GRANULE'") - # add handlers to logger. - if create_file: - log.addHandler(fh) + self.logger = ClientLogger.get_logger(self.__class__.__name__, log_level, False) + self.logger.info("Initializing " + self.__class__.__name__) - log.addHandler(ch) - return log + if wildargs: + self.logger.warning("There were extra constructor arguments: " + str(wildargs)) def connect(self): """ Utilizes register_client() and create_producer(registry_client) to connect to schema registry and allow for producing to kafka topics :return: SerializingProducer Object - based on config values + based on initial constructor values """ registry_client = self.register_client() metadata_producer = self.create_producer(registry_client) @@ -137,10 +119,10 @@ def register_client(self): reg_conf = {'url': self.schema_registry} - if self.security: - reg_conf['ssl.ca.location'] = self.conf['security']['caLoc'] - reg_conf['ssl.key.location'] = self.conf['security']['keyLoc'] - reg_conf['ssl.certificate.location'] = self.conf['security']['certLoc'] + if self.security_enabled: + reg_conf['ssl.ca.location'] = self.security_caLoc + reg_conf['ssl.key.location'] = self.security_keyLoc + reg_conf['ssl.certificate.location'] = self.security_certLoc registry_client = SchemaRegistryClient(reg_conf) return registry_client @@ -153,7 +135,7 @@ def create_producer(self, registry_client): get this from register_client() :return: SerializingProducer Object - based on config values + based on initial constructor values """ metadata_schema = None @@ -166,11 +148,11 @@ def create_producer(self, registry_client): metadata_serializer = AvroSerializer(metadata_schema, registry_client) producer_conf = {'bootstrap.servers': self.brokers} - if self.security: + if self.security_enabled: producer_conf['security.protocol'] = 'SSL' - producer_conf['ssl.ca.location'] = self.conf['security']['caLoc'] - producer_conf['ssl.key.location'] = self.conf['security']['keyLoc'] - producer_conf['ssl.certificate.location'] = self.conf['security']['certLoc'] + producer_conf['ssl.ca.location'] = self.security_caLoc + producer_conf['ssl.key.location'] = self.security_keyLoc + producer_conf['ssl.certificate.location'] = self.security_certLoc meta_producer_conf = producer_conf meta_producer_conf['value.serializer'] = 
metadata_serializer @@ -180,7 +162,7 @@ def create_producer(self, registry_client): def delivery_report(self, err, msg): """ - Called once for each message produced to indicate delivery result. Triggered by poll() or flush(). + Called once for each message produced to indicate delivery of message. Triggered by poll() or flush(). :param err: str err produced after publishing, if there is one From 392788aee0f8714b021d9a8fb96b89f2278a686b Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 28 Apr 2021 18:06:35 -0600 Subject: [PATCH 013/100] 1500-Changed KafkaConsumerTest(s) to have vars named exp where it makes sense. Added test for extra arguments via constructor. Fixed test for testing less parameters passed in, because more could have made it through. --- .../tests/KafkaConsumerTest.py | 40 ++++++++++++------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/onestop-python-client/tests/KafkaConsumerTest.py b/onestop-python-client/tests/KafkaConsumerTest.py index 776e21a..d1e6195 100644 --- a/onestop-python-client/tests/KafkaConsumerTest.py +++ b/onestop-python-client/tests/KafkaConsumerTest.py @@ -76,9 +76,14 @@ def test_init_metadata_type_invalid(self): self.assertRaises(ValueError, KafkaConsumer, **wrong_metadata_type_config) + def test_init_extra_params(self): + conf = dict(self.conf_wo_security) + conf['junk_key'] = 'junk_value' + KafkaConsumer(**conf) + @patch.object(SchemaRegistryClient, '__init__', autospec=True) def test_register_client_w_security(self, mock_client): - schema_conf = { + exp_security_conf = { 'url':self.conf_w_security['schema_registry'], 'ssl.ca.location': self.conf_w_security['security']['caLoc'], 'ssl.key.location': self.conf_w_security['security']['keyLoc'], @@ -90,19 +95,25 @@ def test_register_client_w_security(self, mock_client): consumer.register_client() mock_client.assert_called() - mock_client.assert_called_with(ANY, schema_conf) + mock_client.assert_called_with(ANY, exp_security_conf) @patch.object(SchemaRegistryClient, '__init__', autospec=True) def test_register_client_wo_security(self, mock_client): - schema_conf = { - 'url' : self.conf_wo_security['schema_registry'] + exp_security_conf = { + 'url':self.conf_w_security['schema_registry'], + 'ssl.ca.location': self.conf_w_security['security']['caLoc'], + 'ssl.key.location': self.conf_w_security['security']['keyLoc'], + 'ssl.certificate.location': self.conf_w_security['security']['certLoc'] } mock_client.return_value = None consumer = KafkaConsumer(**self.conf_wo_security) consumer.register_client() - - mock_client.assert_called_with(ANY, schema_conf) + try: + mock_client.assert_called_with(ANY, exp_security_conf) + except: + return + raise AssertionError('Expected register_client() to not have been called with security arguments.') @patch('onestop.KafkaConsumer.AvroDeserializer') @patch('onestop.KafkaConsumer.DeserializingConsumer') @@ -199,7 +210,7 @@ def test_create_consumer_granule_w_security(self, mock_deserializing_consumer, m @patch('onestop.KafkaConsumer.DeserializingConsumer') def test_create_consumer_granule_wo_security(self, mock_deserializing_consumer, mock_avro_deserializer): conf_wo_security_granule = dict(self.conf_wo_security) - topic = conf_wo_security_granule['granule_topic_consume'] + exp_topic = conf_wo_security_granule['granule_topic_consume'] conf_wo_security_granule['metadata_type'] = 'GRANULE' # Verify security taken into consideration @@ -215,13 +226,14 @@ def test_create_consumer_granule_wo_security(self, mock_deserializing_consumer, 
consumer.create_consumer(reg_client) # Verify metadata type was taken into consideration for getting topic information - reg_client.get_latest_version.assert_called_with(topic + '-value') + reg_client.get_latest_version.assert_called_with(exp_topic + '-value') # Verify no security passed into DeserializingConsumer called with expected configuration - meta_consumer_conf['key.deserializer'] = ANY - meta_consumer_conf['value.deserializer'] = ANY - mock_deserializing_consumer.assert_called_with(meta_consumer_conf) - mock_deserializing_consumer.return_value.subscribe.assert_called_with([topic]) + exp_arguments = dict(meta_consumer_conf) + exp_arguments['key.deserializer'] = ANY + exp_arguments['value.deserializer'] = ANY + mock_deserializing_consumer.assert_called_with(exp_arguments) + mock_deserializing_consumer.return_value.subscribe.assert_called_with([exp_topic]) def test_connect(self): mock_client = MagicMock() @@ -260,5 +272,5 @@ def test_consume(self, mock_metadata_consumer, mock_message): mock_handler.assert_called_once() mock_handler.assert_called_with(mock_message_key, mock_message_value) - if __name__ == '__main__': - unittest.main() \ No newline at end of file +if __name__ == '__main__': + unittest.main() \ No newline at end of file From 3426472c26738ae8866329fecfad58335f440bac Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 4 May 2021 09:56:56 -0600 Subject: [PATCH 014/100] 1500-KafkaConsumerTest improved the formatting of dicts, tested return values a little better where could, and changed a test of assertTrue to assertEqual so got to see what the actual value was when failed. --- .../tests/KafkaConsumerTest.py | 101 ++++++++++-------- 1 file changed, 56 insertions(+), 45 deletions(-) diff --git a/onestop-python-client/tests/KafkaConsumerTest.py b/onestop-python-client/tests/KafkaConsumerTest.py index d1e6195..1246789 100644 --- a/onestop-python-client/tests/KafkaConsumerTest.py +++ b/onestop-python-client/tests/KafkaConsumerTest.py @@ -3,7 +3,6 @@ from unittest.mock import ANY, patch, MagicMock, call from onestop.KafkaConsumer import KafkaConsumer from confluent_kafka.schema_registry import SchemaRegistryClient -from confluent_kafka.serialization import StringDeserializer class KafkaConsumerTest(unittest.TestCase): kp = None @@ -124,11 +123,13 @@ def test_create_consumer_calls_AvroDeserializer(self, mock_deserializing_consume consumer = KafkaConsumer(**conf_w_security_collection) reg_client = consumer.register_client() reg_client.get_latest_version = MagicMock() - consumer.create_consumer(reg_client) + deser_consumer = consumer.create_consumer(reg_client) # Verify AvroDeserializer called with expected registry client mock_avro_deserializer.assert_called_with(ANY, reg_client) + self.assertIsNotNone(deser_consumer) + @patch('onestop.KafkaConsumer.AvroDeserializer') @patch('onestop.KafkaConsumer.DeserializingConsumer') def test_create_consumer_collection_w_security(self, mock_deserializing_consumer, mock_avro_deserializer): @@ -138,24 +139,28 @@ def test_create_consumer_collection_w_security(self, mock_deserializing_consumer consumer = KafkaConsumer(**conf_w_security_collection) reg_client = MagicMock() - consumer.create_consumer(reg_client) + deser_consumer = consumer.create_consumer(reg_client) # Verify metadata type was taken into consideration for getting topic information reg_client.get_latest_version.assert_called_with(topic + '-value') # Verify security passed into DeserializingConsumer - mock_deserializing_consumer.assert_called_with({'bootstrap.servers': 
conf_w_security_collection['brokers'], - 'security.protocol': 'SSL', - 'ssl.ca.location': conf_w_security_collection['security']['caLoc'], - 'ssl.key.location': conf_w_security_collection['security']['keyLoc'], - 'ssl.certificate.location': conf_w_security_collection['security']['certLoc'], - 'key.deserializer': ANY, - 'value.deserializer': ANY, - 'group.id': conf_w_security_collection['group_id'], - 'auto.offset.reset': conf_w_security_collection['auto_offset_reset'] - }) + mock_deserializing_consumer.assert_called_with( + { + 'bootstrap.servers': conf_w_security_collection['brokers'], + 'security.protocol': 'SSL', + 'ssl.ca.location': conf_w_security_collection['security']['caLoc'], + 'ssl.key.location': conf_w_security_collection['security']['keyLoc'], + 'ssl.certificate.location': conf_w_security_collection['security']['certLoc'], + 'key.deserializer': ANY, + 'value.deserializer': ANY, + 'group.id': conf_w_security_collection['group_id'], + 'auto.offset.reset': conf_w_security_collection['auto_offset_reset'] + }) mock_deserializing_consumer.return_value.subscribe.assert_called_with([topic]) + self.assertIsNotNone(deser_consumer) + @patch('onestop.KafkaConsumer.AvroDeserializer') @patch('onestop.KafkaConsumer.DeserializingConsumer') def test_create_consumer_collection_wo_security(self, mock_deserializing_consumer, mock_avro_deserializer): @@ -165,20 +170,24 @@ def test_create_consumer_collection_wo_security(self, mock_deserializing_consume consumer = KafkaConsumer(**conf_wo_security_collection) reg_client = MagicMock() - consumer.create_consumer(reg_client) + deser_consumer = consumer.create_consumer(reg_client) # Verify metadata type was taken into consideration for getting topic information reg_client.get_latest_version.assert_called_with(topic + '-value') # Verify no security passed into DeserializingConsumer - mock_deserializing_consumer.assert_called_with({'bootstrap.servers': conf_wo_security_collection['brokers'], - 'key.deserializer': ANY, - 'value.deserializer': ANY, - 'group.id': conf_wo_security_collection['group_id'], - 'auto.offset.reset': conf_wo_security_collection['auto_offset_reset'] - }) + mock_deserializing_consumer.assert_called_with( + { + 'bootstrap.servers': conf_wo_security_collection['brokers'], + 'key.deserializer': ANY, + 'value.deserializer': ANY, + 'group.id': conf_wo_security_collection['group_id'], + 'auto.offset.reset': conf_wo_security_collection['auto_offset_reset'] + }) mock_deserializing_consumer.return_value.subscribe.assert_called_with([topic]) + self.assertIsNotNone(deser_consumer) + @patch('onestop.KafkaConsumer.AvroDeserializer') @patch('onestop.KafkaConsumer.DeserializingConsumer') def test_create_consumer_granule_w_security(self, mock_deserializing_consumer, mock_avro_deserializer): @@ -188,24 +197,28 @@ def test_create_consumer_granule_w_security(self, mock_deserializing_consumer, m consumer = KafkaConsumer(**conf_w_security_granule) reg_client = MagicMock() - consumer.create_consumer(reg_client) + deser_consumer = consumer.create_consumer(reg_client) # Verify metadata type was taken into consideration for getting topic information reg_client.get_latest_version.assert_called_with(topic + '-value') # Verify security passed into DeserializingConsumer - mock_deserializing_consumer.assert_called_with({'bootstrap.servers': conf_w_security_granule['brokers'], - 'security.protocol': 'SSL', - 'ssl.ca.location': conf_w_security_granule['security']['caLoc'], - 'ssl.key.location': conf_w_security_granule['security']['keyLoc'], - 
'ssl.certificate.location': conf_w_security_granule['security']['certLoc'], - 'key.deserializer': ANY, - 'value.deserializer': ANY, - 'group.id': conf_w_security_granule['group_id'], - 'auto.offset.reset': conf_w_security_granule['auto_offset_reset'] - }) + mock_deserializing_consumer.assert_called_with( + { + 'bootstrap.servers': conf_w_security_granule['brokers'], + 'security.protocol': 'SSL', + 'ssl.ca.location': conf_w_security_granule['security']['caLoc'], + 'ssl.key.location': conf_w_security_granule['security']['keyLoc'], + 'ssl.certificate.location': conf_w_security_granule['security']['certLoc'], + 'key.deserializer': ANY, + 'value.deserializer': ANY, + 'group.id': conf_w_security_granule['group_id'], + 'auto.offset.reset': conf_w_security_granule['auto_offset_reset'] + }) mock_deserializing_consumer.return_value.subscribe.assert_called_with([topic]) + self.assertIsNotNone(deser_consumer) + @patch('onestop.KafkaConsumer.AvroDeserializer') @patch('onestop.KafkaConsumer.DeserializingConsumer') def test_create_consumer_granule_wo_security(self, mock_deserializing_consumer, mock_avro_deserializer): @@ -213,28 +226,26 @@ def test_create_consumer_granule_wo_security(self, mock_deserializing_consumer, exp_topic = conf_wo_security_granule['granule_topic_consume'] conf_wo_security_granule['metadata_type'] = 'GRANULE' - # Verify security taken into consideration - meta_consumer_conf = {'bootstrap.servers': conf_wo_security_granule['brokers'], - 'key.deserializer': StringDeserializer('utf-8'), - 'value.deserializer': mock_avro_deserializer, - 'group.id': conf_wo_security_granule['group_id'], - 'auto.offset.reset': conf_wo_security_granule['auto_offset_reset'] - } - consumer = KafkaConsumer(**conf_wo_security_granule) reg_client = MagicMock() - consumer.create_consumer(reg_client) + deser_consumer = consumer.create_consumer(reg_client) # Verify metadata type was taken into consideration for getting topic information reg_client.get_latest_version.assert_called_with(exp_topic + '-value') # Verify no security passed into DeserializingConsumer called with expected configuration - exp_arguments = dict(meta_consumer_conf) - exp_arguments['key.deserializer'] = ANY - exp_arguments['value.deserializer'] = ANY - mock_deserializing_consumer.assert_called_with(exp_arguments) + mock_deserializing_consumer.assert_called_with( + { + 'bootstrap.servers': conf_wo_security_granule['brokers'], + 'key.deserializer': ANY, + 'value.deserializer': ANY, + 'group.id': conf_wo_security_granule['group_id'], + 'auto.offset.reset': conf_wo_security_granule['auto_offset_reset'] + }) mock_deserializing_consumer.return_value.subscribe.assert_called_with([exp_topic]) + self.assertIsNotNone(deser_consumer) + def test_connect(self): mock_client = MagicMock() @@ -265,7 +276,7 @@ def test_consume(self, mock_metadata_consumer, mock_message): print("Ignoring exception: {}".format(e)) # Verify kafka consumer poll called expected number of times - self.assertTrue(mock_metadata_consumer.poll.call_count == 3) + self.assertEqual(mock_metadata_consumer.poll.call_count, 3) mock_metadata_consumer.poll.assert_has_calls([call(10), call(10), call(10)]) # Verify callback function was called once with expected message attributes From 47c9dce8de9aa898e6997aecddf2e9c93687ffd8 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 4 May 2021 10:32:47 -0600 Subject: [PATCH 015/100] 1500-Changed/added to KafkaPublisherTest(s). 
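
A note on the mocking idiom these tests share with the consumer tests: SchemaRegistryClient.__init__
is patched with autospec=True, so the mock also receives the instance as its first positional
argument and must return None. That is why the assertions take the form below (expected_conf is
just an illustrative name for whatever registry configuration a given test expects):

    mock_client.return_value = None                      # __init__ must return None
    mock_client.assert_called_with(ANY, expected_conf)   # ANY stands in for the instance (self)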
--- .../tests/KafkaPublisherTest.py | 326 +++++++++++++++++- 1 file changed, 318 insertions(+), 8 deletions(-) diff --git a/onestop-python-client/tests/KafkaPublisherTest.py b/onestop-python-client/tests/KafkaPublisherTest.py index 7d992ae..643d4f5 100644 --- a/onestop-python-client/tests/KafkaPublisherTest.py +++ b/onestop-python-client/tests/KafkaPublisherTest.py @@ -1,25 +1,335 @@ import unittest - import json from onestop.KafkaPublisher import KafkaPublisher +from unittest.mock import ANY, patch, MagicMock +from confluent_kafka.schema_registry import SchemaRegistryClient class KafkaPublisherTest(unittest.TestCase): kp = None + conf_w_security = None + conf_wo_security = None - def setUp(self): + @classmethod + def setUp(cls): print("Set it up!") - self.kp = KafkaPublisher("../config/kafka-publisher-config-dev.yml") + cls.conf_w_security = { + "metadata_type" : "GRANULE", + "brokers" : "onestop-dev-cp-kafka:9092", + "schema_registry" : "http://onestop-dev-cp-schema-registry:8081", + "security" : { + "enabled" : True, + "caLoc" : "/etc/pki/tls/cert.pem", + "keyLoc" : "/etc/pki/tls/private/kafka-user.key", + "certLoc" : "/etc/pki/tls/certs/kafka-user.crt" + }, + "collection_topic_publish" : "psi-collection-input-unknown", + "granule_topic_publish" : "psi-granule-input-unknown", + "log_level" : "DEBUG" + } + cls.conf_wo_security = dict(cls.conf_w_security) + # Remove security credential section. + cls.conf_wo_security['security'] = { + "enabled":False + } + @classmethod def tearDown(self): print("Tear it down!") - def test_parse_config(self): - self.assertFalse(self.kp.conf['brokers']==None) + def test_init_happy_nonconditional_params(self): + publisher = KafkaPublisher(**self.conf_w_security) + + self.assertEqual(publisher.metadata_type, self.conf_w_security['metadata_type']) + self.assertEqual(publisher.brokers, self.conf_w_security['brokers']) + self.assertEqual(publisher.schema_registry, self.conf_w_security['schema_registry']) + self.assertEqual(publisher.security_enabled, self.conf_w_security['security']['enabled']) + self.assertEqual(publisher.collection_topic, self.conf_w_security['collection_topic_publish']) + self.assertEqual(publisher.granule_topic, self.conf_w_security['granule_topic_publish']) + + def test_init_security_enabled(self): + publisher = KafkaPublisher(**self.conf_w_security) + + self.assertEqual(publisher.security_caLoc, self.conf_w_security['security']['caLoc']) + self.assertEqual(publisher.security_keyLoc, self.conf_w_security['security']['keyLoc']) + self.assertEqual(publisher.security_certLoc, self.conf_w_security['security']['certLoc']) + + def test_init_security_disabled(self): + publisher = KafkaPublisher(**self.conf_wo_security) + + self.assertRaises(AttributeError, getattr, publisher, "security_caLoc") + self.assertRaises(AttributeError, getattr, publisher, "security_keyLoc") + self.assertRaises(AttributeError, getattr, publisher, "security_certLoc") + + def test_init_metadata_type_valid(self): + publisher = KafkaPublisher(**self.conf_w_security) + + self.assertEqual(publisher.metadata_type, self.conf_w_security['metadata_type']) + + def test_init_metadata_type_invalid(self): + wrong_metadata_type_config = dict(self.conf_w_security) + wrong_metadata_type_config['metadata_type'] = "invalid_type" + + self.assertRaises(ValueError, KafkaPublisher, **wrong_metadata_type_config) + + def test_init_extra_params(self): + conf = dict(self.conf_wo_security) + conf['junk_key'] = 'junk_value' + KafkaPublisher(**conf) + + @patch.object(SchemaRegistryClient, '__init__', 
autospec=True) + def test_register_client_w_security(self, mock_client): + exp_security_conf = { + 'url':self.conf_w_security['schema_registry'], + 'ssl.ca.location': self.conf_w_security['security']['caLoc'], + 'ssl.key.location': self.conf_w_security['security']['keyLoc'], + 'ssl.certificate.location': self.conf_w_security['security']['certLoc'] + } + mock_client.return_value = None + + publisher = KafkaPublisher(**self.conf_w_security) + publisher.register_client() + + mock_client.assert_called() + mock_client.assert_called_with(ANY, exp_security_conf) + + @patch.object(SchemaRegistryClient, '__init__', autospec=True) + def test_register_client_wo_security(self, mock_client): + exp_security_conf = { + 'url':self.conf_w_security['schema_registry'], + 'ssl.ca.location': self.conf_w_security['security']['caLoc'], + 'ssl.key.location': self.conf_w_security['security']['keyLoc'], + 'ssl.certificate.location': self.conf_w_security['security']['certLoc'] + } + mock_client.return_value = None + + publisher = KafkaPublisher(**self.conf_wo_security) + publisher.register_client() + try: + mock_client.assert_called_with(ANY, exp_security_conf) + except: + return + raise AssertionError('Expected register_client() to not have been called with security arguments.') + + @patch('onestop.KafkaPublisher.AvroSerializer') + @patch('onestop.KafkaPublisher.SerializingProducer') + def test_create_producer_calls_AvroSerializer(self, mock_serializing_publisher, mock_avro_serializer): + conf_w_security_collection = dict(self.conf_w_security) + conf_w_security_collection['metadata_type'] = "COLLECTION" + + publisher = KafkaPublisher(**conf_w_security_collection) + reg_client = publisher.register_client() + reg_client.get_latest_version = MagicMock() + publisher.create_producer(reg_client) + + # Verify AvroSerializer called with expected registry client + mock_avro_serializer.assert_called_with(ANY, reg_client) + + @patch('onestop.KafkaPublisher.AvroSerializer') + @patch('onestop.KafkaPublisher.SerializingProducer') + def test_create_producer_collection_w_security(self, mock_serializing_producer, mock_avro_serializer): + conf_w_security_collection = dict(self.conf_w_security) + topic = conf_w_security_collection['collection_topic_publish'] + conf_w_security_collection['metadata_type'] = 'COLLECTION' + + publisher = KafkaPublisher(**conf_w_security_collection) + reg_client = MagicMock() + prod = publisher.create_producer(reg_client) + + # Verify metadata type was taken into consideration for getting topic information + reg_client.get_latest_version.assert_called_with(topic + '-value') + + # Verify security passed into SerializingProducer + mock_serializing_producer.assert_called_with( + { + 'bootstrap.servers': conf_w_security_collection['brokers'], + 'security.protocol': 'SSL', + 'ssl.ca.location': conf_w_security_collection['security']['caLoc'], + 'ssl.key.location': conf_w_security_collection['security']['keyLoc'], + 'ssl.certificate.location': conf_w_security_collection['security']['certLoc'], + 'value.serializer': ANY, + }) + + self.assertIsNotNone(prod) + + @patch('onestop.KafkaPublisher.AvroSerializer') + @patch('onestop.KafkaPublisher.SerializingProducer') + def test_create_producer_collection_wo_security(self, mock_serializing_producer, mock_avro_serializer): + conf_wo_security_collection = dict(self.conf_wo_security) + topic = conf_wo_security_collection['collection_topic_publish'] + conf_wo_security_collection['metadata_type'] = 'COLLECTION' + + publisher = KafkaPublisher(**conf_wo_security_collection) + 
reg_client = MagicMock() + prod = publisher.create_producer(reg_client) + + # Verify metadata type was taken into consideration for getting topic information + reg_client.get_latest_version.assert_called_with(topic + '-value') + + # Verify no security passed into SerializingProducer + mock_serializing_producer.assert_called_with( + { + 'bootstrap.servers': conf_wo_security_collection['brokers'], + 'value.serializer': ANY, + }) + + self.assertIsNotNone(prod) + + @patch('onestop.KafkaPublisher.AvroSerializer') + @patch('onestop.KafkaPublisher.SerializingProducer') + def test_create_producer_granule_w_security(self, mock_serializing_producer, mock_avro_serializer): + conf_w_security_granule = dict(self.conf_w_security) + topic = conf_w_security_granule['granule_topic_publish'] + conf_w_security_granule['metadata_type'] = 'GRANULE' + + publisher = KafkaPublisher(**conf_w_security_granule) + reg_client = MagicMock() + prod = publisher.create_producer(reg_client) + + # Verify metadata type was taken into consideration for getting topic information + reg_client.get_latest_version.assert_called_with(topic + '-value') + + # Verify security passed into SerializingProducer + mock_serializing_producer.assert_called_with( + { + 'bootstrap.servers': conf_w_security_granule['brokers'], + 'security.protocol': 'SSL', + 'ssl.ca.location': conf_w_security_granule['security']['caLoc'], + 'ssl.key.location': conf_w_security_granule['security']['keyLoc'], + 'ssl.certificate.location': conf_w_security_granule['security']['certLoc'], + 'value.serializer': ANY, + }) + + self.assertIsNotNone(prod) + + @patch('onestop.KafkaPublisher.AvroSerializer') + @patch('onestop.KafkaPublisher.SerializingProducer') + def test_create_producer_granule_wo_security(self, mock_serializing_producer, mock_avro_serializer): + conf_wo_security_granule = dict(self.conf_wo_security) + exp_topic = conf_wo_security_granule['granule_topic_publish'] + conf_wo_security_granule['metadata_type'] = 'GRANULE' + + publisher = KafkaPublisher(**conf_wo_security_granule) + reg_client = MagicMock() + prod = publisher.create_producer(reg_client) + + # Verify metadata type was taken into consideration for getting topic information + reg_client.get_latest_version.assert_called_with(exp_topic + '-value') + + # Verify no security passed into SerializingProducer called with expected configuration + mock_serializing_producer.assert_called_with( + { + 'bootstrap.servers': conf_wo_security_granule['brokers'], + 'value.serializer': ANY, + }) + + self.assertIsNotNone(prod) + + def test_connect(self): + mock_client = MagicMock() + + publisher = KafkaPublisher(**self.conf_w_security) + publisher.register_client = MagicMock(return_value=mock_client) + publisher.create_producer = MagicMock(return_value=MagicMock(mock_client)) + publisher.connect() + + publisher.register_client.assert_called_once() + publisher.create_producer.assert_called_with(mock_client) + + def test_get_collection_key_from_uuid(self): + expKey = '12345678-1234-5678-1234-567812345678' + for uuid in [ + '{12345678-1234-5678-1234-567812345678}', + '12345678123456781234567812345678', + 'urn:uuid:12345678-1234-5678-1234-567812345678', + b'\x12\x34\x56\x78'*4, +# b'\x78\x56\x34\x12\x34\x12\x78\x56' + b'\x12\x34\x56\x78\x12\x34\x56\x78', +# {0x12345678, 0x1234, 0x5678, 0x12, 0x34, 0x567812345678}, +# 0x12345678123456781234567812345678, + ]: + with self.subTest(uuid=uuid): + print ("Testing uuid "+str(uuid)) + key = KafkaPublisher.get_collection_key_from_uuid(uuid) + print("Acquired uuid="+str(key)) + 
self.assertEqual(key, expKey) + + @patch('onestop.KafkaPublisher.SerializingProducer') + def test_publish_collection(self, mock_collection_producer): + uuid = '{12345678-1234-5678-1234-567812345678}' + content_dict = { + 'title': 'this is a test', + 'location': 'somewhere in space' + } + method = 'PUT' + publisher = KafkaPublisher(**self.conf_w_security) + publisher.register_client = MagicMock(return_value=MagicMock()) + mock_collection_producer.produce = MagicMock() + mock_collection_producer.poll.side_effect = [1] + + publisher.publish_collection(mock_collection_producer, uuid, content_dict, method) + + # Verify kafka produce called once + mock_collection_producer.produce.assert_called_with( + topic=self.conf_w_security['collection_topic_publish'], + value={ + 'type': 'collection', + 'content': json.dumps(content_dict), + 'contentType': 'application/json', + 'method': method, + 'source': 'unknown', + }, + key=publisher.get_collection_key_from_uuid(uuid), + on_delivery=publisher.delivery_report + ) + + # Verify kafka produce poll called once + mock_collection_producer.poll.assert_called_once() + + + @patch('onestop.KafkaPublisher.SerializingProducer') + def test_publish_granule(self, mock_collection_producer): + uuid = '{12345678-1234-5678-1234-567812345678}' + content_dict = { + 'title': 'this is a test', + 'location': 'somewhere in space', + 'relationships': [{"type": "COLLECTION", + "id": '{12345678-1234-5678-1234-567812345678}'}], + 'errors': [], + 'analysis': 'No analysis', + 'fileLocations': 'archived', + 'fileInformation': 'no information', + 'discovery': 'AWS' + } + publisher = KafkaPublisher(**self.conf_w_security) + publisher.register_client = MagicMock(return_value=MagicMock()) + mock_collection_producer.produce = MagicMock() + mock_collection_producer.poll.side_effect = [1] + + publisher.publish_granule(mock_collection_producer, uuid, content_dict) + + # Verify kafka produce called once + mock_collection_producer.produce.assert_called_with( + topic=self.conf_w_security['granule_topic_publish'], + value={ + 'type': 'granule', + 'content': json.dumps(content_dict), + #'contentType': 'application/json', + 'method': 'PUT', + 'source': 'unknown', + 'operation': None, + 'relationships': content_dict['relationships'], + 'errors': content_dict['errors'], + 'analysis': content_dict['analysis'], + 'fileLocations': {'fileLocation': content_dict['fileLocations']}, + 'fileInformation': content_dict['fileInformation'], + 'discovery': content_dict['discovery'] + }, + key=publisher.get_collection_key_from_uuid(uuid), + on_delivery=publisher.delivery_report + ) - def test_publish_collection(self): - print("Publish collection") - # Integration test TBD + # Verify kafka produce poll called once + mock_collection_producer.poll.assert_called_once() if __name__ == '__main__': unittest.main() \ No newline at end of file From 921490ac1b3d0cc113e1c8a62ef8ab3f00cd1aa8 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 4 May 2021 10:34:45 -0600 Subject: [PATCH 016/100] 1500-In KafkaPublisher cleaned up documentation, added method to consolidate generating the key from UUID, and added a little logging. Consolidated topic generation code too. 
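The new key helper relies on the standard uuid module, which already normalizes the common spellings of a collection id (braced, bare hex, urn form, raw 16 bytes) to one canonical string. A small sketch mirroring the helper's logic:

    from uuid import UUID

    def key_from_uuid(collection_uuid):
        # same branch the helper uses: raw bytes vs. any hex-style string
        if isinstance(collection_uuid, bytes):
            return str(UUID(bytes=collection_uuid))
        return str(UUID(hex=collection_uuid))

    for value in ('{12345678-1234-5678-1234-567812345678}',
                  '12345678123456781234567812345678',
                  'urn:uuid:12345678-1234-5678-1234-567812345678',
                  b'\x12\x34\x56\x78' * 4):
        assert key_from_uuid(value) == '12345678-1234-5678-1234-567812345678'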
--- .../onestop/KafkaPublisher.py | 77 +++++++++++-------- 1 file changed, 46 insertions(+), 31 deletions(-) diff --git a/onestop-python-client/onestop/KafkaPublisher.py b/onestop-python-client/onestop/KafkaPublisher.py index 125174b..047783c 100644 --- a/onestop-python-client/onestop/KafkaPublisher.py +++ b/onestop-python-client/onestop/KafkaPublisher.py @@ -48,7 +48,7 @@ class KafkaPublisher: publish_collection(collection_producer, collection_uuid, content_dict, method) Publish collection to collection topic - publish_granule(granule_producer, record_uuid, collection_uuid, content_dict) + publish_granule(granule_producer, collection_uuid, content_dict) Publish granule to granule topic """ @@ -137,27 +137,27 @@ def create_producer(self, registry_client): :return: SerializingProducer Object based on initial constructor values """ - metadata_schema = None + topic = None if self.metadata_type == "COLLECTION": - metadata_schema = registry_client.get_latest_version(self.collection_topic + '-value').schema.schema_str + topic = self.collection_topic if self.metadata_type == "GRANULE": - metadata_schema = registry_client.get_latest_version(self.granule_topic + '-value').schema.schema_str + topic = self.granule_topic + metadata_schema = registry_client.get_latest_version(topic + '-value').schema.schema_str metadata_serializer = AvroSerializer(metadata_schema, registry_client) - producer_conf = {'bootstrap.servers': self.brokers} + conf = {'bootstrap.servers': self.brokers} if self.security_enabled: - producer_conf['security.protocol'] = 'SSL' - producer_conf['ssl.ca.location'] = self.security_caLoc - producer_conf['ssl.key.location'] = self.security_keyLoc - producer_conf['ssl.certificate.location'] = self.security_certLoc + conf['security.protocol'] = 'SSL' + conf['ssl.ca.location'] = self.security_caLoc + conf['ssl.key.location'] = self.security_keyLoc + conf['ssl.certificate.location'] = self.security_certLoc - meta_producer_conf = producer_conf - meta_producer_conf['value.serializer'] = metadata_serializer + conf['value.serializer'] = metadata_serializer - metadata_producer = SerializingProducer(meta_producer_conf) + metadata_producer = SerializingProducer(conf) return metadata_producer def delivery_report(self, err, msg): @@ -174,14 +174,27 @@ def delivery_report(self, err, msg): else: self.logger.error('Message delivered to {} [{}]'.format(msg.topic(), msg.partition())) + @staticmethod + def get_collection_key_from_uuid(collection_uuid): + """ + Create a key to use in a kafka message from the given string representation of the collection UUID. + :param collection_uuid: str + collection string to turn into a key. 
+ :return: + """ + if type(collection_uuid) == bytes: + return str(UUID(bytes=collection_uuid)) + else: + return str(UUID(hex=collection_uuid)) + def publish_collection(self, collection_producer, collection_uuid, content_dict, method): """ - Publish collection to collection topic + Publish a collection to the collection topic :param collection_producer: SerializingProducer use connect() :param collection_uuid: str - collection uuid that you want colelction to have + collection uuid that you want the collection to have :param content_dict: dict dictionary containing information you want to publish :param method: str @@ -190,11 +203,9 @@ def publish_collection(self, collection_producer, collection_uuid, content_dict, :return: str returns msg if publish is successful, kafka error if it wasn't successful """ - self.logger.info('Publish collection') - if type(collection_uuid) == bytes: - key = str(UUID(bytes=collection_uuid)) - else: - key = str(UUID(hex=collection_uuid)) + self.logger.info('Publishing collection') + + key = self.get_collection_key_from_uuid(collection_uuid) value_dict = { 'type': 'collection', @@ -204,20 +215,22 @@ def publish_collection(self, collection_producer, collection_uuid, content_dict, 'source': 'unknown', } try: - collection_producer.produce(topic=self.collection_topic, value=value_dict, key=key, - on_delivery=self.delivery_report) + self.logger.debug('Publishing collection with topic='+self.collection_topic+' key='+key+' value='+str(value_dict)) + collection_producer.produce( + topic=self.collection_topic, + value=value_dict, + key=key, + on_delivery=self.delivery_report) except KafkaError: raise collection_producer.poll() - def publish_granule(self, granule_producer, record_uuid, collection_uuid, content_dict): + def publish_granule(self, granule_producer, collection_uuid, content_dict): """ - Publishes granule to granule topic + Publish a granule to the granule topic :param granule_producer: SerializingProducer use connect() - :param record_uuid: str - record uuid associated with the granule :param collection_uuid: str collection uuid associated with the granule :param content_dict: dict @@ -228,10 +241,8 @@ def publish_granule(self, granule_producer, record_uuid, collection_uuid, conten """ self.logger.info('Publish granule') - if type(record_uuid) == bytes: - key = str(UUID(bytes=collection_uuid)) - else: - key = str(UUID(hex=collection_uuid)) + key = self.get_collection_key_from_uuid(collection_uuid) + """ if type(collection_uuid) == bytes: content_dict['relationships'] = [{"type": "COLLECTION", "id": collection_uuid.hex()}] @@ -264,8 +275,12 @@ def publish_granule(self, granule_producer, record_uuid, collection_uuid, conten } try: - granule_producer.produce(topic=self.granule_topic, value=value_dict, key=key, - on_delivery=self.delivery_report) + self.logger.debug('Publishing granule with topic='+self.granule_topic+' key='+key+' value='+str(value_dict)) + granule_producer.produce( + topic=self.granule_topic, + value=value_dict, + key=key, + on_delivery=self.delivery_report) except KafkaError: raise granule_producer.poll() From a337c39f88e953d1a99fc9bfc3d15677e63864bd Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 4 May 2021 10:59:27 -0600 Subject: [PATCH 017/100] 1500-Adjusted csb config variable name from file_identifier_prefix to file_id_prefix as it is in half the other places including S3MessageAdapter constructor. 
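The rename matters because these YAML files are loaded into a dict and splatted straight into the constructors as keyword arguments, so the key has to line up with the parameter name exactly; unrecognized keys are tolerated through **wildargs, misnamed required ones are not. A rough sketch of that wiring (the config path is illustrative, and it assumes the file carries the constructor's required keys such as access_bucket, type, file_id_prefix and collection_id):

    import yaml
    from onestop.util.S3MessageAdapter import S3MessageAdapter

    with open('scripts/config/csb-data-stream-config.yml') as f:   # illustrative path
        conf = yaml.safe_load(f)

    # 'file_id_prefix' in the YAML now matches the keyword argument;
    # any extra keys in conf are swallowed by **wildargs
    s3ma = S3MessageAdapter(**conf)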
--- .../config/csb-data-stream-config-template.yml | 2 +- scripts/config/csb-data-stream-config.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/onestop-python-client/config/csb-data-stream-config-template.yml b/onestop-python-client/config/csb-data-stream-config-template.yml index 887c9be..56bad99 100644 --- a/onestop-python-client/config/csb-data-stream-config-template.yml +++ b/onestop-python-client/config/csb-data-stream-config-template.yml @@ -9,7 +9,7 @@ registry_base_url: http://localhost/onestop/api/registry onestop_base_url: http://localhost/onestop/api/search/search access_bucket: https://archive-testing-demo.s3-us-east-2.amazonaws.com #access_bucket: https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com -file_identifier_prefix: "gov.noaa.ncei.csb:" +file_id_prefix: "gov.noaa.ncei.csb:" prefixMap: NESDIS/CSB: 'fdb56230-87f4-49f2-ab83-104cfd073177' diff --git a/scripts/config/csb-data-stream-config.yml b/scripts/config/csb-data-stream-config.yml index 1556ab9..24a7cf6 100644 --- a/scripts/config/csb-data-stream-config.yml +++ b/scripts/config/csb-data-stream-config.yml @@ -6,7 +6,7 @@ collection_id: fdb56230-87f4-49f2-ab83-104cfd073177 psi_registry_url: https://internal-a683c98a66fb011eaa4230e0d5e5657f-369075387.us-east-1.elb.amazonaws.com access_bucket: https://archive-testing-demo.s3-us-east-2.amazonaws.com #access_bucket: https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com -file_identifier_prefix: "gov.noaa.ncei.csb:" +file_id_prefix: "gov.noaa.ncei.csb:" prefixMap: NESDIS/CSB: 'fdb56230-87f4-49f2-ab83-104cfd073177' From 5aab6d67df06f6faed7e43343b2dcfa2cf271a38 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 4 May 2021 11:28:56 -0600 Subject: [PATCH 018/100] 1500-Fixed lack of carriage returnin S3Utils for legibility. --- onestop-python-client/onestop/util/S3Utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/onestop-python-client/onestop/util/S3Utils.py b/onestop-python-client/onestop/util/S3Utils.py index 60fb876..e2f2e32 100644 --- a/onestop-python-client/onestop/util/S3Utils.py +++ b/onestop-python-client/onestop/util/S3Utils.py @@ -109,7 +109,8 @@ def connect(self, client_type, region): if client_type == "glacier": boto = boto3.client( "glacier", - region_name=region,aws_access_key_id=self.access_key, + region_name=region, + aws_access_key_id=self.access_key, aws_secret_access_key=self.secret_key) if client_type == "session": From 5975e1d24d47ccb80fa758b71b18019b1e57f7c6 Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 7 May 2021 11:21:15 -0600 Subject: [PATCH 019/100] 1500-Changed SqsConsumer class constructor to take dictionary with extra parameters allowed. Refactored out of SqsConsumer the connecting part and put into S3Utils, this left only log_level as class var. Put creating a Queue object into receive_messages (can refactor out if ever need again, but single line didn't seem to warrent its own method). Added debug logging. 
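With this change the consumer no longer reads config files or opens its own AWS session; the caller supplies the boto pieces. Roughly, usage at this point in the series looks like the sketch below (it assumes a boto3 SQS resource, since receive_messages builds the Queue via .Queue(), credentials coming from the environment, and an illustrative queue name):

    import boto3
    from onestop.util.SqsConsumer import SqsConsumer

    def handle_records(records):
        print('got %d record(s)' % len(records))

    sqs = boto3.resource('sqs', region_name='us-east-2')
    consumer = SqsConsumer(log_level='DEBUG')
    # (sqs, queue name, max polls, callback) per the signature introduced here
    consumer.receive_messages(sqs, 'cloud-archive-client-sqs', 2, handle_records)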
--- .../onestop/util/SqsConsumer.py | 95 +++++++------------ 1 file changed, 33 insertions(+), 62 deletions(-) diff --git a/onestop-python-client/onestop/util/SqsConsumer.py b/onestop-python-client/onestop/util/SqsConsumer.py index f782cc5..e7ceed4 100644 --- a/onestop-python-client/onestop/util/SqsConsumer.py +++ b/onestop-python-client/onestop/util/SqsConsumer.py @@ -1,10 +1,7 @@ -import logging -from datetime import datetime, timezone -import yaml -import boto3 import json -from onestop.util.ClientLogger import ClientLogger +from datetime import datetime, timezone +from onestop.util.ClientLogger import ClientLogger class SqsConsumer: """ @@ -12,101 +9,75 @@ class SqsConsumer: Attributes ---------- - conf: yaml file - aws-util-config-dev.yml - cred: yaml file - credentials.yml - logger: ClientLogger object - utilizes python logger library and creates logging for our specific needs - logger.info: ClientLogger object - logging statement that occurs when the class is instantiated + logger: ClientLogger object + utilizes python logger library and creates logging for our specific needs Methods ------- - connect() - connects a boto sqs instance based on configurations in conf and cred yml files - - receive_messages(queue, sqs_max_polls, cb) - polls for messages in the queue + receive_messages(sqs_client, sqs_queue_name, sqs_max_polls, cb) + polls for messages in the queue """ - conf = None - def __init__(self, conf_loc, cred_loc): + def __init__(self, log_level = 'INFO', **wildargs): """ - - :param conf_loc: yaml file - aws-util-config-dev.yml - :param cred_loc: yaml file - credentials.yml - - Other Attributes - ---------------- - logger: ClientLogger object - utilizes python logger library and creates logging for our specific needs - logger.info: ClientLogger object - logging statement that occurs when the class is instantiated - + Attributes + ---------- + log_level: str + The log level to use for this class (Defaults to 'INFO') """ - with open(conf_loc) as f: - self.conf = yaml.load(f, Loader=yaml.FullLoader) - - with open(cred_loc) as f: - self.cred = yaml.load(f, Loader=yaml.FullLoader) - - self.logger = ClientLogger.get_logger(self.__class__.__name__, self.conf['log_level'], False) + self.logger = ClientLogger.get_logger(self.__class__.__name__, log_level, False) self.logger.info("Initializing " + self.__class__.__name__) - def connect(self): - """ - Connects a boto sqs instance based on configurations in conf and cred yml files + if wildargs: + self.logger.error("There were extra constructor arguments: " + str(wildargs)) - :return: boto sqs - returns instance of boto sqs resource - """ - boto_session = boto3.Session(aws_access_key_id=self.cred['sandbox']['access_key'], - aws_secret_access_key=self.cred['sandbox']['secret_key']) - # Get the queue. This returns an SQS.Queue instance - sqs_session = boto_session.resource('sqs', region_name=self.conf['s3_region']) - sqs_queue = sqs_session.Queue(self.conf['sqs_url']) - self.logger.info("Connecting to " + self.conf['sqs_url']) - return sqs_queue - - def receive_messages(self, queue, sqs_max_polls, cb): + def receive_messages(self, sqs_client, sqs_queue_name, sqs_max_polls, cb): """ - Polls for messages in the queue + Polls for messages from an sqs queue - :param queue: boto sqs resource - instance of boto sqs resource given from connect() + :param sqs_client: boto SQS.Client + instance of boto sqs Client + :param sqs_queue_name: str + name of the queue to connect to. 
:param sqs_max_polls: int number of polls :param cb: function call back function - :return: Dependent on the call back function + :return: If the Message has a Records key then the call back function gets called on the Message. """ self.logger.info("Receive messages") + self.logger.info("Polling %d time(s) for SQS messages" % sqs_max_polls) + + sqs_queue = sqs_client.Queue(sqs_queue_name) i = 1 while i <= sqs_max_polls: self.logger.info("Polling attempt: " + str(i)) i = i + 1 - sqs_messages = queue.receive_messages(MaxNumberOfMessages=10, WaitTimeSeconds=10) + sqs_messages = sqs_queue.receive_messages( + MaxNumberOfMessages=10, + WaitTimeSeconds=10 + ) self.logger.info("Received %d messages." % len(sqs_messages)) + self.logger.debug("Messages: %s" % sqs_messages) for sqs_message in sqs_messages: try: # Log start time dt_start = datetime.now(tz=timezone.utc) - self.logger.info("Started processing message") + self.logger.info("Starting processing message") + self.logger.debug("Message: %s" % sqs_message) + self.logger.debug("Message body: %s" % sqs_message.body) message_body = json.loads(sqs_message.body) + self.logger.debug("Message body message: %s" % message_body['Message']) message_content = json.loads(message_body['Message']) if 'Records' in message_content: recs = message_content['Records'] - self.logger.info("Received message") self.logger.debug('Records: ' + str(recs)) else: self.logger.info("s3 event without records content received.") From 204a2bd0c4d7101476db87322bf621e1fdd34a07 Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 7 May 2021 14:41:42 -0600 Subject: [PATCH 020/100] 1500-Decided to put "connect" back into SqsConsumer. Adjusted input parameters for receive_message so a user could create their own queue or use our connect to do so and pass either in. --- .../onestop/util/SqsConsumer.py | 22 +++++++++++++------ 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/onestop-python-client/onestop/util/SqsConsumer.py b/onestop-python-client/onestop/util/SqsConsumer.py index e7ceed4..bd7f98f 100644 --- a/onestop-python-client/onestop/util/SqsConsumer.py +++ b/onestop-python-client/onestop/util/SqsConsumer.py @@ -31,14 +31,24 @@ def __init__(self, log_level = 'INFO', **wildargs): if wildargs: self.logger.error("There were extra constructor arguments: " + str(wildargs)) - def receive_messages(self, sqs_client, sqs_queue_name, sqs_max_polls, cb): + def connect(self, sqs_resource, sqs_queue_name): + """ + Gets a boto SQS.Queue resource. + :param sqs_resource: boto SQS.Resource + SQS resource to create the queue from. + :param sqs_queue_name: str + SQS queue name to create and return a boto SQS.Queue object to. + :return: SQS.Queue + An SQS.Queue resource to use for Queue operations. + """ + return sqs_resource.create_queue(QueueName=sqs_queue_name) + + def receive_messages(self, sqs_queue, sqs_max_polls, cb): """ Polls for messages from an sqs queue - :param sqs_client: boto SQS.Client - instance of boto sqs Client - :param sqs_queue_name: str - name of the queue to connect to. + :param sqs_queue: boto SQS.Queue object + boto SQS Queue object. Can be generated by the method in this class. 
:param sqs_max_polls: int number of polls :param cb: function @@ -50,8 +60,6 @@ def receive_messages(self, sqs_client, sqs_queue_name, sqs_max_polls, cb): self.logger.info("Receive messages") self.logger.info("Polling %d time(s) for SQS messages" % sqs_max_polls) - sqs_queue = sqs_client.Queue(sqs_queue_name) - i = 1 while i <= sqs_max_polls: self.logger.info("Polling attempt: " + str(i)) From 47b3e5bd187e082c45af07aab585126afdf1828e Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 7 May 2021 17:26:56 -0600 Subject: [PATCH 021/100] 1500-Fixed some bugs in SqsConsumer. --- .../onestop/util/SqsConsumer.py | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/onestop-python-client/onestop/util/SqsConsumer.py b/onestop-python-client/onestop/util/SqsConsumer.py index bd7f98f..4f503d8 100644 --- a/onestop-python-client/onestop/util/SqsConsumer.py +++ b/onestop-python-client/onestop/util/SqsConsumer.py @@ -60,11 +60,17 @@ def receive_messages(self, sqs_queue, sqs_max_polls, cb): self.logger.info("Receive messages") self.logger.info("Polling %d time(s) for SQS messages" % sqs_max_polls) - i = 1 - while i <= sqs_max_polls: + if sqs_max_polls < 1: + raise ValueError('Max polling value should be greater than 0.') + + for i in range(1, sqs_max_polls+1): self.logger.info("Polling attempt: " + str(i)) - i = i + 1 + # boto3 SQS.Queue appears to have a subset of SQS.Client methods plus a few management queue ones. + # The ones they do share seem to have different return types. + # The message method names are different and return types different: + # Client.send_message and Queue.send_message and Queue.send_messages + # Client.receive_message and Queue.receive_messages sqs_messages = sqs_queue.receive_messages( MaxNumberOfMessages=10, WaitTimeSeconds=10 @@ -86,9 +92,10 @@ def receive_messages(self, sqs_queue, sqs_max_polls, cb): if 'Records' in message_content: recs = message_content['Records'] - self.logger.debug('Records: ' + str(recs)) + self.logger.debug('Message "Records": %s' % recs) + cb(recs) else: - self.logger.info("s3 event without records content received.") + self.logger.info("s3 event message without 'Records' content received.") sqs_message.delete() @@ -98,9 +105,8 @@ def receive_messages(self, sqs_queue, sqs_max_polls, cb): processing_time = dt_end - dt_start self.logger.info("Completed processing message (s):" + str(processing_time.microseconds * 1000)) - cb(recs) except: self.logger.exception( "An exception was thrown while processing a message, but this program will continue. The " - "message will not be deleted from the SQS queue. The message was: %s" % sqs_message.body) + "message will not be deleted from the SQS queue. The message was: %s" % sqs_message) From b2143aefb499ac3faff7d10b6b9f74b49ff2ae72 Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 7 May 2021 17:27:15 -0600 Subject: [PATCH 022/100] 1500-Due to changing SqsConsumer class constructor to take dictionary with extra parameters allowed so adjusted the tests and added more with more verification. 
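The reworked tests below all follow the same moto-backed fixture recipe: create an in-memory queue through a mocked resource, push a message through a mocked client, then drive the consumer against it. The bare recipe, with dummy credentials since moto never talks to AWS:

    import json
    import boto3
    from moto import mock_sqs

    @mock_sqs
    def sqs_fixture_sketch():
        session = boto3.Session(aws_access_key_id='testing',
                                aws_secret_access_key='testing',
                                region_name='us-east-2')
        queue = session.resource('sqs').create_queue(QueueName='test_queue')
        session.client('sqs').send_message(
            QueueUrl=queue.url,
            MessageBody=json.dumps({'Message': json.dumps({'Records': []})}))
        return queue.receive_messages(MaxNumberOfMessages=10, WaitTimeSeconds=0)

    assert len(sqs_fixture_sketch()) == 1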
--- .../tests/util/SqsConsumerTest.py | 175 ++++++++++++++++-- 1 file changed, 159 insertions(+), 16 deletions(-) diff --git a/onestop-python-client/tests/util/SqsConsumerTest.py b/onestop-python-client/tests/util/SqsConsumerTest.py index 4d6be77..87f9005 100644 --- a/onestop-python-client/tests/util/SqsConsumerTest.py +++ b/onestop-python-client/tests/util/SqsConsumerTest.py @@ -1,34 +1,177 @@ import unittest -import boto3 +import json + from moto import mock_sqs -from tests.utils import abspath_from_relative +from unittest.mock import ANY, patch, MagicMock, call +from onestop.util.S3Utils import S3Utils from onestop.util.SqsConsumer import SqsConsumer class SqsConsumerTest(unittest.TestCase): - sc = None + config_dict = { + 'access_key': 'test_access_key', + 'secret_key': 'test_secret_key', + 's3_region': 'us-east-2', + 's3_bucket': 'archive-testing-demo', + 'sqs_url': 'https://sqs.us-east-2.amazonaws.com/798276211865/cloud-archive-client-sqs', + 'type': 'COLLECTION', + 'file_id_prefix': 'gov.noaa.ncei.csb:', + 'collection_id': 'fdb56230-87f4-49f2-ab83-104cfd073177', + 'registry_base_url': 'http://localhost/onestop/api/registry', + 'registry_username': 'admin', + 'registry_password': 'whoknows', + 'onestop_base_url': 'http://localhost/onestop/api/search/search', + 'log_level': 'DEBUG' + } + + records = [{"eventVersion":"2.1"}] + message = json.dumps( + {"Type": "Notification", + "MessageId": "9d0691d2-ae9c-58f9-a9f4-c8dcf05d87be", + "TopicArn": "arn:aws:sns:us-east-1:798276211865:archive-testing-demo-backup-use-1", + "Subject": "Amazon S3 Notification", + "Message": json.dumps({"Records": records}), + "Timestamp": "2021-05-06T21:15:45.427Z", + "SignatureVersion": "1", + "Signature": "Ui5s4uVgcMr5fjGmePCMgmi14Dx9oS8hIpjXXiQo+xZPgsHkUayz7dEeGmMGGt45l8blmZTZEbxJG+HVGfIUmQGRqoimwiLm+mIAaNIN/BV76FVFcQUIkORX8gYN0a4RS3HU8/ElrKFK8Iz0zpxJdjwxa3xPCDwu+dTotiLTJxSouvg8MmkkDnq758a8vZ9WK2PaOlZiZ3m8Mv2ZvLrozZ/DAAz48HSad6Mymhit82RpGCUxy4SDwXVlP/nLB01AS11Gp2HowJR8NXyStrZYzzQEc+PebITaExyikgTMiVhRHkmb7JrtZPpgZu2daQsSooqpwyIzb6pvgwu9W54jkw==", + "SigningCertURL": "https://sns.us-east-1.amazonaws.com/SimpleNotificationService-010a507c1833636cd94bdb98bd93083a.pem", + "UnsubscribeURL": "https://sns.us-east-1.amazonaws.com/?Action=Unsubscribe&SubscriptionArn=arn:aws:sns:us-east-1:798276211865:archive-testing-demo-backup-use-1:e7a9a9f5-792e-48a6-9ec8-40f7f5a8f600" + }) + message_wo_records = json.dumps( + {"Type": "Notification", + "MessageId": "9d0691d2-ae9c-58f9-a9f4-c8dcf05d87be", + "TopicArn": "arn:aws:sns:us-east-1:798276211865:archive-testing-demo-backup-use-1", + "Subject": "Amazon S3 Notification", + "Message": "{}", + "Timestamp": "2021-05-06T21:15:45.427Z", + "SignatureVersion": "1", + "Signature": "Ui5s4uVgcMr5fjGmePCMgmi14Dx9oS8hIpjXXiQo+xZPgsHkUayz7dEeGmMGGt45l8blmZTZEbxJG+HVGfIUmQGRqoimwiLm+mIAaNIN/BV76FVFcQUIkORX8gYN0a4RS3HU8/ElrKFK8Iz0zpxJdjwxa3xPCDwu+dTotiLTJxSouvg8MmkkDnq758a8vZ9WK2PaOlZiZ3m8Mv2ZvLrozZ/DAAz48HSad6Mymhit82RpGCUxy4SDwXVlP/nLB01AS11Gp2HowJR8NXyStrZYzzQEc+PebITaExyikgTMiVhRHkmb7JrtZPpgZu2daQsSooqpwyIzb6pvgwu9W54jkw==", + "SigningCertURL": "https://sns.us-east-1.amazonaws.com/SimpleNotificationService-010a507c1833636cd94bdb98bd93083a.pem", + "UnsubscribeURL": "https://sns.us-east-1.amazonaws.com/?Action=Unsubscribe&SubscriptionArn=arn:aws:sns:us-east-1:798276211865:archive-testing-demo-backup-use-1:e7a9a9f5-792e-48a6-9ec8-40f7f5a8f600" + }) + + @mock_sqs def setUp(self): print("Set it up!") - self.sc = SqsConsumer(abspath_from_relative(__file__, 
"../../config/aws-util-config-dev.yml"), - abspath_from_relative(__file__, "../../config/credentials-template.yml")) + + self.s3_utils = S3Utils(**self.config_dict) + self.sqs_consumer = SqsConsumer(**self.config_dict) def tearDown(self): print("Tear it down!") - def test_parse_config(self): - self.assertFalse(self.sc.conf['sqs_url']==None) + @mock_sqs + def test_connect(self): + queue_name = 'test' + sqs_resource = self.s3_utils.connect_to_resource('sqs', self.config_dict['s3_region']) + expQueue = sqs_resource.create_queue(QueueName=queue_name) + queue = self.sqs_consumer.connect(sqs_resource, queue_name) + + self.assertEqual(expQueue.url, queue.url) + + # Kind of pointless since we catch every exception this doesn't fail when it should.... + @mock_sqs + def test_receive_messages_no_records(self): + mock_cb = MagicMock() + + # Create the mock queue beforehand and set SqsConsumer's 'sqs_url' to the mock's URL + queue_name = 'test_queue' + sqs_resource = self.s3_utils.connect_to_resource('sqs', self.config_dict['s3_region']) + sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url + + # Send a test message lacking Records field + sqs_client = self.s3_utils.connect('sqs', self.config_dict['s3_region']) + sqs_client.send_message( + QueueUrl=sqs_queue_url, + MessageBody= self.message_wo_records + ) + queue = sqs_resource.Queue(queue_name) + + self.sqs_consumer.receive_messages(queue, 1, mock_cb) + + # Verify callback function was called once with expected message attributes + mock_cb.assert_not_called() + + @mock_sqs + def test_receive_messages_fails_invalid_sqs_max_polls(self): + with self.assertRaises(ValueError): + self.sqs_consumer.receive_messages(MagicMock(), 0, MagicMock()) + + @mock_sqs + def test_receive_messages_polls_msgs_expected_times(self): + mock_cb = MagicMock() + queue = MagicMock() + + sqs_max_polls = 2 + self.sqs_consumer.receive_messages(queue, sqs_max_polls, mock_cb) + + # Verify polling called expected times + self.assertEqual(queue.receive_messages.call_count, sqs_max_polls) + + @mock_sqs + def test_receive_messages_callback_occurs(self): + mock_cb = MagicMock() + + # Create the mock queue beforehand and set SqsConsumer's 'sqs_url' to the mock's URL + queue_name = 'test_queue' + sqs_resource = self.s3_utils.connect_to_resource('sqs', self.config_dict['s3_region']) + sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url + + # Send a test message + sqs_client = self.s3_utils.connect('sqs', self.config_dict['s3_region']) + sqs_client.send_message( + QueueUrl=sqs_queue_url, + MessageBody= self.message + ) + queue = sqs_resource.Queue(queue_name) + + self.sqs_consumer.receive_messages(queue, 1, mock_cb) + + # Verify callback function was called once with expected message attributes + mock_cb.assert_called_with(self.records) + + @mock_sqs + def test_happy_path(self): + mock_cb = MagicMock() + + # Create the mock queue beforehand and set SqsConsumer's 'sqs_url' to the mock's URL + queue_name = 'test_queue' + sqs_resource = self.s3_utils.connect_to_resource('sqs', self.config_dict['s3_region']) + queue = self.sqs_consumer.connect(sqs_resource, queue_name) #sqs_resource.create_queue(QueueName=queue_name) + + # Send a test message + sqs_client = self.s3_utils.connect('sqs', self.config_dict['s3_region']) + sqs_client.send_message( + QueueUrl=queue.url, + MessageBody= self.message + ) + + self.sqs_consumer.receive_messages(queue, 1, mock_cb) + + # Verify callback function was called once with expected message attributes + 
mock_cb.assert_called_with(self.records) + # An example using external send/receive methods @mock_sqs - def test_poll_messages(self): - # Create the mock queue beforehand and set its mock URL as the 'sqs_url' config value for SqsConsumer - boto_session = boto3.Session(aws_access_key_id=self.sc.cred['sandbox']['access_key'], - aws_secret_access_key=self.sc.cred['sandbox']['secret_key']) - sqs_session = boto_session.resource('sqs', region_name=self.sc.conf['s3_region']) - res = sqs_session.create_queue(QueueName="test_queue") - self.sc.conf['sqs_url'] = res.url - queue = self.sc.connect() - self.sc.receive_messages(queue, self.sc.conf['sqs_max_polls'], lambda *args, **kwargs: None) + def test_write_message_valid(self): + "Test the write_message method with a valid message" + sqs_client = self.s3_utils.connect('sqs', self.config_dict['s3_region']) + sqs = self.s3_utils.connect_to_resource('sqs', self.config_dict['s3_region']) + queue = sqs.create_queue(QueueName='test-skype-sender') + self.sqs_consumer.sqs_url = queue.url + skype_message = 'Testing with a valid message' + channel = 'test' + expected_message = str({'msg':f'{skype_message}', 'channel':channel}) + message = str({'msg':f'{skype_message}', 'channel':channel}) + queue.send_message(MessageBody=(message)) + sqs_messages = queue.receive_messages() + print('Message: %s'%sqs_messages) + print('Message0: %s'%sqs_messages[0]) + assert sqs_messages[0].body == expected_message, 'Message in skype-sender does not match expected' + print(f'The message in skype-sender SQS matches what we sent') + assert len(sqs_messages) == 1, 'Expected exactly one message in SQS' + print(f'\nExactly one message in skype-sender SQS') if __name__ == '__main__': unittest.main() \ No newline at end of file From 11f8845ef8f111c7d3a2632cace3c208751e13b2 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 11 May 2021 08:00:06 -0600 Subject: [PATCH 023/100] 1500-fixed bug in tests/utils of message missing a carriage return. Just looks. --- onestop-python-client/tests/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/onestop-python-client/tests/utils.py b/onestop-python-client/tests/utils.py index 2f1e6d5..9cb7913 100644 --- a/onestop-python-client/tests/utils.py +++ b/onestop-python-client/tests/utils.py @@ -15,7 +15,8 @@ def create_delete_message(region, bucket, key): "Message": '''{ "Records": [{ "eventVersion": "2.1", "eventSource": "aws:s3", "awsRegion": "''' + region + '''", - "eventTime": "2020-12-14T20:56:08.725Z", "eventName": "ObjectRemoved:Delete", + "eventTime": "2020-12-14T20:56:08.725Z", + "eventName": "ObjectRemoved:Delete", "userIdentity": {"principalId": "AX8TWPQYA8JEM"}, "requestParameters": {"sourceIPAddress": "65.113.158.185"}, "responseElements": {"x-amz-request-id": "D8059E6A1D53597A", From 9048e5326d48cd85f059e487068cd07f464fb35e Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 11 May 2021 08:01:30 -0600 Subject: [PATCH 024/100] 1500-Added logging to SqsHandlers and log_level method parameter. Adjusted SqsConsumer callback parameters to pass along log_level. 
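Any callback handed to SqsConsumer.receive_messages now has to accept a second argument, since the consumer forwards its own log_level alongside the parsed 'Records' list. A minimal conforming handler:

    from onestop.util.ClientLogger import ClientLogger

    def my_handler(records, log_level='INFO'):
        # (records, log_level) is the shape receive_messages calls back with now
        logger = ClientLogger.get_logger('my_handler', log_level, False)
        logger.info('handling %d record(s)' % len(records))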
--- .../onestop/util/SqsConsumer.py | 3 ++- .../onestop/util/SqsHandlers.py | 21 +++++++++++++++++-- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/onestop-python-client/onestop/util/SqsConsumer.py b/onestop-python-client/onestop/util/SqsConsumer.py index 4f503d8..4d97c34 100644 --- a/onestop-python-client/onestop/util/SqsConsumer.py +++ b/onestop-python-client/onestop/util/SqsConsumer.py @@ -25,6 +25,7 @@ def __init__(self, log_level = 'INFO', **wildargs): log_level: str The log level to use for this class (Defaults to 'INFO') """ + self.log_level = log_level self.logger = ClientLogger.get_logger(self.__class__.__name__, log_level, False) self.logger.info("Initializing " + self.__class__.__name__) @@ -93,7 +94,7 @@ def receive_messages(self, sqs_queue, sqs_max_polls, cb): if 'Records' in message_content: recs = message_content['Records'] self.logger.debug('Message "Records": %s' % recs) - cb(recs) + cb(recs, self.log_level) else: self.logger.info("s3 event message without 'Records' content received.") diff --git a/onestop-python-client/onestop/util/SqsHandlers.py b/onestop-python-client/onestop/util/SqsHandlers.py index 57be8da..ce0f010 100644 --- a/onestop-python-client/onestop/util/SqsHandlers.py +++ b/onestop-python-client/onestop/util/SqsHandlers.py @@ -1,3 +1,5 @@ +from onestop.util.ClientLogger import ClientLogger + def create_delete_handler(web_publisher): """ Creates a delete function handler to be used with SqsConsumer.receive_messages. @@ -7,21 +9,36 @@ def create_delete_handler(web_publisher): :param: web_publisher: WebPublisher object """ - def delete(records): - if records is None: + def delete(records, log_level='INFO'): + + logger = ClientLogger.get_logger('SqsHandlers', log_level, False) + logger.info("In create_delete_handler.delete() handler") + logger.debug("Records: %s"%records) + + if not records or records is None: + logger.info("Ending handler, records empty, records=%s"%records) return + record = records[0] if record['eventName'] != 'ObjectRemoved:Delete': + logger.info("Ending handler, eventName=%s"%record['eventName']) return + bucket = record['s3']['bucket']['name'] s3_key = record['s3']['object']['key'] s3_url = "s3://" + bucket + "/" + s3_key payload = '{"queries":[{"type": "fieldQuery", "field": "links.linkUrl", "value": "' + s3_url + '"}] }' search_response = web_publisher.search_onestop('granule', payload) + logger.debug('OneStop search response=%s'%search_response) response_json = search_response.json() + logger.debug('OneStop search response json=%s'%response_json) + logger.debug('OneStop search response data=%s'%response_json['data']) if len(response_json['data']) != 0: granule_uuid = response_json['data'][0]['id'] response = web_publisher.delete_registry('granule', granule_uuid) + print('delete_registry response: %s'%response) return response + logger.warning("OneStop search response has no 'data' field. Response=%s"%response_json) + return delete From 5e0d3ba87eed10f2078c591ae8f2b3bc575de13a Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 11 May 2021 08:02:45 -0600 Subject: [PATCH 025/100] 1500-Added tests to SqsHandlersTest and removed config usage. 
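Besides the SQS-driven tests below, the delete handler can also be exercised on its own by handing create_delete_handler a mocked WebPublisher and a canned search response; a short sketch of that (the bucket and key values are just placeholders):

    from unittest.mock import MagicMock
    from onestop.util.SqsHandlers import create_delete_handler

    web_publisher = MagicMock()
    web_publisher.search_onestop.return_value.json.return_value = {'data': [{'id': 'abc'}]}

    handler = create_delete_handler(web_publisher)
    handler([{'eventName': 'ObjectRemoved:Delete',
              's3': {'bucket': {'name': 'archive-testing-demo'},
                     'object': {'key': 'csv/file1.csv'}}}], 'DEBUG')

    # the handler looked the granule up by its s3 link and deleted it by id
    web_publisher.delete_registry.assert_called_once_with('granule', 'abc')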
--- .../tests/SqsHandlersTest.py | 274 +++++++++++++----- 1 file changed, 194 insertions(+), 80 deletions(-) diff --git a/onestop-python-client/tests/SqsHandlersTest.py b/onestop-python-client/tests/SqsHandlersTest.py index bbe4210..3897169 100644 --- a/onestop-python-client/tests/SqsHandlersTest.py +++ b/onestop-python-client/tests/SqsHandlersTest.py @@ -1,8 +1,8 @@ import json import unittest -import boto3 -import yaml -from moto import mock_s3 + +from unittest import mock +from unittest.mock import patch from moto import mock_sqs from tests.utils import abspath_from_relative, create_delete_message from onestop.WebPublisher import WebPublisher @@ -13,95 +13,209 @@ class SqsHandlerTest(unittest.TestCase): - wp = None - su = None - s3ma = None - sqs = None - wp_config = abspath_from_relative(__file__, "../config/web-publisher-config-local.yml") - aws_config = abspath_from_relative(__file__, "../config/aws-util-config-dev.yml") - cred_config = abspath_from_relative(__file__, "../config/credentials-template.yml") - csb_config = abspath_from_relative(__file__, "../config/csb-data-stream-config.yml") - - collection_uuid = '5b58de08-afef-49fb-99a1-9c5d5c003bde' - payloadDict = { - "fileInformation": { - "name": "OR_ABI-L1b-RadF-M6C13_G16_s20192981730367_e20192981740087_c20192981740157.nc", - "size": 30551050, - "checksums": [{ - "algorithm": "SHA1", - "value": "bf4c5b58f8d5f9445f7b277f988e5861184f775a" - }], - "format": "NetCDF" - }, - "relationships": [{ - "type": "COLLECTION", - "id": collection_uuid - }], - "fileLocations": { - "s3://noaa-goes16/ABI-L1b-RadF/2019/298/17/OR_ABI-L1b-RadF-M6C13_G16_s20192981730367_e20192981740087_c20192981740157.nc": { - "uri": "s3://noaa-goes16/ABI-L1b-RadF/2019/298/17/OR_ABI-L1b-RadF-M6C13_G16_s20192981730367_e20192981740087_c20192981740157.nc", - "type": "ACCESS", - "deleted": "false", - "restricted": "false", - "asynchronous": "false", - "locality": "us-east-2", - "lastModified": 1572025823000, - "serviceType": "Amazon:AWS:S3", - "optionalAttributes": {} - } - } - } def setUp(self): print("Set it up!") - with open(abspath_from_relative(__file__, "../config/csb-data-stream-config-template.yml")) as f: - self.stream_conf = yaml.load(f, Loader=yaml.FullLoader) - with open(abspath_from_relative(__file__, "../config/aws-util-config-dev.yml")) as f: - self.cloud_conf = yaml.load(f, Loader=yaml.FullLoader) - with open(abspath_from_relative(__file__, "../config/credentials-template.yml")) as f: - self.cred = yaml.load(f, Loader=yaml.FullLoader) - - self.wp = WebPublisher(self.wp_config, self.cred_config) - self.su = S3Utils(self.cred['sandbox']['access_key'], - self.cred['sandbox']['secret_key'], - "DEBUG") - self.s3ma = S3MessageAdapter(self.stream_conf['access_bucket'], - self.stream_conf['type'], - self.stream_conf['file_identifier_prefix'], - self.stream_conf['collection_id']) + self.config_dict = { + 'access_key': 'test_access_key', + 'secret_key': 'test_secret_key', + 'access_bucket': 'https://archive-testing-demo.s3-us-east-2.amazonaws.com', + 'type': 'COLLECTION', + 'file_id_prefix': 'gov.noaa.ncei.csb:', + 'collection_id': 'fdb56230-87f4-49f2-ab83-104cfd073177', + 'registry_base_url': 'http://localhost/onestop/api/registry', + 'registry_username': 'admin', + 'registry_password': 'whoknows', + 'onestop_base_url': 'http://localhost/onestop/api/search/search', + 'log_level': 'DEBUG' + } + + self.wp = WebPublisher(**self.config_dict) + self.s3_utils = S3Utils(**self.config_dict) + self.s3ma = S3MessageAdapter(**self.config_dict) + self.sqs_consumer = 
SqsConsumer(**self.config_dict) + + self.sqs_max_polls = 3 + self.region = 'us-east-2' + self.bucket = 'archive-testing-demo' + self.key = 'ABI-L1b-RadF/2019/298/15/OR_ABI-L1b-RadF-M6C15_G16_s20192981500369_e20192981510082_c20192981510166.nc' def tearDown(self): print("Tear it down!") - @mock_s3 + def mocked_search_response_data(*args, **kwargs): + class MockResponse: + def __init__(self, json_data, status_code): + self.json_data = json_data + self.status_code = status_code + + def json(self): + return self.json_data + + print ("args: "+str(args)+" kwargs: "+str(kwargs)) + onestop_search_response = { + "data":[ + { + "attributes":{ + "serviceLinks":[ + + ], + "citeAsStatements":[ + + ], + "links":[ + { + "linkFunction":"download", + "linkUrl":"s3://archive-testing-demo-backup/public/NESDIS/CSB/csv/2019/12/01/20191201_08d5538c6f8dbefd7d82929623a34385_pointData.csv", + "linkName":"Amazon S3", + "linkProtocol":"Amazon:AWS:S3" + }, + { + "linkFunction":"download", + "linkUrl":"https://archive-testing-demo.s3-us-east-2.amazonaws.com/public/NESDIS/CSB/csv/2019/12/01/20191201_08d5538c6f8dbefd7d82929623a34385_pointData.csv", + "linkName":"Amazon S3", + "linkProtocol":"HTTPS" + } + ], + "internalParentIdentifier":"fdb56230-87f4-49f2-ab83-104cfd073177", + "filesize":63751, + "title":"20191201_08d5538c6f8dbefd7d82929623a34385_pointData.csv" + }, + "id":"77b11a1e-1b75-46e1-b7d6-99b5022ed113", + "type":"granule" + } + ], + "meta":{ + "took":1, + "total":6, + "exactCount":True + } + } + return MockResponse(onestop_search_response, 200) + + + def mocked_search_response_data_empty(*args, **kwargs): + class MockResponse: + def __init__(self, json_data, status_code): + self.json_data = json_data + self.status_code = status_code + + def json(self): + return self.json_data + + print ("args: "+str(args)+" kwargs: "+str(kwargs)) + onestop_search_response = { + "data":[], + "meta":{ + "took":1, + "total":6, + "exactCount":True + } + } + return MockResponse(onestop_search_response, 200) + @mock_sqs - def init_s3(self): - bucket = self.cloud_conf['s3_bucket'] - key = self.cloud_conf['s3_key'] - boto_client = self.su.connect("s3", None) - boto_client.create_bucket(Bucket=bucket) - boto_client.put_object(Bucket=bucket, Key=key, Body="foobar") - - sqs_client = boto3.client('sqs', region_name=self.cloud_conf['s3_region']) - sqs_queue = sqs_client.create_queue(QueueName=self.cloud_conf['sqs_name']) - self.sqs = SqsConsumer(self.aws_config, self.cred_config) - message = create_delete_message(self.cloud_conf['s3_region'], bucket, key) - sqs_client.send_message(QueueUrl=sqs_queue['QueueUrl'], MessageBody=json.dumps(message)) - sqs_queue['QueueUrl'] - - @mock_s3 + @mock.patch('requests.get', side_effect=mocked_search_response_data, autospec=True) + @patch('onestop.WebPublisher') + def test_delete_handler_happy(self, mock_wp, mock_response): + queue_name = 'test_queue' + sqs_resource = self.s3_utils.connect_to_resource('sqs', self.region) + sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url + sqs_queue = sqs_resource.Queue(queue_name) + + # Send a test message + sqs_client = self.s3_utils.connect('sqs', self.region) + message = create_delete_message(self.region, self.bucket, self.key) + sqs_client.send_message( + QueueUrl=sqs_queue_url, + MessageBody=json.dumps(message) + ) + + mock_wp.search_onestop.side_effect = mock_response + cb = create_delete_handler(mock_wp) + + self.sqs_consumer.receive_messages(sqs_queue, 1, cb) + + # Verify search and delete called once. 
+ mock_wp.search_onestop.assert_called_once() + mock_wp.delete_registry.assert_called_once() + @mock_sqs - def delete_handler_wrapper(self, recs): - handler = create_delete_handler(self.wp) - result = handler(recs) - self.assertTrue(result) + @mock.patch('requests.get', side_effect=mocked_search_response_data_empty, autospec=True) + @patch('onestop.WebPublisher') + def test_delete_handler_data_empty_ends_cb(self, mock_wp, mock_response): + queue_name = 'test_queue' + sqs_resource = self.s3_utils.connect_to_resource('sqs', self.region) + sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url + sqs_queue = sqs_resource.Queue(queue_name) + + # Send a test message + sqs_client = self.s3_utils.connect('sqs', self.region) + message = create_delete_message(self.region, self.bucket, self.key) + sqs_client.send_message( + QueueUrl=sqs_queue_url, + MessageBody=json.dumps(message) + ) + + mock_wp.search_onestop.side_effect = mock_response + cb = create_delete_handler(mock_wp) + + self.sqs_consumer.receive_messages(sqs_queue, 1, cb) + + # Verify search and delete called once. + mock_wp.search_onestop.assert_called_once() + mock_wp.delete_registry.assert_not_called() @mock_sqs - def test_delete_handler(self): - mock_queue_url = self.init_s3() - sqs_queue = boto3.resource('sqs', region_name=self.stream_conf['s3_region']).Queue(mock_queue_url) - self.sqs.receive_messages(sqs_queue, self.stream_conf['sqs_max_polls'], self.delete_handler_wrapper) + @mock.patch('requests.get', side_effect=mocked_search_response_data, autospec=True) + @patch('onestop.WebPublisher') + def test_delete_handler_no_records_ends_cb(self, mock_wp, mock_response): + queue_name = 'test_queue' + sqs_resource = self.s3_utils.connect_to_resource('sqs', self.region) + sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url + sqs_queue = sqs_resource.Queue(queue_name) + + # Send a test message + sqs_client = self.s3_utils.connect('sqs', self.region) + sqs_client.send_message( + QueueUrl=sqs_queue_url, + MessageBody=json.dumps({"Message":'''{"Records":[]}'''}) + ) + + mock_wp.search_onestop.side_effect = mock_response + cb = create_delete_handler(mock_wp) + + self.sqs_consumer.receive_messages(sqs_queue, 1, cb) + + # Verify search and delete called once. + mock_wp.search_onestop.assert_not_called() + mock_wp.delete_registry.assert_not_called() + + @mock_sqs + @mock.patch('requests.get', side_effect=mocked_search_response_data, autospec=True) + @patch('onestop.WebPublisher') + def test_delete_handler_eventName_not_delete_ends_cb(self, mock_wp, mock_response): + queue_name = 'test_queue' + sqs_resource = self.s3_utils.connect_to_resource('sqs', self.region) + sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url + sqs_queue = sqs_resource.Queue(queue_name) + + # Send a test message + sqs_client = self.s3_utils.connect('sqs', self.region) + sqs_client.send_message( + QueueUrl=sqs_queue_url, + MessageBody=json.dumps({"Message":'''{"Records":[{"eventName":"Unknown"}]}'''}) + ) + + mock_wp.search_onestop.side_effect = mock_response + cb = create_delete_handler(mock_wp) + + self.sqs_consumer.receive_messages(sqs_queue, 1, cb) + + # Verify search and delete called once. 
+ mock_wp.search_onestop.assert_not_called() + mock_wp.delete_registry.assert_not_called() if __name__ == '__main__': unittest.main() \ No newline at end of file From 918c378b053da7918f679cf1806a5f68440f190c Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 11 May 2021 08:17:33 -0600 Subject: [PATCH 026/100] 1500-Fixed SqsConsumerTest due to parameters into CB changing. skipped example test. --- onestop-python-client/tests/util/SqsConsumerTest.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/onestop-python-client/tests/util/SqsConsumerTest.py b/onestop-python-client/tests/util/SqsConsumerTest.py index 87f9005..7b5785f 100644 --- a/onestop-python-client/tests/util/SqsConsumerTest.py +++ b/onestop-python-client/tests/util/SqsConsumerTest.py @@ -2,7 +2,7 @@ import json from moto import mock_sqs -from unittest.mock import ANY, patch, MagicMock, call +from unittest.mock import MagicMock, ANY from onestop.util.S3Utils import S3Utils from onestop.util.SqsConsumer import SqsConsumer @@ -128,7 +128,7 @@ def test_receive_messages_callback_occurs(self): self.sqs_consumer.receive_messages(queue, 1, mock_cb) # Verify callback function was called once with expected message attributes - mock_cb.assert_called_with(self.records) + mock_cb.assert_called_with(self.records, ANY) @mock_sqs def test_happy_path(self): @@ -149,9 +149,10 @@ def test_happy_path(self): self.sqs_consumer.receive_messages(queue, 1, mock_cb) # Verify callback function was called once with expected message attributes - mock_cb.assert_called_with(self.records) + mock_cb.assert_called_with(self.records, ANY) # An example using external send/receive methods + @unittest.skip @mock_sqs def test_write_message_valid(self): "Test the write_message method with a valid message" From 3f39966f8fe4f42c3e385ad79e04ba014fdc1b17 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 11 May 2021 08:22:33 -0600 Subject: [PATCH 027/100] 1500-Removed unused conf variable. --- onestop-python-client/onestop/util/S3Utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/onestop-python-client/onestop/util/S3Utils.py b/onestop-python-client/onestop/util/S3Utils.py index e2f2e32..eebafe9 100644 --- a/onestop-python-client/onestop/util/S3Utils.py +++ b/onestop-python-client/onestop/util/S3Utils.py @@ -68,7 +68,6 @@ class S3Utils: retrieve_inventory_results(vault_name, boto_client, job_id) Retrieve the results of an Amazon Glacier inventory-retrieval job """ - conf = None def __init__(self, access_key, secret_key, log_level = 'INFO', **wildargs): self.access_key = access_key From 4cffc3884e7d41e074494ca6bd122f0ce4cde9eb Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 11 May 2021 09:21:19 -0600 Subject: [PATCH 028/100] 1500-Removed unused var conf from classes. 
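The pattern used throughout these tests is one plain dict of test settings splatted into every constructor; keys a given class does not declare fall through to its **wildargs catch-all, so a single dict can feed S3Utils and S3MessageAdapter alike without touching any YAML on disk. A condensed version of that setUp pattern:

    from onestop.util.S3Utils import S3Utils
    from onestop.util.S3MessageAdapter import S3MessageAdapter

    config_dict = {
        'access_key': 'test_access_key',
        'secret_key': 'test_secret_key',
        'access_bucket': 'https://archive-testing-demo.s3-us-east-2.amazonaws.com',
        'type': 'COLLECTION',
        'file_id_prefix': 'gov.noaa.ncei.csb:',
        'collection_id': 'fdb56230-87f4-49f2-ab83-104cfd073177',
        'log_level': 'DEBUG',
    }

    s3_utils = S3Utils(**config_dict)        # mainly uses access_key/secret_key/log_level
    s3ma = S3MessageAdapter(**config_dict)   # mainly uses access_bucket/type/file_id_prefix/collection_id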
--- onestop-python-client/onestop/WebPublisher.py | 1 - 1 file changed, 1 deletion(-) diff --git a/onestop-python-client/onestop/WebPublisher.py b/onestop-python-client/onestop/WebPublisher.py index 75ee99f..d04eacc 100644 --- a/onestop-python-client/onestop/WebPublisher.py +++ b/onestop-python-client/onestop/WebPublisher.py @@ -31,7 +31,6 @@ class WebPublisher: get_granules_onestop(self, uuid) Search for a granule in OneStop given its uuid """ - conf = None def __init__(self, registry_base_url, registry_username, registry_password, onestop_base_url, log_level="INFO", **kwargs): self.registry_base_url = registry_base_url From 8280a374901c62c68eea722ebd6a7c087a57ce1f Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 11 May 2021 09:21:58 -0600 Subject: [PATCH 029/100] 1500-Changed mock tests to not load configs but use mock data. --- .../tests/util/S3MessageAdapterTest.py | 32 +++++------- .../tests/util/S3UtilsTest.py | 52 +++++++++---------- 2 files changed, 38 insertions(+), 46 deletions(-) diff --git a/onestop-python-client/tests/util/S3MessageAdapterTest.py b/onestop-python-client/tests/util/S3MessageAdapterTest.py index a960737..671695a 100644 --- a/onestop-python-client/tests/util/S3MessageAdapterTest.py +++ b/onestop-python-client/tests/util/S3MessageAdapterTest.py @@ -1,8 +1,6 @@ import unittest -import yaml from moto import mock_s3 -from tests.utils import abspath_from_relative from onestop.util.S3Utils import S3Utils from onestop.util.S3MessageAdapter import S3MessageAdapter @@ -54,30 +52,24 @@ class S3MessageAdapterTest(unittest.TestCase): def setUp(self): print("Set it up!") - with open(abspath_from_relative(__file__, "../../config/csb-data-stream-config-template.yml")) as f: - self.stream_conf = yaml.load(f, Loader=yaml.FullLoader) - with open(abspath_from_relative(__file__, "../../config/aws-util-config-dev.yml")) as f: - self.cloud_conf = yaml.load(f, Loader=yaml.FullLoader) - with open(abspath_from_relative(__file__, "../../config/credentials-template.yml")) as f: - self.cred = yaml.load(f, Loader=yaml.FullLoader) + config_dict = { + 'access_key': 'test_access_key', + 'secret_key': 'test_secret_key', + 'access_bucket': 'https://archive-testing-demo.s3-us-east-2.amazonaws.com', + 'type': 'COLLECTION', + 'file_id_prefix': 'gov.noaa.ncei.csb:', + 'collection_id': 'fdb56230-87f4-49f2-ab83-104cfd073177', + 'log_level': 'DEBUG' + } - self.s3_utils = S3Utils(self.cred['sandbox']['access_key'], - self.cred['sandbox']['secret_key'], - "DEBUG") - self.s3ma = S3MessageAdapter(self.stream_conf['access_bucket'], - self.stream_conf['type'], - self.stream_conf['file_identifier_prefix'], - self.stream_conf['collection_id']) + self.s3_utils = S3Utils(**config_dict) + self.s3ma = S3MessageAdapter(**config_dict) - self.region = self.cloud_conf['s3_region'] - self.bucket = self.cloud_conf['s3_bucket'] + self.region = 'us-east-2' def tearDown(self): print("Tear it down!") - def test_parse_config(self): - self.assertFalse(self.stream_conf['collection_id'] == None) - @mock_s3 def test_transform(self): s3 = self.s3_utils.connect('s3', self.region) diff --git a/onestop-python-client/tests/util/S3UtilsTest.py b/onestop-python-client/tests/util/S3UtilsTest.py index acb0af4..47c8ade 100644 --- a/onestop-python-client/tests/util/S3UtilsTest.py +++ b/onestop-python-client/tests/util/S3UtilsTest.py @@ -1,7 +1,6 @@ import csv import unittest import uuid -import yaml from moto import mock_s3 from moto import mock_glacier @@ -13,20 +12,21 @@ class S3UtilsTest(unittest.TestCase): def setUp(self): print("Set it 
up!") - with open(abspath_from_relative(__file__, "../../config/csb-data-stream-config-template.yml")) as f: - self.stream_conf = yaml.load(f, Loader=yaml.FullLoader) - with open(abspath_from_relative(__file__, "../../config/aws-util-config-dev.yml")) as f: - self.cloud_conf = yaml.load(f, Loader=yaml.FullLoader) - with open(abspath_from_relative(__file__, "../../config/credentials-template.yml")) as f: - self.cred = yaml.load(f, Loader=yaml.FullLoader) + config_dict = { + 'access_key': 'test_access_key', + 'secret_key': 'test_secret_key', + 'access_bucket': 'https://archive-testing-demo.s3-us-east-2.amazonaws.com', + 'type': 'COLLECTION', + 'file_id_prefix': 'gov.noaa.ncei.csb:', + 'collection_id': 'fdb56230-87f4-49f2-ab83-104cfd073177', + 'log_level': 'DEBUG' + } - self.s3_utils = S3Utils(self.cred['sandbox']['access_key'], - self.cred['sandbox']['secret_key'], - "DEBUG") + self.s3_utils = S3Utils(**config_dict) - self.region = self.cloud_conf['s3_region'] - self.region2 = self.region - self.bucket = self.cloud_conf['s3_bucket'] + self.region = 'us-east-2' + self.region2 = 'eu-north-1' + self.bucket = 'archive-testing-demo' @mock_s3 def test_get_uuid_metadata(self): @@ -54,7 +54,7 @@ def test_add_uuid_metadata(self): @mock_s3 def test_add_file_s3(self): - boto_client = self.s3_utils.connect("s3", None) + boto_client = self.s3_utils.connect('client', 's3', None) local_file = abspath_from_relative(__file__, "../data/file4.csv") s3_key = "csv/file4.csv" location = {'LocationConstraint': self.region} @@ -65,8 +65,8 @@ def test_add_file_s3(self): @mock_s3 def test_get_csv_s3(self): - boto_session = self.s3_utils.connect("session", None) - s3 = self.s3_utils.connect('s3', self.cloud_conf['s3_region']) + boto_session = self.s3_utils.connect('session', None, None) + s3 = self.s3_utils.connect('client', 's3', self.region) location = {'LocationConstraint': self.region} s3_key = "csv/file1.csv" s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) @@ -81,7 +81,7 @@ def test_get_csv_s3(self): @mock_s3 def test_read_bytes_s3(self): - boto_client = self.s3_utils.connect("s3", None) + boto_client = self.s3_utils.connect('client', 's3', None) s3_key = "csv/file1.csv" boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration={'LocationConstraint': self.region}) boto_client.put_object(Bucket=self.bucket, Key=s3_key, Body="body") @@ -90,7 +90,7 @@ def test_read_bytes_s3(self): @mock_s3 def test_add_files(self): - boto_client = self.s3_utils.connect("s3", None) + boto_client = self.s3_utils.connect('client', 's3', None) local_files = ["file1_s3.csv", "file2.csv", "file3.csv"] location = {'LocationConstraint': self.region} boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) @@ -108,7 +108,7 @@ def test_s3_cross_region(self): key = "csv/file1.csv" # makes connection to low level s3 client - s3 = self.s3_utils.connect('s3', self.region) + s3 = self.s3_utils.connect('client', 's3', self.region) location = {'LocationConstraint': self.region} s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) s3.put_object(Bucket=self.bucket, Key=key, Body="body") @@ -117,8 +117,8 @@ def test_s3_cross_region(self): file_data = self.s3_utils.read_bytes_s3(s3, self.bucket, key) # Redirecting upload to vault in second region - glacier = self.s3_utils.connect("glacier", self.region2) - vault_name = self.cloud_conf['vault_name'] + glacier = self.s3_utils.connect('client', 'glacier', self.region2) + vault_name = 'archive-vault-new' 
glacier.create_vault(vaultName=vault_name) print('vault name: ' + str(vault_name)) print('region name: ' + str(self.region2)) @@ -140,7 +140,7 @@ def test_s3_to_glacier(self): key = "csv/file1_s3.csv" # Create boto3 low level api connection - s3 = self.s3_utils.connect('s3', self.region) + s3 = self.s3_utils.connect('client', 's3', self.region) location = {'LocationConstraint': self.region} s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) s3.put_object(Bucket=self.bucket, Key=key, Body="body") @@ -172,13 +172,13 @@ def test_s3_restore(self): @mock_glacier def test_retrieve_inventory(self): """ - Initiates job for archive retrieval. Takes 3-5 hours to complete + Initiates job for archive retrieval. Takes 3-5 hours to complete if not mocked. """ # Using glacier api initiates job and returns archive results # Connect to your glacier vault for retrieval - glacier = self.s3_utils.connect("glacier", self.region2) - vault_name = self.cloud_conf['vault_name'] + glacier = self.s3_utils.connect('client', 'glacier', self.region2) + vault_name = 'archive-vault-new' glacier.create_vault(vaultName=vault_name) @@ -193,7 +193,7 @@ def test_retrieve_inventory_results(self, jobid): """ # Connect to your glacier vault for retrieval - glacier = self.su.connect("glacier", self.su.conf['region']) + glacier = self.su.connect('client', 'glacier', self.su.conf['region']) vault_name = self.su.conf['vault_name'] # Retrieve the job results From c16302eaf845fbab4cfe1862163e97cab563182e Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 11 May 2021 09:25:02 -0600 Subject: [PATCH 030/100] 1500-refactored S3Utils connect to take in type parameter instead of us assuming if they say "glacier" they mean a client of service type "glacier". Little clearer to the user and in the code. This allows boto to catch the error of wrong service name specified and it gives a nice list of choices. Added else statement too for cases user specifies a type we don't expect, will add tests to. --- onestop-python-client/onestop/util/S3Utils.py | 61 +++++++++---------- .../tests/SqsHandlersTest.py | 16 ++--- .../tests/extractor/CsbExtractorTest.py | 4 +- .../tests/util/S3MessageAdapterTest.py | 2 +- .../tests/util/S3UtilsTest.py | 6 +- .../tests/util/SqsConsumerTest.py | 18 +++--- 6 files changed, 51 insertions(+), 56 deletions(-) diff --git a/onestop-python-client/onestop/util/S3Utils.py b/onestop-python-client/onestop/util/S3Utils.py index eebafe9..f1bb8e2 100644 --- a/onestop-python-client/onestop/util/S3Utils.py +++ b/onestop-python-client/onestop/util/S3Utils.py @@ -30,7 +30,7 @@ class S3Utils: Methods ------- connect(client_type, region) - connects to a boto3 client + connects to a boto3 service objectkey_exists(bucket, s3_key) checks to see if a s3 key path exists in a particular bucket @@ -78,46 +78,41 @@ def __init__(self, access_key, secret_key, log_level = 'INFO', **wildargs): if wildargs: self.logger.error("There were extra constructor arguments: " + str(wildargs)) - def connect(self, client_type, region): + def connect(self, type, service_name, region): """ - Connects to a boto3 client + Connects to a boto3 of specified type using the credentials provided in the constructor. - :param client_type: str - boto client type in which you want to access + :param type: str + boto object type to return, see return type. 
+ :param service_name: str + (Optional for session type) boto service name in which you want to access :param region: str - name of aws region you want to access + (Optional for session type) name of aws region you want to access - :return: boto3 client - dependent on the client_type parameter + :return: boto3 connection object + A boto3 connection object; Client, Session, or Resource. """ - - if client_type == "s3": - boto = boto3.client( - "s3", + type = type.lower() + if type == 'session': + return boto3.Session( aws_access_key_id=self.access_key, aws_secret_access_key=self.secret_key, - region_name=region) - - if client_type == "s3_resource": - boto = boto3.resource( - "s3", - region_name=region, + ) + elif type == 'client': + return boto3.client( + service_name, aws_access_key_id=self.access_key, - aws_secret_access_key=self.secret_key) - - if client_type == "glacier": - boto = boto3.client( - "glacier", + aws_secret_access_key=self.secret_key, + region_name=region) + elif type == 'resource': + return boto3.resource( + service_name, region_name=region, aws_access_key_id=self.access_key, - aws_secret_access_key=self.secret_key) - - if client_type == "session": - boto = boto3.Session( - aws_access_key_id=self.access_key, - aws_secret_access_key=self.secret_key, + aws_secret_access_key=self.secret_key ) - return boto + else: + raise Exception('Unknown boto3 type of %s'%type) def objectkey_exists(self, bucket, s3_key): """ @@ -235,11 +230,11 @@ def upload_s3(self, boto_client, local_file, bucket, s3_key, overwrite): self.logger.error("File to upload was not found. Path: "+local_file) return False - def get_csv_s3(self, boto_client, bucket, key): + def get_csv_s3(self, boto_session, bucket, key): """ gets a csv file from s3 bucket using smart open library - :param boto_client: session + :param boto_session: session utilizes boto session type :param bucket: str name of bucket @@ -249,7 +244,7 @@ def get_csv_s3(self, boto_client, bucket, key): :return: smart open file """ url = "s3://" + bucket + "/" + key - sm_open_file = sm_open(url, 'r', transport_params={'session': boto_client}) + sm_open_file = sm_open(url, 'r', transport_params={'session': boto_session}) return sm_open_file def read_bytes_s3(self, boto_client, bucket, key): diff --git a/onestop-python-client/tests/SqsHandlersTest.py b/onestop-python-client/tests/SqsHandlersTest.py index 3897169..4dd2c9e 100644 --- a/onestop-python-client/tests/SqsHandlersTest.py +++ b/onestop-python-client/tests/SqsHandlersTest.py @@ -120,12 +120,12 @@ def json(self): @patch('onestop.WebPublisher') def test_delete_handler_happy(self, mock_wp, mock_response): queue_name = 'test_queue' - sqs_resource = self.s3_utils.connect_to_resource('sqs', self.region) + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.region) sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url sqs_queue = sqs_resource.Queue(queue_name) # Send a test message - sqs_client = self.s3_utils.connect('sqs', self.region) + sqs_client = self.s3_utils.connect('client', 'sqs' , self.region) message = create_delete_message(self.region, self.bucket, self.key) sqs_client.send_message( QueueUrl=sqs_queue_url, @@ -146,12 +146,12 @@ def test_delete_handler_happy(self, mock_wp, mock_response): @patch('onestop.WebPublisher') def test_delete_handler_data_empty_ends_cb(self, mock_wp, mock_response): queue_name = 'test_queue' - sqs_resource = self.s3_utils.connect_to_resource('sqs', self.region) + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.region) 
sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url sqs_queue = sqs_resource.Queue(queue_name) # Send a test message - sqs_client = self.s3_utils.connect('sqs', self.region) + sqs_client = self.s3_utils.connect('client', 'sqs' , self.region) message = create_delete_message(self.region, self.bucket, self.key) sqs_client.send_message( QueueUrl=sqs_queue_url, @@ -172,12 +172,12 @@ def test_delete_handler_data_empty_ends_cb(self, mock_wp, mock_response): @patch('onestop.WebPublisher') def test_delete_handler_no_records_ends_cb(self, mock_wp, mock_response): queue_name = 'test_queue' - sqs_resource = self.s3_utils.connect_to_resource('sqs', self.region) + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.region) sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url sqs_queue = sqs_resource.Queue(queue_name) # Send a test message - sqs_client = self.s3_utils.connect('sqs', self.region) + sqs_client = self.s3_utils.connect('client', 'sqs' , self.region) sqs_client.send_message( QueueUrl=sqs_queue_url, MessageBody=json.dumps({"Message":'''{"Records":[]}'''}) @@ -197,12 +197,12 @@ def test_delete_handler_no_records_ends_cb(self, mock_wp, mock_response): @patch('onestop.WebPublisher') def test_delete_handler_eventName_not_delete_ends_cb(self, mock_wp, mock_response): queue_name = 'test_queue' - sqs_resource = self.s3_utils.connect_to_resource('sqs', self.region) + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.region) sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url sqs_queue = sqs_resource.Queue(queue_name) # Send a test message - sqs_client = self.s3_utils.connect('sqs', self.region) + sqs_client = self.s3_utils.connect('client', 'sqs' , self.region) sqs_client.send_message( QueueUrl=sqs_queue_url, MessageBody=json.dumps({"Message":'''{"Records":[{"eventName":"Unknown"}]}'''}) diff --git a/onestop-python-client/tests/extractor/CsbExtractorTest.py b/onestop-python-client/tests/extractor/CsbExtractorTest.py index 72bdbcc..2c3ff72 100644 --- a/onestop-python-client/tests/extractor/CsbExtractorTest.py +++ b/onestop-python-client/tests/extractor/CsbExtractorTest.py @@ -38,13 +38,13 @@ def test_is_not_csv(self): @mock_s3 def test_csb_SME_user_path(self): # Setup bucket and file to read - s3 = self.s3_utils.connect('s3', self.region) + s3 = self.s3_utils.connect('client', 's3', self.region) s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration={'LocationConstraint': self.region}) self.s3_utils.upload_s3(s3, self.root_proj_path + '/' + self.key, self.bucket, self.key, True) self.assertTrue(self.s3_utils.read_bytes_s3(s3, self.bucket, self.key)) # This is how we would expect an external user to get the file. 
- sm_open_file = self.s3_utils.get_csv_s3(self.s3_utils.connect("session", None), self.bucket, self.key) + sm_open_file = self.s3_utils.get_csv_s3(self.s3_utils.connect('session', None, None), self.bucket, self.key) bounds_dict = CsbExtractor.get_spatial_temporal_bounds(sm_open_file, 'LON', 'LAT', 'TIME') coords = bounds_dict["geospatial"] diff --git a/onestop-python-client/tests/util/S3MessageAdapterTest.py b/onestop-python-client/tests/util/S3MessageAdapterTest.py index 671695a..925be2e 100644 --- a/onestop-python-client/tests/util/S3MessageAdapterTest.py +++ b/onestop-python-client/tests/util/S3MessageAdapterTest.py @@ -72,7 +72,7 @@ def tearDown(self): @mock_s3 def test_transform(self): - s3 = self.s3_utils.connect('s3', self.region) + s3 = self.s3_utils.connect('client', 's3', self.region) location = {'LocationConstraint': self.region} bucket = 'nesdis-ncei-csb-dev' key = 'csv/file1.csv' diff --git a/onestop-python-client/tests/util/S3UtilsTest.py b/onestop-python-client/tests/util/S3UtilsTest.py index 47c8ade..83be8f2 100644 --- a/onestop-python-client/tests/util/S3UtilsTest.py +++ b/onestop-python-client/tests/util/S3UtilsTest.py @@ -30,7 +30,7 @@ def setUp(self): @mock_s3 def test_get_uuid_metadata(self): - boto_client = self.s3_utils.connect("s3_resource", None) + boto_client = self.s3_utils.connect('resource', 's3', None) s3_key = "csv/file1.csv" location = {'LocationConstraint': self.region} @@ -42,7 +42,7 @@ def test_get_uuid_metadata(self): @mock_s3 def test_add_uuid_metadata(self): - boto_client = self.s3_utils.connect("s3_resource", self.region) + boto_client = self.s3_utils.connect('resource', 's3', self.region) s3_key = "csv/file1.csv" @@ -162,7 +162,7 @@ def test_s3_restore(self): days = 3 # use high level api - s3 = self.s3_utils.connect('s3_resource', self.region2) + s3 = self.s3_utils.connect('resource', 's3' , self.region2) location = {'LocationConstraint': self.region2} s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) s3.Object(self.bucket, key).put(Bucket=self.bucket, Key=key, Body="body") diff --git a/onestop-python-client/tests/util/SqsConsumerTest.py b/onestop-python-client/tests/util/SqsConsumerTest.py index 7b5785f..ef50b20 100644 --- a/onestop-python-client/tests/util/SqsConsumerTest.py +++ b/onestop-python-client/tests/util/SqsConsumerTest.py @@ -63,7 +63,7 @@ def tearDown(self): @mock_sqs def test_connect(self): queue_name = 'test' - sqs_resource = self.s3_utils.connect_to_resource('sqs', self.config_dict['s3_region']) + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.config_dict['s3_region']) expQueue = sqs_resource.create_queue(QueueName=queue_name) queue = self.sqs_consumer.connect(sqs_resource, queue_name) @@ -76,11 +76,11 @@ def test_receive_messages_no_records(self): # Create the mock queue beforehand and set SqsConsumer's 'sqs_url' to the mock's URL queue_name = 'test_queue' - sqs_resource = self.s3_utils.connect_to_resource('sqs', self.config_dict['s3_region']) + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.config_dict['s3_region']) sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url # Send a test message lacking Records field - sqs_client = self.s3_utils.connect('sqs', self.config_dict['s3_region']) + sqs_client = self.s3_utils.connect('client', 'sqs' , self.config_dict['s3_region']) sqs_client.send_message( QueueUrl=sqs_queue_url, MessageBody= self.message_wo_records @@ -114,11 +114,11 @@ def test_receive_messages_callback_occurs(self): # Create the mock queue beforehand and set 
SqsConsumer's 'sqs_url' to the mock's URL queue_name = 'test_queue' - sqs_resource = self.s3_utils.connect_to_resource('sqs', self.config_dict['s3_region']) + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.config_dict['s3_region']) sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url # Send a test message - sqs_client = self.s3_utils.connect('sqs', self.config_dict['s3_region']) + sqs_client = self.s3_utils.connect('client', 'sqs' , self.config_dict['s3_region']) sqs_client.send_message( QueueUrl=sqs_queue_url, MessageBody= self.message @@ -136,11 +136,11 @@ def test_happy_path(self): # Create the mock queue beforehand and set SqsConsumer's 'sqs_url' to the mock's URL queue_name = 'test_queue' - sqs_resource = self.s3_utils.connect_to_resource('sqs', self.config_dict['s3_region']) + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.config_dict['s3_region']) queue = self.sqs_consumer.connect(sqs_resource, queue_name) #sqs_resource.create_queue(QueueName=queue_name) # Send a test message - sqs_client = self.s3_utils.connect('sqs', self.config_dict['s3_region']) + sqs_client = self.s3_utils.connect('client', 'sqs' , self.config_dict['s3_region']) sqs_client.send_message( QueueUrl=queue.url, MessageBody= self.message @@ -156,8 +156,8 @@ def test_happy_path(self): @mock_sqs def test_write_message_valid(self): "Test the write_message method with a valid message" - sqs_client = self.s3_utils.connect('sqs', self.config_dict['s3_region']) - sqs = self.s3_utils.connect_to_resource('sqs', self.config_dict['s3_region']) + sqs_client = self.s3_utils.connect('client', 'sqs' , self.config_dict['s3_region']) + sqs = self.s3_utils.connect('resource', 'sqs', self.config_dict['s3_region']) queue = sqs.create_queue(QueueName='test-skype-sender') self.sqs_consumer.sqs_url = queue.url skype_message = 'Testing with a valid message' From f8c5bd0fb922e63030bb63b34dead24dc23d65c1 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 11 May 2021 10:39:14 -0600 Subject: [PATCH 031/100] 1500-Changed moto dependency to moto[all] because of some issues with a moto version issue. 
https://github.com/spulec/moto/issues/3297 --- onestop-python-client/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onestop-python-client/requirements.txt b/onestop-python-client/requirements.txt index 735dad7..9783885 100644 --- a/onestop-python-client/requirements.txt +++ b/onestop-python-client/requirements.txt @@ -8,5 +8,5 @@ argparse~=1.4.0 boto3~=1.15.11 requests~=2.24.0 botocore~=1.18.11 -moto==1.3.16.dev122 +moto[all]==2.0.5 undictify From ecfec1e61e411b67300c7050b7eb701c67a8b454 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 11 May 2021 13:47:37 -0600 Subject: [PATCH 032/100] 1500-added tests for different connect types for S3Utils --- .../tests/util/S3UtilsTest.py | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/onestop-python-client/tests/util/S3UtilsTest.py b/onestop-python-client/tests/util/S3UtilsTest.py index 83be8f2..c002003 100644 --- a/onestop-python-client/tests/util/S3UtilsTest.py +++ b/onestop-python-client/tests/util/S3UtilsTest.py @@ -2,7 +2,7 @@ import unittest import uuid -from moto import mock_s3 +from moto import mock_s3, mock_sqs from moto import mock_glacier from tests.utils import abspath_from_relative from onestop.util.S3Utils import S3Utils @@ -28,6 +28,28 @@ def setUp(self): self.region2 = 'eu-north-1' self.bucket = 'archive-testing-demo' + @mock_sqs + def test_connect_session(self): + session = self.s3_utils.connect('Session', None, None) + + # No exception is called for unique method call + session.client('sqs') + session.resource('s3') + + @mock_sqs + def test_connect_client(self): + client = self.s3_utils.connect('Client', 'sqs', self.region) + + # No exception is called for unique method call + client.list_queues() + + @mock_sqs + def test_connect_resource(self): + resource = self.s3_utils.connect('Resource', 'sqs', self.region) + + # No exception is called for unique method call + resource.Queue(url='test') + @mock_s3 def test_get_uuid_metadata(self): boto_client = self.s3_utils.connect('resource', 's3', None) From e4c7fb46c6455e62bf9d8371ad35c7148c68904a Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 11 May 2021 13:55:23 -0600 Subject: [PATCH 033/100] 1500-Changed class constructors checking extra arguments and logging of an error to warning. 
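The unexpected keys are still reported, just at warning severity, which fits better with the fact that construction continues normally. A rough sketch of the pattern shared by these constructors (ExampleClient is a stand-in, and the standard logging module fills in for ClientLogger here):

    import logging

    class ExampleClient:
        def __init__(self, log_level='INFO', **wildargs):
            self.logger = logging.getLogger(self.__class__.__name__)
            self.logger.setLevel(log_level)
            if wildargs:
                # Extra config keys are tolerated, but surfaced so typos stay noticeable.
                self.logger.warning("There were extra constructor arguments: " + str(wildargs))

    ExampleClient(log_level='DEBUG', unused_key='value')  # constructs fine, logs a warning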
--- onestop-python-client/onestop/KafkaConsumer.py | 2 +- onestop-python-client/onestop/WebPublisher.py | 2 +- onestop-python-client/onestop/util/S3MessageAdapter.py | 2 +- onestop-python-client/onestop/util/S3Utils.py | 2 +- onestop-python-client/onestop/util/SqsConsumer.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/onestop-python-client/onestop/KafkaConsumer.py b/onestop-python-client/onestop/KafkaConsumer.py index 76078cc..747b0e4 100644 --- a/onestop-python-client/onestop/KafkaConsumer.py +++ b/onestop-python-client/onestop/KafkaConsumer.py @@ -101,7 +101,7 @@ def __init__(self, metadata_type, brokers, group_id, auto_offset_reset, schema_r self.logger.info("Initializing " + self.__class__.__name__) if wildargs: - self.logger.error("There were extra constructor arguments: " + str(wildargs)) + self.logger.warning("There were extra constructor arguments: " + str(wildargs)) def register_client(self): """ diff --git a/onestop-python-client/onestop/WebPublisher.py b/onestop-python-client/onestop/WebPublisher.py index d04eacc..7b1c6bd 100644 --- a/onestop-python-client/onestop/WebPublisher.py +++ b/onestop-python-client/onestop/WebPublisher.py @@ -42,7 +42,7 @@ def __init__(self, registry_base_url, registry_username, registry_password, ones self.logger.info("Initializing " + self.__class__.__name__) if kwargs: - self.logger.info("There were extra constructor arguments: " + str(kwargs)) + self.logger.warning("There were extra constructor arguments: " + str(kwargs)) def publish_registry(self, metadata_type, uuid, payload, method): """ diff --git a/onestop-python-client/onestop/util/S3MessageAdapter.py b/onestop-python-client/onestop/util/S3MessageAdapter.py index 1dda78c..6bd832d 100644 --- a/onestop-python-client/onestop/util/S3MessageAdapter.py +++ b/onestop-python-client/onestop/util/S3MessageAdapter.py @@ -59,7 +59,7 @@ def __init__(self, access_bucket, type, file_id_prefix, collection_id, log_leve self.logger.info("Initializing " + self.__class__.__name__) if wildargs: - self.logger.error("There were extra constructor arguments: " + str(wildargs)) + self.logger.warning("There were extra constructor arguments: " + str(wildargs)) def transform(self, recs): """ diff --git a/onestop-python-client/onestop/util/S3Utils.py b/onestop-python-client/onestop/util/S3Utils.py index f1bb8e2..0f86e2b 100644 --- a/onestop-python-client/onestop/util/S3Utils.py +++ b/onestop-python-client/onestop/util/S3Utils.py @@ -76,7 +76,7 @@ def __init__(self, access_key, secret_key, log_level = 'INFO', **wildargs): self.logger.info("Initializing " + self.__class__.__name__) if wildargs: - self.logger.error("There were extra constructor arguments: " + str(wildargs)) + self.logger.warning("There were extra constructor arguments: " + str(wildargs)) def connect(self, type, service_name, region): """ diff --git a/onestop-python-client/onestop/util/SqsConsumer.py b/onestop-python-client/onestop/util/SqsConsumer.py index 4d97c34..39356da 100644 --- a/onestop-python-client/onestop/util/SqsConsumer.py +++ b/onestop-python-client/onestop/util/SqsConsumer.py @@ -30,7 +30,7 @@ def __init__(self, log_level = 'INFO', **wildargs): self.logger.info("Initializing " + self.__class__.__name__) if wildargs: - self.logger.error("There were extra constructor arguments: " + str(wildargs)) + self.logger.warning("There were extra constructor arguments: " + str(wildargs)) def connect(self, sqs_resource, sqs_queue_name): """ From f5370eacd32992af4fc3b8a59ef15b1975a0cc77 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 11 
May 2021 15:23:38 -0600 Subject: [PATCH 034/100] 1500-Moved unit tests to tests/unit and integration tests to tests/integration. Change circle ci config to run all onestop-python-client tests. --- .circleci/config.yml | 10 ++++++++-- onestop-python-client/{tests => test}/__init__.py | 0 onestop-python-client/{tests => test}/data/file1.csv | 0 .../{tests => test}/data/file1_s3.csv | 0 onestop-python-client/{tests => test}/data/file2.csv | 0 onestop-python-client/{tests => test}/data/file3.csv | 0 onestop-python-client/{tests => test}/data/file4.csv | 0 onestop-python-client/test/integration/__init__.py | 0 .../{tests => test/integration}/test_WebPublisher.py | 4 ++-- onestop-python-client/test/unit/__init__.py | 0 onestop-python-client/test/unit/extractor/__init__.py | 0 .../unit/extractor/test_CsbExtractor.py} | 9 +++++---- .../unit/test_KafkaConsumer.py} | 2 +- .../unit/test_KafkaPublisher.py} | 2 +- .../unit/test_SqsHandlers.py} | 6 ++---- .../unit/test_WebPublisher.py} | 2 +- onestop-python-client/test/unit/util/__init__.py | 0 .../unit/util/test_S3MessageAdapter.py} | 0 .../S3UtilsTest.py => test/unit/util/test_S3Utils.py} | 6 +++--- .../unit/util/test_SqsConsumer.py} | 0 onestop-python-client/{tests => test}/utils.py | 0 onestop-python-client/tests/util/IntegrationTest.py | 1 - 22 files changed, 23 insertions(+), 19 deletions(-) rename onestop-python-client/{tests => test}/__init__.py (100%) rename onestop-python-client/{tests => test}/data/file1.csv (100%) rename onestop-python-client/{tests => test}/data/file1_s3.csv (100%) rename onestop-python-client/{tests => test}/data/file2.csv (100%) rename onestop-python-client/{tests => test}/data/file3.csv (100%) rename onestop-python-client/{tests => test}/data/file4.csv (100%) create mode 100644 onestop-python-client/test/integration/__init__.py rename onestop-python-client/{tests => test/integration}/test_WebPublisher.py (98%) create mode 100644 onestop-python-client/test/unit/__init__.py create mode 100644 onestop-python-client/test/unit/extractor/__init__.py rename onestop-python-client/{tests/extractor/CsbExtractorTest.py => test/unit/extractor/test_CsbExtractor.py} (92%) rename onestop-python-client/{tests/KafkaConsumerTest.py => test/unit/test_KafkaConsumer.py} (99%) rename onestop-python-client/{tests/KafkaPublisherTest.py => test/unit/test_KafkaPublisher.py} (99%) rename onestop-python-client/{tests/SqsHandlersTest.py => test/unit/test_SqsHandlers.py} (98%) rename onestop-python-client/{tests/test_WebPublisher_unit.py => test/unit/test_WebPublisher.py} (99%) create mode 100644 onestop-python-client/test/unit/util/__init__.py rename onestop-python-client/{tests/util/S3MessageAdapterTest.py => test/unit/util/test_S3MessageAdapter.py} (100%) rename onestop-python-client/{tests/util/S3UtilsTest.py => test/unit/util/test_S3Utils.py} (97%) rename onestop-python-client/{tests/util/SqsConsumerTest.py => test/unit/util/test_SqsConsumer.py} (100%) rename onestop-python-client/{tests => test}/utils.py (100%) delete mode 100644 onestop-python-client/tests/util/IntegrationTest.py diff --git a/.circleci/config.yml b/.circleci/config.yml index 99f7692..dbaddb4 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -107,10 +107,16 @@ jobs: app-dir: ./onestop-python-client pkg-manager: pip - run: - name: "Run util tests" + name: "Run unit tests" command: > cd onestop-python-client/; - python -m unittest tests/util/*.py + python -m unittest discover -s test/unit + + - run: + name: "Run integration tests" + command: > + cd 
onestop-python-client/; + python -m unittest discover -s test/integration orbs: slack: circleci/slack@3.4.2 diff --git a/onestop-python-client/tests/__init__.py b/onestop-python-client/test/__init__.py similarity index 100% rename from onestop-python-client/tests/__init__.py rename to onestop-python-client/test/__init__.py diff --git a/onestop-python-client/tests/data/file1.csv b/onestop-python-client/test/data/file1.csv similarity index 100% rename from onestop-python-client/tests/data/file1.csv rename to onestop-python-client/test/data/file1.csv diff --git a/onestop-python-client/tests/data/file1_s3.csv b/onestop-python-client/test/data/file1_s3.csv similarity index 100% rename from onestop-python-client/tests/data/file1_s3.csv rename to onestop-python-client/test/data/file1_s3.csv diff --git a/onestop-python-client/tests/data/file2.csv b/onestop-python-client/test/data/file2.csv similarity index 100% rename from onestop-python-client/tests/data/file2.csv rename to onestop-python-client/test/data/file2.csv diff --git a/onestop-python-client/tests/data/file3.csv b/onestop-python-client/test/data/file3.csv similarity index 100% rename from onestop-python-client/tests/data/file3.csv rename to onestop-python-client/test/data/file3.csv diff --git a/onestop-python-client/tests/data/file4.csv b/onestop-python-client/test/data/file4.csv similarity index 100% rename from onestop-python-client/tests/data/file4.csv rename to onestop-python-client/test/data/file4.csv diff --git a/onestop-python-client/test/integration/__init__.py b/onestop-python-client/test/integration/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/onestop-python-client/tests/test_WebPublisher.py b/onestop-python-client/test/integration/test_WebPublisher.py similarity index 98% rename from onestop-python-client/tests/test_WebPublisher.py rename to onestop-python-client/test/integration/test_WebPublisher.py index c81a7de..9263938 100644 --- a/onestop-python-client/tests/test_WebPublisher.py +++ b/onestop-python-client/test/integration/test_WebPublisher.py @@ -56,8 +56,8 @@ class WebPublisherTest(unittest.TestCase): def setUpClass(cls): print("Set it up!") - cred_loc = "../config/credentials.yml" - conf_loc = "../config/csb-data-stream-config-template.yml" + cred_loc = "config/credentials.yml" + conf_loc = "config/csb-data-stream-config-template.yml" with open(cred_loc) as f: creds = yaml.load(f, Loader=yaml.FullLoader) diff --git a/onestop-python-client/test/unit/__init__.py b/onestop-python-client/test/unit/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/onestop-python-client/test/unit/extractor/__init__.py b/onestop-python-client/test/unit/extractor/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/onestop-python-client/tests/extractor/CsbExtractorTest.py b/onestop-python-client/test/unit/extractor/test_CsbExtractor.py similarity index 92% rename from onestop-python-client/tests/extractor/CsbExtractorTest.py rename to onestop-python-client/test/unit/extractor/test_CsbExtractor.py index 2c3ff72..415bb26 100644 --- a/onestop-python-client/tests/extractor/CsbExtractorTest.py +++ b/onestop-python-client/test/unit/extractor/test_CsbExtractor.py @@ -11,9 +11,10 @@ def setUp(self): print("Set it up!") self.root_proj_path = os.getcwd() self.assertIsNotNone(self.root_proj_path) - self.key = "tests/data/file4.csv" - # Use open instead of our methodfor simplicity and reliability, plus not testing our code here. 
- self.file_obj = open(self.root_proj_path + '/' + self.key) + self.data_file_path = os.getcwd() + '/test/data/file4.csv' + self.key = "file4.csv" + # Use open instead of our method because we aren't testing our code here. + self.file_obj = open(self.data_file_path) config_dict = { "access_key": "test_access_key", @@ -40,7 +41,7 @@ def test_csb_SME_user_path(self): # Setup bucket and file to read s3 = self.s3_utils.connect('client', 's3', self.region) s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration={'LocationConstraint': self.region}) - self.s3_utils.upload_s3(s3, self.root_proj_path + '/' + self.key, self.bucket, self.key, True) + self.s3_utils.upload_s3(s3, self.data_file_path, self.bucket, self.key, True) self.assertTrue(self.s3_utils.read_bytes_s3(s3, self.bucket, self.key)) # This is how we would expect an external user to get the file. diff --git a/onestop-python-client/tests/KafkaConsumerTest.py b/onestop-python-client/test/unit/test_KafkaConsumer.py similarity index 99% rename from onestop-python-client/tests/KafkaConsumerTest.py rename to onestop-python-client/test/unit/test_KafkaConsumer.py index 1246789..b119e9a 100644 --- a/onestop-python-client/tests/KafkaConsumerTest.py +++ b/onestop-python-client/test/unit/test_KafkaConsumer.py @@ -4,7 +4,7 @@ from onestop.KafkaConsumer import KafkaConsumer from confluent_kafka.schema_registry import SchemaRegistryClient -class KafkaConsumerTest(unittest.TestCase): +class test_KafkaConsumer(unittest.TestCase): kp = None conf_w_security = None conf_wo_security = None diff --git a/onestop-python-client/tests/KafkaPublisherTest.py b/onestop-python-client/test/unit/test_KafkaPublisher.py similarity index 99% rename from onestop-python-client/tests/KafkaPublisherTest.py rename to onestop-python-client/test/unit/test_KafkaPublisher.py index 643d4f5..1c9497b 100644 --- a/onestop-python-client/tests/KafkaPublisherTest.py +++ b/onestop-python-client/test/unit/test_KafkaPublisher.py @@ -5,7 +5,7 @@ from unittest.mock import ANY, patch, MagicMock from confluent_kafka.schema_registry import SchemaRegistryClient -class KafkaPublisherTest(unittest.TestCase): +class test_KafkaPublisher(unittest.TestCase): kp = None conf_w_security = None conf_wo_security = None diff --git a/onestop-python-client/tests/SqsHandlersTest.py b/onestop-python-client/test/unit/test_SqsHandlers.py similarity index 98% rename from onestop-python-client/tests/SqsHandlersTest.py rename to onestop-python-client/test/unit/test_SqsHandlers.py index 4dd2c9e..b881fc9 100644 --- a/onestop-python-client/tests/SqsHandlersTest.py +++ b/onestop-python-client/test/unit/test_SqsHandlers.py @@ -4,15 +4,14 @@ from unittest import mock from unittest.mock import patch from moto import mock_sqs -from tests.utils import abspath_from_relative, create_delete_message +from test.utils import abspath_from_relative, create_delete_message from onestop.WebPublisher import WebPublisher from onestop.util.S3Utils import S3Utils from onestop.util.S3MessageAdapter import S3MessageAdapter from onestop.util.SqsConsumer import SqsConsumer from onestop.util.SqsHandlers import create_delete_handler - -class SqsHandlerTest(unittest.TestCase): +class test_SqsHandler(unittest.TestCase): def setUp(self): print("Set it up!") @@ -94,7 +93,6 @@ def json(self): } return MockResponse(onestop_search_response, 200) - def mocked_search_response_data_empty(*args, **kwargs): class MockResponse: def __init__(self, json_data, status_code): diff --git a/onestop-python-client/tests/test_WebPublisher_unit.py 
b/onestop-python-client/test/unit/test_WebPublisher.py similarity index 99% rename from onestop-python-client/tests/test_WebPublisher_unit.py rename to onestop-python-client/test/unit/test_WebPublisher.py index 4a97f80..af0802f 100644 --- a/onestop-python-client/tests/test_WebPublisher_unit.py +++ b/onestop-python-client/test/unit/test_WebPublisher.py @@ -6,7 +6,7 @@ from moto import mock_s3 from onestop.WebPublisher import WebPublisher -class WebPublisherTest(unittest.TestCase): +class test_WebPublisher(unittest.TestCase): username="admin" password="a_password" uuid = "9f0a5ff2-fcc0-5bcb-a225-024b669c9bba" diff --git a/onestop-python-client/test/unit/util/__init__.py b/onestop-python-client/test/unit/util/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/onestop-python-client/tests/util/S3MessageAdapterTest.py b/onestop-python-client/test/unit/util/test_S3MessageAdapter.py similarity index 100% rename from onestop-python-client/tests/util/S3MessageAdapterTest.py rename to onestop-python-client/test/unit/util/test_S3MessageAdapter.py diff --git a/onestop-python-client/tests/util/S3UtilsTest.py b/onestop-python-client/test/unit/util/test_S3Utils.py similarity index 97% rename from onestop-python-client/tests/util/S3UtilsTest.py rename to onestop-python-client/test/unit/util/test_S3Utils.py index c002003..70f3385 100644 --- a/onestop-python-client/tests/util/S3UtilsTest.py +++ b/onestop-python-client/test/unit/util/test_S3Utils.py @@ -4,7 +4,7 @@ from moto import mock_s3, mock_sqs from moto import mock_glacier -from tests.utils import abspath_from_relative +from test.utils import abspath_from_relative from onestop.util.S3Utils import S3Utils class S3UtilsTest(unittest.TestCase): @@ -77,7 +77,7 @@ def test_add_uuid_metadata(self): @mock_s3 def test_add_file_s3(self): boto_client = self.s3_utils.connect('client', 's3', None) - local_file = abspath_from_relative(__file__, "../data/file4.csv") + local_file = abspath_from_relative(__file__, "../../data/file4.csv") s3_key = "csv/file4.csv" location = {'LocationConstraint': self.region} boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) @@ -119,7 +119,7 @@ def test_add_files(self): overwrite = True for file in local_files: - local_file = abspath_from_relative(__file__, "../data/" + file) + local_file = abspath_from_relative(__file__, "../../data/" + file) s3_file = "csv/" + file self.assertTrue(self.s3_utils.upload_s3(boto_client, local_file, self.bucket, s3_file, overwrite)) diff --git a/onestop-python-client/tests/util/SqsConsumerTest.py b/onestop-python-client/test/unit/util/test_SqsConsumer.py similarity index 100% rename from onestop-python-client/tests/util/SqsConsumerTest.py rename to onestop-python-client/test/unit/util/test_SqsConsumer.py diff --git a/onestop-python-client/tests/utils.py b/onestop-python-client/test/utils.py similarity index 100% rename from onestop-python-client/tests/utils.py rename to onestop-python-client/test/utils.py diff --git a/onestop-python-client/tests/util/IntegrationTest.py b/onestop-python-client/tests/util/IntegrationTest.py deleted file mode 100644 index 381e4d7..0000000 --- a/onestop-python-client/tests/util/IntegrationTest.py +++ /dev/null @@ -1 +0,0 @@ -#TBD \ No newline at end of file From c93bab294aca7990a6927335b5e69550a1a20cb6 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 12 May 2021 11:56:23 -0600 Subject: [PATCH 035/100] 1500-Fixed one of the test_S3Utils tests that was commented out. Removed some blank lines from S3Utils. 
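The previously commented-out retrieve_inventory_results test depended on a real, hours-long Glacier job; it now runs synchronously by mocking the client and handing it a readable body. The core of that, pulled out as a standalone sketch with the same fake payload the test uses:

    import json
    from io import StringIO
    from unittest import mock
    from botocore.response import StreamingBody

    payload = json.dumps({'Body': [{'test': 'value'}]})
    glacier = mock.Mock()
    # get_job_output normally streams the job result; the mock returns the same shape.
    glacier.get_job_output.return_value = {'body': StreamingBody(StringIO(payload), len(payload))}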
--- onestop-python-client/onestop/util/S3Utils.py | 1 - .../test/unit/util/test_S3Utils.py | 40 ++++++++++++++----- 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/onestop-python-client/onestop/util/S3Utils.py b/onestop-python-client/onestop/util/S3Utils.py index 0f86e2b..d5de564 100644 --- a/onestop-python-client/onestop/util/S3Utils.py +++ b/onestop-python-client/onestop/util/S3Utils.py @@ -390,7 +390,6 @@ def s3_restore(self, boto_client, bucket_name, key, days): # returns status of object retrieval return obj.restore - def retrieve_inventory(self, boto_client, vault_name): """ Initiate an Amazon Glacier inventory-retrieval job diff --git a/onestop-python-client/test/unit/util/test_S3Utils.py b/onestop-python-client/test/unit/util/test_S3Utils.py index 70f3385..6b3321f 100644 --- a/onestop-python-client/test/unit/util/test_S3Utils.py +++ b/onestop-python-client/test/unit/util/test_S3Utils.py @@ -1,11 +1,16 @@ import csv import unittest import uuid +import json +from unittest import mock from moto import mock_s3, mock_sqs from moto import mock_glacier from test.utils import abspath_from_relative from onestop.util.S3Utils import S3Utils +from boto.glacier.layer1 import Layer1 +from botocore.response import StreamingBody +from io import StringIO class S3UtilsTest(unittest.TestCase): @@ -203,26 +208,39 @@ def test_retrieve_inventory(self): vault_name = 'archive-vault-new' glacier.create_vault(vaultName=vault_name) - response = self.s3_utils.retrieve_inventory(glacier, vault_name) - self.assertTrue(response['jobId']!= None) + print('jobid %s'%response['jobId']) + self.assertTrue(response['jobId'] != None) - ''' - Excluding for now because it's an asynchronous test - def test_retrieve_inventory_results(self, jobid): + @mock_glacier + @mock_s3 + def test_retrieve_inventory_results(self): """ Once the job has been completed, use the job id to retrieve archive results """ # Connect to your glacier vault for retrieval - glacier = self.su.connect('client', 'glacier', self.su.conf['region']) - vault_name = self.su.conf['vault_name'] + glacier = mock.Mock(spec=Layer1)#self.s3_utils.connect('client', 'glacier', self.region) + vault_name = 'archive-vault-new' + glacier.create_vault(vaultName=vault_name) + + body_json = {'Body': [{'test':'value'}]} + body_encoded = json.dumps(body_json)#.encode("utf-16") - # Retrieve the job results - inventory = self.su.retrieve_inventory_results(vault_name, glacier, jobid) + body = StreamingBody( + StringIO(str(body_encoded)), + len(str(body_encoded)) + ) + + mocked_response = { + 'body': body + } + glacier.get_job_output.return_value = mocked_response + with mock.patch('boto.glacier.job.tree_hash_from_str') as t: + t.return_value = 'tree_hash' + inventory = self.s3_utils.retrieve_inventory_results(vault_name, glacier, 'ASDF78') - self.assertTrue(inventory != None) - ''' + self.assertEqual(body_json, inventory) @mock_s3 def test_extra_parameters_constructor(self): From 32a300a94d27d5fe0582b42da01f9d912c717eab Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 12 May 2021 13:46:01 -0600 Subject: [PATCH 036/100] 1500-Updated python-client requirements boto3. Seems to be using an old one. Trying to figure out how to force it to a newer one. 
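When pip resolves something unexpected, printing the runtime versions from inside the test environment is a quick sanity check (standard version attributes, nothing project-specific):

    import boto3
    import botocore

    print('boto3', boto3.__version__)
    print('botocore', botocore.__version__)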
--- onestop-python-client/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onestop-python-client/requirements.txt b/onestop-python-client/requirements.txt index 9783885..06a3f5b 100644 --- a/onestop-python-client/requirements.txt +++ b/onestop-python-client/requirements.txt @@ -5,7 +5,7 @@ smart-open PyYAML~=5.3.1 setuptools~=49.2.0 argparse~=1.4.0 -boto3~=1.15.11 +boto3~=1.17.71 requests~=2.24.0 botocore~=1.18.11 moto[all]==2.0.5 From a3f6e96795dc167a2ad6906acd329f28ce8fcf6f Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 12 May 2021 13:59:36 -0600 Subject: [PATCH 037/100] 1500-Updated python-client requirements botocore to 1.20.71 due to conflict between botocore 1.18.11 and moto 2.0.5 --- onestop-python-client/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onestop-python-client/requirements.txt b/onestop-python-client/requirements.txt index 06a3f5b..9a38faa 100644 --- a/onestop-python-client/requirements.txt +++ b/onestop-python-client/requirements.txt @@ -7,6 +7,6 @@ setuptools~=49.2.0 argparse~=1.4.0 boto3~=1.17.71 requests~=2.24.0 -botocore~=1.18.11 +botocore~=1.20.71 moto[all]==2.0.5 undictify From 8fca7a9e55452e5238f1906b568c5729b9fe7e0f Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 12 May 2021 14:06:52 -0600 Subject: [PATCH 038/100] 1500-Changed circleci config for python client to try and update boto --- .circleci/config.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index dbaddb4..c86d021 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -106,12 +106,16 @@ jobs: - python/install-packages: app-dir: ./onestop-python-client pkg-manager: pip + # This is to update boto + - run: pip -V + - run: pip list boto3 + - run: pip install --upgrade --user boto3 + - run: pip3 install boto - run: name: "Run unit tests" command: > cd onestop-python-client/; python -m unittest discover -s test/unit - - run: name: "Run integration tests" command: > From bd38748a45206b34da3b2da62e4920e6e7606ca7 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 12 May 2021 14:15:53 -0600 Subject: [PATCH 039/100] 1500-added region_name to S3Utils connect for session. Suspect it was using my local aws config when region was not specified. 
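Without an explicit region_name, boto3 resolves the region from the caller's environment (AWS_DEFAULT_REGION, ~/.aws/config), so the same test can behave differently per machine. A minimal illustration, using the test placeholder credentials rather than real ones:

    import boto3

    # Pinning the region keeps the session from inheriting the developer's local AWS config.
    session = boto3.Session(
        aws_access_key_id='test_access_key',
        aws_secret_access_key='test_secret_key',
        region_name='us-east-2',
    )
    print(session.region_name)  # 'us-east-2' on any machine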
--- onestop-python-client/onestop/util/S3Utils.py | 4 +++- .../test/unit/extractor/test_CsbExtractor.py | 2 +- onestop-python-client/test/unit/util/test_S3Utils.py | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/onestop-python-client/onestop/util/S3Utils.py b/onestop-python-client/onestop/util/S3Utils.py index d5de564..cbc8f24 100644 --- a/onestop-python-client/onestop/util/S3Utils.py +++ b/onestop-python-client/onestop/util/S3Utils.py @@ -97,13 +97,15 @@ def connect(self, type, service_name, region): return boto3.Session( aws_access_key_id=self.access_key, aws_secret_access_key=self.secret_key, + region_name=region ) elif type == 'client': return boto3.client( service_name, aws_access_key_id=self.access_key, aws_secret_access_key=self.secret_key, - region_name=region) + region_name=region + ) elif type == 'resource': return boto3.resource( service_name, diff --git a/onestop-python-client/test/unit/extractor/test_CsbExtractor.py b/onestop-python-client/test/unit/extractor/test_CsbExtractor.py index 415bb26..cba1bf7 100644 --- a/onestop-python-client/test/unit/extractor/test_CsbExtractor.py +++ b/onestop-python-client/test/unit/extractor/test_CsbExtractor.py @@ -45,7 +45,7 @@ def test_csb_SME_user_path(self): self.assertTrue(self.s3_utils.read_bytes_s3(s3, self.bucket, self.key)) # This is how we would expect an external user to get the file. - sm_open_file = self.s3_utils.get_csv_s3(self.s3_utils.connect('session', None, None), self.bucket, self.key) + sm_open_file = self.s3_utils.get_csv_s3(self.s3_utils.connect('session', None, self.region), self.bucket, self.key) bounds_dict = CsbExtractor.get_spatial_temporal_bounds(sm_open_file, 'LON', 'LAT', 'TIME') coords = bounds_dict["geospatial"] diff --git a/onestop-python-client/test/unit/util/test_S3Utils.py b/onestop-python-client/test/unit/util/test_S3Utils.py index 6b3321f..f6bdd91 100644 --- a/onestop-python-client/test/unit/util/test_S3Utils.py +++ b/onestop-python-client/test/unit/util/test_S3Utils.py @@ -35,7 +35,7 @@ def setUp(self): @mock_sqs def test_connect_session(self): - session = self.s3_utils.connect('Session', None, None) + session = self.s3_utils.connect('Session', None, self.region) # No exception is called for unique method call session.client('sqs') @@ -92,7 +92,7 @@ def test_add_file_s3(self): @mock_s3 def test_get_csv_s3(self): - boto_session = self.s3_utils.connect('session', None, None) + boto_session = self.s3_utils.connect('session', None, self.region) s3 = self.s3_utils.connect('client', 's3', self.region) location = {'LocationConstraint': self.region} s3_key = "csv/file1.csv" From 6a101f8188e37b4a557f359d2a9d2801f2741da4 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 12 May 2021 14:19:19 -0600 Subject: [PATCH 040/100] 1500-Changing python-client circleci config to see if need to tell it to install boto for pip3 every time. 
--- .circleci/config.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index c86d021..a8ad73f 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -106,11 +106,6 @@ jobs: - python/install-packages: app-dir: ./onestop-python-client pkg-manager: pip - # This is to update boto - - run: pip -V - - run: pip list boto3 - - run: pip install --upgrade --user boto3 - - run: pip3 install boto - run: name: "Run unit tests" command: > From 927fb7e00bdf80b88ec2a0959f1a61c1a6526874 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 12 May 2021 14:27:46 -0600 Subject: [PATCH 041/100] 1500-updated python-client requirements to install boto --- onestop-python-client/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/onestop-python-client/requirements.txt b/onestop-python-client/requirements.txt index 9a38faa..036e217 100644 --- a/onestop-python-client/requirements.txt +++ b/onestop-python-client/requirements.txt @@ -5,6 +5,7 @@ smart-open PyYAML~=5.3.1 setuptools~=49.2.0 argparse~=1.4.0 +boto~=2.49.0 boto3~=1.17.71 requests~=2.24.0 botocore~=1.20.71 From 12374a046e48c43ce5c5d14b8b08ef8757675d44 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 12 May 2021 17:59:20 -0600 Subject: [PATCH 042/100] 1500-Changed python-client integration test(s) to use environment variables if credentials yml doesn't exist. Commented out integration task in circleCI config. Since cannot reach registry on cedardevs. --- .circleci/config.yml | 11 ++++---- .../test/integration/test_WebPublisher.py | 25 +++++++++++++------ 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index a8ad73f..d475399 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -111,11 +111,12 @@ jobs: command: > cd onestop-python-client/; python -m unittest discover -s test/unit - - run: - name: "Run integration tests" - command: > - cd onestop-python-client/; - python -m unittest discover -s test/integration +# This is commented out only because the OneStop we have running on cedardevs doesn't have its registry exposed. You can only reach it via sshing to another machine. 
+# - run: +# name: "Run integration tests" +# command: > +# cd onestop-python-client/; +# python -m unittest discover -s test/integration orbs: slack: circleci/slack@3.4.2 diff --git a/onestop-python-client/test/integration/test_WebPublisher.py b/onestop-python-client/test/integration/test_WebPublisher.py index 9263938..04211dc 100644 --- a/onestop-python-client/test/integration/test_WebPublisher.py +++ b/onestop-python-client/test/integration/test_WebPublisher.py @@ -2,8 +2,10 @@ import json import unittest import time +import os.path from onestop.WebPublisher import WebPublisher +from os import path class WebPublisherTest(unittest.TestCase): wp = None @@ -59,13 +61,22 @@ def setUpClass(cls): cred_loc = "config/credentials.yml" conf_loc = "config/csb-data-stream-config-template.yml" - with open(cred_loc) as f: - creds = yaml.load(f, Loader=yaml.FullLoader) - - registry_username = creds['registry']['username'] - registry_password = creds['registry']['password'] - access_key = creds['sandbox']['access_key'] - access_secret = creds['sandbox']['secret_key'] + if path.exists(cred_loc): + with open(cred_loc) as f: + creds = yaml.load(f, Loader=yaml.FullLoader) + + registry_username = creds['registry']['username'] + registry_password = creds['registry']['password'] + access_key = creds['sandbox']['access_key'] + access_secret = creds['sandbox']['secret_key'] + else: + print("Credentials file doesn't exist at '%s', using environment variables."%cred_loc) + registry_username = os.environ.get('REGISTRY_USERNAME') + registry_password = os.environ.get("REGISTRY_PASSWORD") + access_key = os.environ.get("ACCESS_KEY") + access_secret = os.environ.get("SECRET_KEY") + if registry_username == None: + raise Exception("REGISTRY_USERNAME not defined as env variable. Credentials file at '%s' doesn't exist." % cred_loc) with open(conf_loc) as f: conf = yaml.load(f, Loader=yaml.FullLoader) From eb0646d103d933ef30b0ba6fb2b98ca5fb8edb41 Mon Sep 17 00:00:00 2001 From: Erin Date: Thu, 13 May 2021 11:58:43 -0600 Subject: [PATCH 043/100] 1500-Removed redundant log_level fields in all the configs. Put into credentials template. 
--- onestop-python-client/config/aws-util-config-dev.yml | 1 - onestop-python-client/config/credentials-template.yml | 3 +-- .../config/csb-data-stream-config-template.yml | 1 - scripts/config/aws-util-config-dev.yml | 1 - scripts/config/aws-util-config-test.yml | 1 - scripts/config/csb-data-stream-config.yml | 1 - scripts/config/kafka-publisher-config-dev.yml | 1 - scripts/config/web-publisher-config-dev.yml | 1 - scripts/config/web-publisher-config-local.yml | 1 - 9 files changed, 1 insertion(+), 10 deletions(-) diff --git a/onestop-python-client/config/aws-util-config-dev.yml b/onestop-python-client/config/aws-util-config-dev.yml index c30683e..2fdb5c1 100644 --- a/onestop-python-client/config/aws-util-config-dev.yml +++ b/onestop-python-client/config/aws-util-config-dev.yml @@ -1,5 +1,4 @@ # Example config values for osim client -log_level: INFO # AWS config values sqs_url: https://sqs.us-east-2.amazonaws.com/798276211865/cloud-archive-client-sqs diff --git a/onestop-python-client/config/credentials-template.yml b/onestop-python-client/config/credentials-template.yml index 006e175..f94c70b 100644 --- a/onestop-python-client/config/credentials-template.yml +++ b/onestop-python-client/config/credentials-template.yml @@ -9,5 +9,4 @@ registry: username: rw_user password: rw_user_pwd - - +log_level: INFO \ No newline at end of file diff --git a/onestop-python-client/config/csb-data-stream-config-template.yml b/onestop-python-client/config/csb-data-stream-config-template.yml index 56bad99..8c2d4de 100644 --- a/onestop-python-client/config/csb-data-stream-config-template.yml +++ b/onestop-python-client/config/csb-data-stream-config-template.yml @@ -1,4 +1,3 @@ -log_level: INFO format: csv headers: UNIQUE_ID,FILE_UUID,LON,LAT,DEPTH,TIME,PLATFORM_NAME,PROVIDER type: COLLECTION diff --git a/scripts/config/aws-util-config-dev.yml b/scripts/config/aws-util-config-dev.yml index e054f49..9102be0 100644 --- a/scripts/config/aws-util-config-dev.yml +++ b/scripts/config/aws-util-config-dev.yml @@ -1,5 +1,4 @@ # Example config values for osim client -log_level: INFO # AWS config values sqs_url: https://sqs.us-east-2.amazonaws.com/798276211865/cloud-archive-client-sqs diff --git a/scripts/config/aws-util-config-test.yml b/scripts/config/aws-util-config-test.yml index 6aac07a..9de4618 100644 --- a/scripts/config/aws-util-config-test.yml +++ b/scripts/config/aws-util-config-test.yml @@ -1,5 +1,4 @@ # Example config values for osim client -log_level: DEBUG # AWS config values sqs_url: 'test-queue' diff --git a/scripts/config/csb-data-stream-config.yml b/scripts/config/csb-data-stream-config.yml index 24a7cf6..06a45b6 100644 --- a/scripts/config/csb-data-stream-config.yml +++ b/scripts/config/csb-data-stream-config.yml @@ -1,4 +1,3 @@ -log_level: INFO format: csv headers: UNIQUE_ID,FILE_UUID,LON,LAT,DEPTH,TIME,PLATFORM_NAME,PROVIDER type: COLLECTION diff --git a/scripts/config/kafka-publisher-config-dev.yml b/scripts/config/kafka-publisher-config-dev.yml index 85a66f3..bd5af58 100644 --- a/scripts/config/kafka-publisher-config-dev.yml +++ b/scripts/config/kafka-publisher-config-dev.yml @@ -1,5 +1,4 @@ # Example config values for osim client -log_level: DEBUG # COLLECTION or GRANULE metadata_type: GRANULE diff --git a/scripts/config/web-publisher-config-dev.yml b/scripts/config/web-publisher-config-dev.yml index 9b08391..387d252 100644 --- a/scripts/config/web-publisher-config-dev.yml +++ b/scripts/config/web-publisher-config-dev.yml @@ -1,5 +1,4 @@ # Example config values for osim client -log_level: INFO # 
COLLECTION or GRANULE metadata_type: granule diff --git a/scripts/config/web-publisher-config-local.yml b/scripts/config/web-publisher-config-local.yml index 32db955..3ce7d88 100644 --- a/scripts/config/web-publisher-config-local.yml +++ b/scripts/config/web-publisher-config-local.yml @@ -1,5 +1,4 @@ # Example config values for osim client -log_level: INFO # COLLECTION or GRANULE metadata_type: granule From aa0b9a9ce25f2d27928c83b9278e0597e1c1172a Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 14 May 2021 09:55:39 -0600 Subject: [PATCH 044/100] 1500-Changed the kafka config in the scripts for collection and granule _topic_produce to _topic_publish, as it is in the constructor for KafkaPublisher and KafkaConsumer. --- scripts/config/kafka-publisher-config-dev.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/config/kafka-publisher-config-dev.yml b/scripts/config/kafka-publisher-config-dev.yml index bd5af58..8a94bf3 100644 --- a/scripts/config/kafka-publisher-config-dev.yml +++ b/scripts/config/kafka-publisher-config-dev.yml @@ -6,8 +6,8 @@ metadata_type: GRANULE # Kafka config values brokers: onestop-dev-cp-kafka:9092 schema_registry: http://onestop-dev-cp-schema-registry:8081 -collection_topic_produce: psi-granules-by-collection -granule_topic_produce: psi-granule-parsed +collection_topic_publish: psi-granules-by-collection +granule_topic_publish: psi-granule-parsed collection_topic_consume: psi-collection-input-unknown granule_topic_consume: psi-granule-input-unknown group_id: sme-test From 3b14757e303159f62ea07cff5c9590c990d9033f Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 18 May 2021 13:23:23 -0600 Subject: [PATCH 045/100] 1500-Changed exception message to first be a string then passed into exception. Otherwise wasn't evaluating the variable within message. --- onestop-python-client/test/integration/test_WebPublisher.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/onestop-python-client/test/integration/test_WebPublisher.py b/onestop-python-client/test/integration/test_WebPublisher.py index 04211dc..5c7935a 100644 --- a/onestop-python-client/test/integration/test_WebPublisher.py +++ b/onestop-python-client/test/integration/test_WebPublisher.py @@ -76,7 +76,8 @@ def setUpClass(cls): access_key = os.environ.get("ACCESS_KEY") access_secret = os.environ.get("SECRET_KEY") if registry_username == None: - raise Exception("REGISTRY_USERNAME not defined as env variable. Credentials file at '%s' doesn't exist." % cred_loc) + msg = "REGISTRY_USERNAME not defined as env variable. Credentials file at '" + cred_loc + "' doesn't exist." + raise Exception(msg) with open(conf_loc) as f: conf = yaml.load(f, Loader=yaml.FullLoader) From ebf71ee681da93ea00aad68d9e229c0d132f738a Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 18 May 2021 13:40:41 -0600 Subject: [PATCH 046/100] 1500-Adjusted exception thrown in S3Utils.connect for invalid type, wasn't printing value of variable. Added test for that negative case. 
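Since the point of the improved exception text is to surface the offending value, the negative test can also assert on it; unittest's assertRaisesRegex does this without extra plumbing. A small sketch, reusing the s3_utils fixture assumed from the surrounding test class:

    with self.assertRaisesRegex(Exception, 'junk'):
        self.s3_utils.connect('junk', 'sqs', self.region)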
--- onestop-python-client/onestop/util/S3Utils.py | 2 +- onestop-python-client/test/unit/util/test_S3Utils.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/onestop-python-client/onestop/util/S3Utils.py b/onestop-python-client/onestop/util/S3Utils.py index cbc8f24..d63e654 100644 --- a/onestop-python-client/onestop/util/S3Utils.py +++ b/onestop-python-client/onestop/util/S3Utils.py @@ -114,7 +114,7 @@ def connect(self, type, service_name, region): aws_secret_access_key=self.secret_key ) else: - raise Exception('Unknown boto3 type of %s'%type) + raise Exception('Unknown boto3 type of "%s"'%(type)) def objectkey_exists(self, bucket, s3_key): """ diff --git a/onestop-python-client/test/unit/util/test_S3Utils.py b/onestop-python-client/test/unit/util/test_S3Utils.py index f6bdd91..91b90a3 100644 --- a/onestop-python-client/test/unit/util/test_S3Utils.py +++ b/onestop-python-client/test/unit/util/test_S3Utils.py @@ -55,6 +55,11 @@ def test_connect_resource(self): # No exception is called for unique method call resource.Queue(url='test') + @mock_sqs + def test_connect_exception_for_invalid_connection_type(self): + with self.assertRaises(Exception): + self.s3_utils.connect('junk', 'sqs', self.region) + @mock_s3 def test_get_uuid_metadata(self): boto_client = self.s3_utils.connect('resource', 's3', None) From 053df0599094d127715449a8031a872aaa9d9049 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 18 May 2021 14:22:37 -0600 Subject: [PATCH 047/100] 1500-Fixed log but in SqsConsumer of microseconds process time being multiplied instead of divided to get seconds. --- onestop-python-client/onestop/util/SqsConsumer.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/onestop-python-client/onestop/util/SqsConsumer.py b/onestop-python-client/onestop/util/SqsConsumer.py index 39356da..1972cc6 100644 --- a/onestop-python-client/onestop/util/SqsConsumer.py +++ b/onestop-python-client/onestop/util/SqsConsumer.py @@ -98,15 +98,12 @@ def receive_messages(self, sqs_queue, sqs_max_polls, cb): else: self.logger.info("s3 event message without 'Records' content received.") - sqs_message.delete() - - self.logger.info("The SQS message has been deleted.") - dt_end = datetime.now(tz=timezone.utc) processing_time = dt_end - dt_start + self.logger.info("Completed processing the message in %s seconds."%(processing_time.microseconds / 1000000)) - self.logger.info("Completed processing message (s):" + str(processing_time.microseconds * 1000)) - + sqs_message.delete() + self.logger.info("The SQS message has been deleted.") except: self.logger.exception( "An exception was thrown while processing a message, but this program will continue. The " From 5c66efa79d9ec4ee79a97d178a515a912ee7c896 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 18 May 2021 15:11:25 -0600 Subject: [PATCH 048/100] 2500-Added SqsHandlers create_upload_handler back with tests. Didn't realize was used, looked obsolete. 
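The corrected log line divides the microseconds by 1,000,000; note that timedelta.microseconds only carries the sub-second remainder, so timedelta.total_seconds() is the loss-free way to report elapsed time if a message ever takes longer than a second. A small sketch of the difference:

    from datetime import timedelta

    elapsed = timedelta(seconds=2, microseconds=350000)
    print(elapsed.microseconds / 1000000)   # 0.35 -- drops the whole seconds
    print(elapsed.total_seconds())          # 2.35 -- full elapsed time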
--- .../onestop/util/SqsHandlers.py | 57 ++++++++- .../test/unit/test_SqsHandlers.py | 111 +++++++++++++++++- 2 files changed, 165 insertions(+), 3 deletions(-) diff --git a/onestop-python-client/onestop/util/SqsHandlers.py b/onestop-python-client/onestop/util/SqsHandlers.py index ce0f010..894f8b5 100644 --- a/onestop-python-client/onestop/util/SqsHandlers.py +++ b/onestop-python-client/onestop/util/SqsHandlers.py @@ -11,7 +11,7 @@ def create_delete_handler(web_publisher): """ def delete(records, log_level='INFO'): - logger = ClientLogger.get_logger('SqsHandlers', log_level, False) + logger = ClientLogger.get_logger('SqsHandlers.create_delete_handler.delete', log_level, False) logger.info("In create_delete_handler.delete() handler") logger.debug("Records: %s"%records) @@ -36,9 +36,62 @@ def delete(records, log_level='INFO'): if len(response_json['data']) != 0: granule_uuid = response_json['data'][0]['id'] response = web_publisher.delete_registry('granule', granule_uuid) - print('delete_registry response: %s'%response) + logger.debug('web_publisher.delete_registry response: %s'%response) return response logger.warning("OneStop search response has no 'data' field. Response=%s"%response_json) return delete + +def create_upload_handler(web_publisher, s3_utils, s3_message_adapter): + """ + Creates a upload function handler to be used with SqsConsumer.receive_messages. + + The upload handler function checks the object for a UUID and if one is not found, it will create one for it. + + :param: web_publisher: WebPublisher object + :param: s3_utils: S3Utils object + :param: s3ma: S3MessageAdapter object + + """ + def upload(records, log_level='INFO'): + logger = ClientLogger.get_logger('SqsHandlers.create_upload_handler.upload', log_level, False) + logger.info("In create_upload_handler.upload() handler") + logger.debug("Records: %s"%records) + + rec = records[0] + s3_key = rec['s3']['object']['key'] + logger.info("Received message for " + s3_key) + logger.info("Event type: " + rec['eventName']) + bucket = rec['s3']['bucket']['name'] + logger.info("BUCKET: %s"%bucket) + s3_resource = s3_utils.connect("s3_resource", None) + + # Fetch the object to get the uuid + object_uuid = s3_utils.get_uuid_metadata(s3_resource, bucket, s3_key) + if object_uuid is not None: + logger.info("Retrieved object-uuid: %s"%object_uuid) + else: + logger.info("Adding uuid") + # Can't add uuid to glacier and should be copied over + if "backup" not in bucket: + object_uuid = s3_utils.add_uuid_metadata(s3_resource, bucket, s3_key) + + # Convert s3 message to IM message + json_payload = s3_message_adapter.transform(records) + logger.debug('transformed message, json_payload: %s'%json_payload) + + # Send the message to registry + payload = json_payload.serialize() + method = 'PATCH' # Backup location should be patched if not backup within bucket name + if "backup" not in bucket: + method = 'POST' + + logger.debug('web_publisher.publish_registry method using "%s" with payload %s'%(method,payload)) + registry_response = web_publisher.publish_registry("granule", object_uuid, payload, method) + logger.debug('web_publisher.publish_registry response=%s'%registry_response) + logger.debug('web_publisher.publish_registry response json=%s'%registry_response.json()) + + return registry_response + + return upload \ No newline at end of file diff --git a/onestop-python-client/test/unit/test_SqsHandlers.py b/onestop-python-client/test/unit/test_SqsHandlers.py index b881fc9..c17b972 100644 --- 
a/onestop-python-client/test/unit/test_SqsHandlers.py +++ b/onestop-python-client/test/unit/test_SqsHandlers.py @@ -10,6 +10,7 @@ from onestop.util.S3MessageAdapter import S3MessageAdapter from onestop.util.SqsConsumer import SqsConsumer from onestop.util.SqsHandlers import create_delete_handler +from onestop.util.SqsHandlers import create_upload_handler class test_SqsHandler(unittest.TestCase): @@ -32,7 +33,7 @@ def setUp(self): self.wp = WebPublisher(**self.config_dict) self.s3_utils = S3Utils(**self.config_dict) - self.s3ma = S3MessageAdapter(**self.config_dict) + self.s3_message_adapter = S3MessageAdapter(**self.config_dict) self.sqs_consumer = SqsConsumer(**self.config_dict) self.sqs_max_polls = 3 @@ -215,5 +216,113 @@ def test_delete_handler_eventName_not_delete_ends_cb(self, mock_wp, mock_respons mock_wp.search_onestop.assert_not_called() mock_wp.delete_registry.assert_not_called() + @mock_sqs + @patch('onestop.WebPublisher') + @patch('onestop.util.S3Utils') + @patch('onestop.util.S3MessageAdapter') + def test_upload_handler_happy(self, mock_s3_utils, mock_s3_msg_adapter, mock_wp): + bucket = self.bucket + key = self.key + queue_name = 'test_queue' + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.region) + sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url + sqs_queue = sqs_resource.Queue(queue_name) + + # Send a test message + sqs_client = self.s3_utils.connect('client', 'sqs' , self.region) + message = create_delete_message(self.region, bucket, key) + sqs_client.send_message( + QueueUrl=sqs_queue_url, + MessageBody=json.dumps(message) + ) + + records = json.loads(message['Message'])['Records'] + records_transformed = mock_s3_msg_adapter.transform(records) + cb = create_upload_handler(mock_wp, mock_s3_utils, mock_s3_msg_adapter) + self.sqs_consumer.receive_messages(sqs_queue, 1, cb) + + # Verify get uuid called + mock_s3_utils.get_uuid_metadata.assert_called_with( + mock_s3_utils.connect('s3_resource', None), + bucket, + key) + # Verify uuid not added + mock_s3_utils.add_uuid_metadata.assert_not_called() + # Verify transform called + mock_s3_msg_adapter.transform.assert_called_with(records) + # Verify publish called + mock_wp.publish_registry.assert_called_with( + 'granule', + mock_s3_utils.get_uuid_metadata(mock_s3_utils.connect('s3_resource', None), bucket, key), + records_transformed.serialize(), + 'POST' + ) + + @mock_sqs + @patch('onestop.WebPublisher') + @patch('onestop.util.S3Utils') + @patch('onestop.util.S3MessageAdapter') + def test_upload_handler_adds_uuid(self, mock_s3_utils, mock_s3_msg_adapter, mock_wp): + bucket = self.bucket + key = self.key + queue_name = 'test_queue' + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.region) + sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url + sqs_queue = sqs_resource.Queue(queue_name) + + # Send a test message + sqs_client = self.s3_utils.connect('client', 'sqs' , self.region) + message = create_delete_message(self.region, bucket, key) + sqs_client.send_message( + QueueUrl=sqs_queue_url, + MessageBody=json.dumps(message) + ) + + mock_s3_utils.get_uuid_metadata.return_value = None + cb = create_upload_handler(mock_wp, mock_s3_utils, mock_s3_msg_adapter) + + self.sqs_consumer.receive_messages(sqs_queue, 1, cb) + + # Verify add uuid called + mock_s3_utils.add_uuid_metadata.assert_called_with( + mock_s3_utils.connect('s3_resource', None), + bucket, + key) + + @mock_sqs + @patch('onestop.WebPublisher') + @patch('onestop.util.S3Utils') + 
@patch('onestop.util.S3MessageAdapter') + def test_upload_handler_bucket_as_backup_PATCH(self, mock_s3_utils, mock_s3_msg_adapter, mock_wp): + bucket = "testing_backup_bucket" + key = self.key + queue_name = 'test_queue' + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.region) + sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url + sqs_queue = sqs_resource.Queue(queue_name) + + # Send a test message + sqs_client = self.s3_utils.connect('client', 'sqs' , self.region) + message = create_delete_message(self.region, bucket, key) + sqs_client.send_message( + QueueUrl=sqs_queue_url, + MessageBody=json.dumps(message) + ) + + mock_s3_utils.get_uuid_metadata.return_value = None + records = json.loads(message['Message'])['Records'] + records_transformed = mock_s3_msg_adapter.transform(records) + cb = create_upload_handler(mock_wp, mock_s3_utils, mock_s3_msg_adapter) + + self.sqs_consumer.receive_messages(sqs_queue, 1, cb) + + # Verify publish called + mock_wp.publish_registry.assert_called_with( + 'granule', + mock_s3_utils.get_uuid_metadata(mock_s3_utils.connect('s3_resource', None), bucket, key), + records_transformed.serialize(), + 'PATCH' + ) + if __name__ == '__main__': unittest.main() \ No newline at end of file From d4b2013c3f84125e0941be6605cdfb03c95944d2 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 18 May 2021 20:49:13 -0600 Subject: [PATCH 049/100] 1500-Changed references to psi_registry_url to registry_base_url --- kubernetes/pyconsumer-pod.yaml | 2 +- scripts/config/csb-data-stream-config.yml | 2 +- serverless/conf.py | 2 +- serverless/lambda_function.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/kubernetes/pyconsumer-pod.yaml b/kubernetes/pyconsumer-pod.yaml index fed2258..6943403 100644 --- a/kubernetes/pyconsumer-pod.yaml +++ b/kubernetes/pyconsumer-pod.yaml @@ -72,7 +72,7 @@ data: headers: UNIQUE_ID,FILE_UUID,LON,LAT,DEPTH,TIME,PLATFORM_NAME,PROVIDER type: COLLECTION collection_id: fdb56230-87f4-49f2-ab83-104cfd073177 - psi_registry_url: https://cedardevs.org/ + registry_base_url: https://cedardevs.org/ access_bucket: https://archive-testing-demo.s3-us-east-2.amazonaws.com #access_bucket: https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com file_identifier_prefix: "gov.noaa.ncei.csb:" diff --git a/scripts/config/csb-data-stream-config.yml b/scripts/config/csb-data-stream-config.yml index 06a45b6..2d25328 100644 --- a/scripts/config/csb-data-stream-config.yml +++ b/scripts/config/csb-data-stream-config.yml @@ -2,7 +2,7 @@ format: csv headers: UNIQUE_ID,FILE_UUID,LON,LAT,DEPTH,TIME,PLATFORM_NAME,PROVIDER type: COLLECTION collection_id: fdb56230-87f4-49f2-ab83-104cfd073177 -psi_registry_url: https://internal-a683c98a66fb011eaa4230e0d5e5657f-369075387.us-east-1.elb.amazonaws.com +registry_base_url: https://internal-a683c98a66fb011eaa4230e0d5e5657f-369075387.us-east-1.elb.amazonaws.com access_bucket: https://archive-testing-demo.s3-us-east-2.amazonaws.com #access_bucket: https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com file_id_prefix: "gov.noaa.ncei.csb:" diff --git a/serverless/conf.py b/serverless/conf.py index b41eb0b..26ef3cd 100644 --- a/serverless/conf.py +++ b/serverless/conf.py @@ -3,6 +3,6 @@ HEADERS = 'UNIQUE_ID,FILE_UUID,LON,LAT,DEPTH,TIME,PLATFORM_NAME,PROVIDER' TYPE = 'COLLECTION' COLLECTION_ID = 'fdb56230-87f4-49f2-ab83-104cfd073177' -PSI_REGISTRY_URL = 'http://internal-a683c98a66fb011eaa4230e0d5e5657f-369075387.us-east-1.elb.amazonaws.com' +REGISTRY_BASE_URL = 
'http://internal-a683c98a66fb011eaa4230e0d5e5657f-369075387.us-east-1.elb.amazonaws.com' ACCESS_BUCKET = 'https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com' FILE_IDENTIFIER_PREFIX = 'gov.noaa.ncei.csb:' diff --git a/serverless/lambda_function.py b/serverless/lambda_function.py index abe8fb7..3b6cd97 100644 --- a/serverless/lambda_function.py +++ b/serverless/lambda_function.py @@ -9,7 +9,7 @@ def lambda_handler(event, context): - registry_url = conf.PSI_REGISTRY_URL + "/metadata/granule" + registry_url = conf.REGISTRY_BASE_URL + "/metadata/granule" for rec in event['Records']: From a0711f2276e6854dad6a6502db3619f677631d74 Mon Sep 17 00:00:00 2001 From: Erin Date: Thu, 20 May 2021 15:34:35 -0600 Subject: [PATCH 050/100] 1507-Added sqs_name to helm values. --- helm/onestop-sqs-consumer/values.yaml | 1 + helm/sme-chart/values.yaml | 1 + 2 files changed, 2 insertions(+) diff --git a/helm/onestop-sqs-consumer/values.yaml b/helm/onestop-sqs-consumer/values.yaml index 20557a0..afbc414 100644 --- a/helm/onestop-sqs-consumer/values.yaml +++ b/helm/onestop-sqs-consumer/values.yaml @@ -58,6 +58,7 @@ config: |- # AWS config values sqs_url: https://sqs.us-east-2.amazonaws.com/798276211865/cloud-archive-client-sqs + sqs_name: 'test-queue' sqs_max_polls: 100 s3_region: us-east-2 s3_bucket: archive-testing-demo diff --git a/helm/sme-chart/values.yaml b/helm/sme-chart/values.yaml index 924f62f..0c68925 100644 --- a/helm/sme-chart/values.yaml +++ b/helm/sme-chart/values.yaml @@ -15,6 +15,7 @@ config: |- # AWS config values sqs_url: https://sqs.us-east-2.amazonaws.com/798276211865/cloud-archive-client-sqs + sqs_name: 'test-queue' sqs_max_polls: 100 s3_region: us-east-2 s3_bucket: archive-testing-demo From 77190e87d182dce09046cf5caf28f1553d072226 Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 21 May 2021 15:53:07 -0600 Subject: [PATCH 051/100] 1507-fixed bug in S3Utils upload_s3 passing in wrong parameter type to --- onestop-python-client/onestop/util/S3Utils.py | 2 +- .../test/unit/util/test_S3Utils.py | 18 +++++++++++++----- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/onestop-python-client/onestop/util/S3Utils.py b/onestop-python-client/onestop/util/S3Utils.py index d63e654..24a81c3 100644 --- a/onestop-python-client/onestop/util/S3Utils.py +++ b/onestop-python-client/onestop/util/S3Utils.py @@ -220,7 +220,7 @@ def upload_s3(self, boto_client, local_file, bucket, s3_key, overwrite): obj_uuid = str(uuid.uuid4()) if not overwrite: - key_exists = self.objectkey_exists(boto_client, bucket, s3_key) + key_exists = self.objectkey_exists(bucket, s3_key) if (not key_exists) or (key_exists and overwrite): try: diff --git a/onestop-python-client/test/unit/util/test_S3Utils.py b/onestop-python-client/test/unit/util/test_S3Utils.py index 91b90a3..6508837 100644 --- a/onestop-python-client/test/unit/util/test_S3Utils.py +++ b/onestop-python-client/test/unit/util/test_S3Utils.py @@ -85,15 +85,24 @@ def test_add_uuid_metadata(self): self.assertTrue(self.s3_utils.add_uuid_metadata(boto_client, self.bucket, s3_key)) @mock_s3 - def test_add_file_s3(self): + def test_add_file_s3_overwrite(self): boto_client = self.s3_utils.connect('client', 's3', None) local_file = abspath_from_relative(__file__, "../../data/file4.csv") s3_key = "csv/file4.csv" location = {'LocationConstraint': self.region} boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) - overwrite = True - self.assertTrue(self.s3_utils.upload_s3(boto_client, local_file, self.bucket, s3_key, overwrite)) + 
self.assertTrue(self.s3_utils.upload_s3(boto_client, local_file, self.bucket, s3_key, True)) + + @mock_s3 + def test_add_file_s3_nooverwrite(self): + boto_client = self.s3_utils.connect('client', 's3', None) + local_file = abspath_from_relative(__file__, "../../data/file4.csv") + s3_key = "csv/file4.csv" + location = {'LocationConstraint': self.region} + boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) + + self.assertTrue(self.s3_utils.upload_s3(boto_client, local_file, self.bucket, s3_key, False)) @mock_s3 def test_get_csv_s3(self): @@ -126,12 +135,11 @@ def test_add_files(self): local_files = ["file1_s3.csv", "file2.csv", "file3.csv"] location = {'LocationConstraint': self.region} boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) - overwrite = True for file in local_files: local_file = abspath_from_relative(__file__, "../../data/" + file) s3_file = "csv/" + file - self.assertTrue(self.s3_utils.upload_s3(boto_client, local_file, self.bucket, s3_file, overwrite)) + self.assertTrue(self.s3_utils.upload_s3(boto_client, local_file, self.bucket, s3_file, True)) @mock_s3 @mock_glacier From c073a2f0419ceb768b7d9db43a287de637629ff9 Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 28 May 2021 13:09:22 -0600 Subject: [PATCH 052/100] 1507-Updated circleCI config to run/publish docker image of onestop-python-client based on branch name. Removed unnecessary comments. --- .circleci/config.yml | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index d475399..9df7485 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -55,7 +55,6 @@ jobs: # - slack/status: # fail_only: false -# cli build cli-build: executor: docker/docker steps: @@ -75,8 +74,7 @@ jobs: - slack/status: fail_only: false -# clients build - client-build: + python-client-build: executor: docker/docker steps: - setup_remote_docker @@ -89,14 +87,11 @@ jobs: - run: name: "What branch am I on now?" command: echo $CIRCLE_BRANCH -#no need to push this image yet - docker/push: image: cedardevs/onestop-python-client tag: ${CIRCLE_BRANCH}-SNAPSHOT - slack/status: fail_only: false - # Base test configuration for Go library tests Each distinct version should - # inherit this base, and override (at least) the container image used. python-client-test: &python-client-test executor: python/default @@ -127,9 +122,8 @@ version: 2.1 workflows: main: jobs: -# - "latest" # - cli-test # - cli-build -# - client-build + - python-client-build - python-client-test From 5d3344c33a8269864d68fb1dfe0dfda343368ea5 Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 28 May 2021 14:09:11 -0600 Subject: [PATCH 053/100] 1507-Copied root Dockerfile to onestop-python-client to get docker publishing images. 
--- onestop-python-client/Dockerfile | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 onestop-python-client/Dockerfile diff --git a/onestop-python-client/Dockerfile b/onestop-python-client/Dockerfile new file mode 100644 index 0000000..e5ec186 --- /dev/null +++ b/onestop-python-client/Dockerfile @@ -0,0 +1,10 @@ +FROM python:3.8 +COPY ./onestop-python-client /onestop-python-client +COPY ./scripts /scripts +RUN apt-get update +RUN pip install --upgrade pip +RUN pip install ./onestop-python-client +RUN pip install -r ./onestop-python-client/requirements.txt + +#Base image stays up for dev access +CMD tail -f /dev/null From 46f7fff4b17b4c88f71c54e14faf93181f00c573 Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 28 May 2021 14:26:51 -0600 Subject: [PATCH 054/100] 1507-Removed from Dockerfile copy step of module in, had trouble doing to and from examples hoping unnecessary. --- onestop-python-client/Dockerfile | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/onestop-python-client/Dockerfile b/onestop-python-client/Dockerfile index e5ec186..28bdc61 100644 --- a/onestop-python-client/Dockerfile +++ b/onestop-python-client/Dockerfile @@ -1,10 +1,4 @@ FROM python:3.8 -COPY ./onestop-python-client /onestop-python-client -COPY ./scripts /scripts RUN apt-get update RUN pip install --upgrade pip -RUN pip install ./onestop-python-client -RUN pip install -r ./onestop-python-client/requirements.txt - -#Base image stays up for dev access -CMD tail -f /dev/null +RUN pip install -r requirements.txt From 0166d4fa2a787f55d97e88cf3fae9ad45f13a2be Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 28 May 2021 16:31:03 -0600 Subject: [PATCH 055/100] 1507-Changed onestop-python-client Dockerfile to copy onestop-python-client code to working directory then install requirements.txt. --- onestop-python-client/Dockerfile | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/onestop-python-client/Dockerfile b/onestop-python-client/Dockerfile index 28bdc61..4da5281 100644 --- a/onestop-python-client/Dockerfile +++ b/onestop-python-client/Dockerfile @@ -1,4 +1,12 @@ FROM python:3.8 + +WORKDIR /app + +# Copy requirements.txt into workspace and execute it, so installed in workspace. +COPY requirements.txt requirements.txt +RUN pip3 install -r requirements.txt RUN apt-get update -RUN pip install --upgrade pip -RUN pip install -r requirements.txt +RUN pip3 install --upgrade pip + +# Copy source code into workspace +COPY . . 
\ No newline at end of file From f3088da5d7fb37191eb0683e47d16b13dadef519 Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 28 May 2021 16:49:51 -0600 Subject: [PATCH 056/100] 1507-Add to circleCI building and publishing images of onestop-s3-handler and onestop-sme onestop-python-client --- .circleci/config.yml | 37 +++++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 9df7485..47b7dba 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -74,7 +74,39 @@ jobs: - slack/status: fail_only: false - python-client-build: + onestop-s3-handler-build: + executor: docker/docker + steps: + - setup_remote_docker + - checkout + - docker/check + - docker/build: + path: scripts/sqs-to-registry + image: cedardevs/onestop-s3-handler + tag: ${CIRCLE_BRANCH}-SNAPSHOT + - docker/push: + image: cedardevs/onestop-s3-handler + tag: ${CIRCLE_BRANCH}-SNAPSHOT + - slack/status: + fail_only: false + + onestop-sme-build: + executor: docker/docker + steps: + - setup_remote_docker + - checkout + - docker/check + - docker/build: + path: onestop-python-client + image: cedardevs/onestop-sme + tag: ${CIRCLE_BRANCH}-SNAPSHOT + - docker/push: + image: cedardevs/onestop-sme + tag: ${CIRCLE_BRANCH}-SNAPSHOT + - slack/status: + fail_only: false + + onestop-python-client-build: executor: docker/docker steps: - setup_remote_docker @@ -84,9 +116,6 @@ jobs: path: onestop-python-client image: cedardevs/onestop-python-client tag: ${CIRCLE_BRANCH}-SNAPSHOT - - run: - name: "What branch am I on now?" - command: echo $CIRCLE_BRANCH - docker/push: image: cedardevs/onestop-python-client tag: ${CIRCLE_BRANCH}-SNAPSHOT From eaf145422a24609220d0ef973e79c705d21e3a01 Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 28 May 2021 16:51:55 -0600 Subject: [PATCH 057/100] 1507-Fixed booboo in circleCI of renaming build and not changing name in the jobs section and added the sme and s3 builds to jobs. --- .circleci/config.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 47b7dba..27ffef8 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -122,7 +122,7 @@ jobs: - slack/status: fail_only: false - python-client-test: &python-client-test + onestop-python-client-test: &python-client-test executor: python/default steps: &steps - checkout @@ -153,6 +153,8 @@ workflows: jobs: # - cli-test # - cli-build - - python-client-build - - python-client-test + - onestop-sme-build + - onestop-s3-handler-build + - onestop-python-client-build + - onestop-python-client-test From ed3a93e6d502d8350c14fa8ba4471af8d4ba5a31 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 1 Jun 2021 13:26:11 -0600 Subject: [PATCH 058/100] 1507-Changed circleCI image tag to 'latest'. Need to revisit. 
--- .circleci/config.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 27ffef8..2fdc0b0 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -83,10 +83,10 @@ jobs: - docker/build: path: scripts/sqs-to-registry image: cedardevs/onestop-s3-handler - tag: ${CIRCLE_BRANCH}-SNAPSHOT + tag: latest - docker/push: image: cedardevs/onestop-s3-handler - tag: ${CIRCLE_BRANCH}-SNAPSHOT + tag: latest - slack/status: fail_only: false @@ -99,10 +99,10 @@ jobs: - docker/build: path: onestop-python-client image: cedardevs/onestop-sme - tag: ${CIRCLE_BRANCH}-SNAPSHOT + tag: latest - docker/push: image: cedardevs/onestop-sme - tag: ${CIRCLE_BRANCH}-SNAPSHOT + tag: latest - slack/status: fail_only: false @@ -115,10 +115,10 @@ jobs: - docker/build: path: onestop-python-client image: cedardevs/onestop-python-client - tag: ${CIRCLE_BRANCH}-SNAPSHOT + tag: latest - docker/push: image: cedardevs/onestop-python-client - tag: ${CIRCLE_BRANCH}-SNAPSHOT + tag: latest - slack/status: fail_only: false From 095d5fafe01764061211cd8f4a3ac7b786e61699 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 2 Jun 2021 10:26:09 -0600 Subject: [PATCH 059/100] 1507-Changed dockerfiles back (had changed it due to so problems with circleci) and changed circleci config. --- .circleci/config.yml | 2 +- Dockerfile | 10 ++++++---- onestop-python-client/Dockerfile | 12 ------------ scripts/sme/Dockerfile | 4 +++- scripts/sqs-to-registry/Dockerfile | 4 +--- 5 files changed, 11 insertions(+), 21 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 2fdc0b0..1f308ea 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -97,7 +97,7 @@ jobs: - checkout - docker/check - docker/build: - path: onestop-python-client + path: scripts/sme/ image: cedardevs/onestop-sme tag: latest - docker/push: diff --git a/Dockerfile b/Dockerfile index e5ec186..21df663 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,10 +1,12 @@ FROM python:3.8 -COPY ./onestop-python-client /onestop-python-client -COPY ./scripts /scripts + +COPY onestop-python-client /onestop-python-client +COPY scripts /scripts + RUN apt-get update RUN pip install --upgrade pip -RUN pip install ./onestop-python-client -RUN pip install -r ./onestop-python-client/requirements.txt +RUN pip install /onestop-python-client +RUN pip install -r /onestop-python-client/requirements.txt #Base image stays up for dev access CMD tail -f /dev/null diff --git a/onestop-python-client/Dockerfile b/onestop-python-client/Dockerfile index 4da5281..e69de29 100644 --- a/onestop-python-client/Dockerfile +++ b/onestop-python-client/Dockerfile @@ -1,12 +0,0 @@ -FROM python:3.8 - -WORKDIR /app - -# Copy requirements.txt into workspace and execute it, so installed in workspace. -COPY requirements.txt requirements.txt -RUN pip3 install -r requirements.txt -RUN apt-get update -RUN pip3 install --upgrade pip - -# Copy source code into workspace -COPY . . \ No newline at end of file diff --git a/scripts/sme/Dockerfile b/scripts/sme/Dockerfile index d4b48fa..c91b8f4 100644 --- a/scripts/sme/Dockerfile +++ b/scripts/sme/Dockerfile @@ -1,6 +1,8 @@ FROM cedardevs/onestop-python-client:latest -COPY . . 
+ RUN pip install argparse RUN pip install psycopg2 +RUN pip install ./onestop-python-client + #ENTRYPOINT [ "python" ,"scripts/sme/sme.py", "-cmd consume", "-b localhost:9092", "-s http://localhost:8081", "-t psi-collection-extractor-to" , "-g sme-test", "-o earliest" ] CMD tail -f /dev/null diff --git a/scripts/sqs-to-registry/Dockerfile b/scripts/sqs-to-registry/Dockerfile index 9db0598..4f59b4e 100644 --- a/scripts/sqs-to-registry/Dockerfile +++ b/scripts/sqs-to-registry/Dockerfile @@ -1,10 +1,8 @@ FROM cedardevs/onestop-python-client:latest -COPY . . + #required by the sme script, not our library RUN pip install argparse -#I should not have to do this, since it is done in the base image -#RUN pip install -r ./onestop-python-client/requirements.txt ENTRYPOINT [ "python" ] CMD [ "s3_notification_handler.py" ] #CMD tail -f /dev/null \ No newline at end of file From 9d4eaba6c91082d3c3008046ea0e16dad76c749f Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 2 Jun 2021 12:35:26 -0600 Subject: [PATCH 060/100] 1507-Removed onestop-python-client dockerfile, unecessary. Therefore changed circleci path for onestop-python-client build to indicate use project root as path. Changed paths in root dockerfile back to ./ instead of / so clearer. --- .circleci/config.yml | 2 +- Dockerfile | 8 ++++---- onestop-python-client/Dockerfile | 0 3 files changed, 5 insertions(+), 5 deletions(-) delete mode 100644 onestop-python-client/Dockerfile diff --git a/.circleci/config.yml b/.circleci/config.yml index 1f308ea..c54ed47 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -113,7 +113,7 @@ jobs: - checkout - docker/check - docker/build: - path: onestop-python-client + path: ./ image: cedardevs/onestop-python-client tag: latest - docker/push: diff --git a/Dockerfile b/Dockerfile index 21df663..d73fa34 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,12 +1,12 @@ FROM python:3.8 -COPY onestop-python-client /onestop-python-client -COPY scripts /scripts +COPY ./onestop-python-client /onestop-python-client +COPY ./scripts /scripts RUN apt-get update RUN pip install --upgrade pip -RUN pip install /onestop-python-client -RUN pip install -r /onestop-python-client/requirements.txt +RUN pip install ./onestop-python-client +RUN pip install -r ./onestop-python-client/requirements.txt #Base image stays up for dev access CMD tail -f /dev/null diff --git a/onestop-python-client/Dockerfile b/onestop-python-client/Dockerfile deleted file mode 100644 index e69de29..0000000 From bca1ce3bc1a8f409fc2c610fbdd1e6205d1e36da Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 2 Jun 2021 12:38:47 -0600 Subject: [PATCH 061/100] 1507-added comment to sqs-to-registry dockerfile about how things get copied over. --- scripts/sqs-to-registry/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/sqs-to-registry/Dockerfile b/scripts/sqs-to-registry/Dockerfile index 4f59b4e..985421d 100644 --- a/scripts/sqs-to-registry/Dockerfile +++ b/scripts/sqs-to-registry/Dockerfile @@ -1,3 +1,4 @@ +# Expect this to copy the scripts directory over and install onestop-python-client. FROM cedardevs/onestop-python-client:latest #required by the sme script, not our library From 4c62de8506add5e5563abd3773f0b904a75978f3 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 2 Jun 2021 12:44:00 -0600 Subject: [PATCH 062/100] 1507-Changed scripts/sme dockerfile to not install onestop-python-client, path is one up which is out of context for this dockerfile (hint root dockerfile does that step). Added comment about in root dockerfile. 
--- Dockerfile | 5 ++++- scripts/sme/Dockerfile | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index d73fa34..a906511 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,8 +5,11 @@ COPY ./scripts /scripts RUN apt-get update RUN pip install --upgrade pip -RUN pip install ./onestop-python-client RUN pip install -r ./onestop-python-client/requirements.txt +# Needed for scripts - do here since directory out of scope when in scripts/* dockerfiles. +# Unsure if possible this isn't latest build, like doing pip install before this is built. +RUN pip install ./onestop-python-client + #Base image stays up for dev access CMD tail -f /dev/null diff --git a/scripts/sme/Dockerfile b/scripts/sme/Dockerfile index c91b8f4..19051c3 100644 --- a/scripts/sme/Dockerfile +++ b/scripts/sme/Dockerfile @@ -1,8 +1,8 @@ +# Expect this to copy the scripts directory over and install onestop-python-client. FROM cedardevs/onestop-python-client:latest +# Install additional python libraries needed by scripts RUN pip install argparse RUN pip install psycopg2 -RUN pip install ./onestop-python-client -#ENTRYPOINT [ "python" ,"scripts/sme/sme.py", "-cmd consume", "-b localhost:9092", "-s http://localhost:8081", "-t psi-collection-extractor-to" , "-g sme-test", "-o earliest" ] CMD tail -f /dev/null From 34ed5975624d44357b174afa75f1c55e58158065 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 2 Jun 2021 21:08:15 -0600 Subject: [PATCH 063/100] 1507-In helm values files changed image pullPolicy to Always because from what can tell it only pulls if the image name changes, such as if you have a version number. We don't do this ATM. --- helm/onestop-sqs-consumer/values.yaml | 2 +- helm/sme-chart/values.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/helm/onestop-sqs-consumer/values.yaml b/helm/onestop-sqs-consumer/values.yaml index afbc414..351cbfa 100644 --- a/helm/onestop-sqs-consumer/values.yaml +++ b/helm/onestop-sqs-consumer/values.yaml @@ -7,7 +7,7 @@ replicaCount: 1 image: repository: cedardevs/onestop-sme tag: latest - pullPolicy: IfNotPresent + pullPolicy: Always imagePullSecrets: [] nameOverride: "" diff --git a/helm/sme-chart/values.yaml b/helm/sme-chart/values.yaml index 0c68925..eb19445 100644 --- a/helm/sme-chart/values.yaml +++ b/helm/sme-chart/values.yaml @@ -1,7 +1,7 @@ image: repository: cedardevs/onestop-e2e-demo tag: latest - pullPolicy: IfNotPresent + pullPolicy: Always secret: registry_username: From d09ad1c7980f33382f5de5da5ca9ce5fa4deb324 Mon Sep 17 00:00:00 2001 From: Erin Date: Thu, 3 Jun 2021 21:47:32 -0600 Subject: [PATCH 064/100] 1507-To helm/*/values.yaml removed unused sqs_url, added cert locations w comment and set security to false (cedar-devs is false), adjusted s3_bucket2 to a real bucket, fixed bug of granule type needing to be GRANULE, and added kafka information. 
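For context on the security block these values introduce, the caLoc/keyLoc/certLoc entries added below are the usual SSL file locations; a sketch of how a client can fold them into the Kafka configuration, assuming the standard librdkafka property names and that brokers and the parsed security dict are already in scope:

    conf = {'bootstrap.servers': brokers}
    if security['enabled']:
        conf.update({
            'security.protocol': 'SSL',
            'ssl.ca.location': security['caLoc'],            # e.g. /etc/pki/tls/cert.pem
            'ssl.key.location': security['keyLoc'],          # e.g. /etc/pki/tls/private/kafka-user.key
            'ssl.certificate.location': security['certLoc'], # e.g. /etc/pki/tls/certs/kafka-user.crt
        })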
--- helm/onestop-sqs-consumer/values.yaml | 31 +++++++++++++++++++-------- helm/sme-chart/values.yaml | 31 +++++++++++++++++++++------ 2 files changed, 47 insertions(+), 15 deletions(-) diff --git a/helm/onestop-sqs-consumer/values.yaml b/helm/onestop-sqs-consumer/values.yaml index 351cbfa..3af3396 100644 --- a/helm/onestop-sqs-consumer/values.yaml +++ b/helm/onestop-sqs-consumer/values.yaml @@ -57,16 +57,15 @@ config: |- log_level: INFO # AWS config values - sqs_url: https://sqs.us-east-2.amazonaws.com/798276211865/cloud-archive-client-sqs - sqs_name: 'test-queue' - sqs_max_polls: 100 + sqs_name: cloud-archive-client-sqs s3_region: us-east-2 s3_bucket: archive-testing-demo + sqs_max_polls: 100 #AWS config values for 2nd vault in different region vault_name: archive-vault-new s3_region2: us-east-2 - s3_bucket2: noaa-nccf-dev-archive + s3_bucket2: archive-testing-testing-test #CSB stream config format: csv @@ -78,15 +77,29 @@ config: |- file_identifier_prefix: "gov.noaa.ncei.csb:" # COLLECTION or GRANULE - metadata_type: granule + metadata_type: GRANULE registry_base_url: http://onestop-registry:80 onestop_base_url: http://onestop-search:8080 - security: - enabled: True - prefixMap: NESDIS/CSB: 'fdb56230-87f4-49f2-ab83-104cfd073177' NESDIS/H8: '0fad03df-0805-434a-86a6-7dc42d68480e' NESDIS/GOES: '11111111-1111-1111-1111-111111111111' - NESDIS/SAB: '98e03b47-069a-4f2c-8071-649e8c4254d6' \ No newline at end of file + NESDIS/SAB: '98e03b47-069a-4f2c-8071-649e8c4254d6' + + # Kafka config values + brokers: onestop-dev-cp-kafka:9092 + schema_registry: http://onestop-dev-cp-schema-registry:8081 + collection_topic_publish: psi-granules-by-collection + granule_topic_publish: psi-granule-parsed + collection_topic_consume: psi-collection-input-unknown + granule_topic_consume: psi-granule-input-unknown + group_id: sme-test + auto_offset_reset: earliest + security: + # True/False + enabled: False + # If security is enabled then need these: + caLoc: /etc/pki/tls/cert.pem + keyLoc: /etc/pki/tls/private/kafka-user.key + certLoc: /etc/pki/tls/certs/kafka-user.crt \ No newline at end of file diff --git a/helm/sme-chart/values.yaml b/helm/sme-chart/values.yaml index eb19445..795a388 100644 --- a/helm/sme-chart/values.yaml +++ b/helm/sme-chart/values.yaml @@ -14,16 +14,15 @@ config: |- log_level: INFO # AWS config values - sqs_url: https://sqs.us-east-2.amazonaws.com/798276211865/cloud-archive-client-sqs - sqs_name: 'test-queue' - sqs_max_polls: 100 + sqs_name: cloud-archive-client-sqs s3_region: us-east-2 s3_bucket: archive-testing-demo + sqs_max_polls: 100 #AWS config values for 2nd vault in different region vault_name: archive-vault-new s3_region2: us-east-2 - s3_bucket2: noaa-nccf-dev-archive + s3_bucket2: archive-testing-testing-test #CSB stream config format: csv @@ -35,9 +34,29 @@ config: |- file_identifier_prefix: "gov.noaa.ncei.csb:" # COLLECTION or GRANULE - metadata_type: granule + metadata_type: GRANULE registry_base_url: http://onestop-registry:80 onestop_base_url: http://onestop-search:8080 + prefixMap: + NESDIS/CSB: 'fdb56230-87f4-49f2-ab83-104cfd073177' + NESDIS/H8: '0fad03df-0805-434a-86a6-7dc42d68480e' + NESDIS/GOES: '11111111-1111-1111-1111-111111111111' + NESDIS/SAB: '98e03b47-069a-4f2c-8071-649e8c4254d6' + + # Kafka config values + brokers: onestop-dev-cp-kafka:9092 + schema_registry: http://onestop-dev-cp-schema-registry:8081 + collection_topic_publish: psi-granules-by-collection + granule_topic_publish: psi-granule-parsed + collection_topic_consume: psi-collection-input-unknown + 
granule_topic_consume: psi-granule-input-unknown + group_id: sme-test + auto_offset_reset: earliest security: - enabled: True \ No newline at end of file + # True/False + enabled: False + # If security is enabled then need these: + caLoc: /etc/pki/tls/cert.pem + keyLoc: /etc/pki/tls/private/kafka-user.key + certLoc: /etc/pki/tls/certs/kafka-user.crt \ No newline at end of file From 37a65a299ef89615725f79dfd9e0667e83e927fa Mon Sep 17 00:00:00 2001 From: Erin Date: Thu, 3 Jun 2021 22:13:22 -0600 Subject: [PATCH 065/100] 1507-Set metadata_type to uppercase and adjusted the ValueError raised if it metadata_type wasn't GRANULE or COLLECTION. --- onestop-python-client/onestop/KafkaPublisher.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onestop-python-client/onestop/KafkaPublisher.py b/onestop-python-client/onestop/KafkaPublisher.py index 047783c..2be275e 100644 --- a/onestop-python-client/onestop/KafkaPublisher.py +++ b/onestop-python-client/onestop/KafkaPublisher.py @@ -77,7 +77,7 @@ def __init__(self, metadata_type, brokers, schema_registry, security, collection granule_topic: str granule topic you want to produce to """ - self.metadata_type = metadata_type + self.metadata_type = metadata_type.upper() self.brokers = brokers self.schema_registry = schema_registry self.security_enabled = security['enabled'] @@ -91,7 +91,7 @@ def __init__(self, metadata_type, brokers, schema_registry, security, collection self.granule_topic = granule_topic_publish if self.metadata_type not in ['COLLECTION', 'GRANULE']: - raise ValueError("metadata_type must be 'COLLECTION' or 'GRANULE'") + raise ValueError("metadata_type of '%s' must be 'COLLECTION' or 'GRANULE'"%(self.metadata_type)) self.logger = ClientLogger.get_logger(self.__class__.__name__, log_level, False) self.logger.info("Initializing " + self.__class__.__name__) From c3f37611ce8bbfd29aac3a3dc3d2546af5e544ad Mon Sep 17 00:00:00 2001 From: Erin Date: Thu, 3 Jun 2021 22:13:37 -0600 Subject: [PATCH 066/100] 1507-Set metadata_type to uppercase and adjusted the ValueError raised if it metadata_type wasn't GRANULE or COLLECTION. Changed try/raise in consume method to try/finally with same closing of the consumer as before. This way exceptions do fail script, because before was unclear why it wasn't working. 
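In rough outline, the consume loop after this change looks like the sketch below (the poll timeout and loop condition are illustrative, not the exact class code): the consumer is always closed in the finally block, and handler or deserialization errors propagate instead of being logged and swallowed.

    def consume(metadata_consumer, handler):
        try:
            while True:
                msg = metadata_consumer.poll(10)  # timeout chosen for the sketch
                if msg is None:
                    continue
                handler(msg.key(), msg.value())
        finally:
            metadata_consumer.close()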
--- onestop-python-client/onestop/KafkaConsumer.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/onestop-python-client/onestop/KafkaConsumer.py b/onestop-python-client/onestop/KafkaConsumer.py index 747b0e4..0481af9 100644 --- a/onestop-python-client/onestop/KafkaConsumer.py +++ b/onestop-python-client/onestop/KafkaConsumer.py @@ -79,7 +79,7 @@ def __init__(self, metadata_type, brokers, group_id, auto_offset_reset, schema_r What log level to use for this class """ - self.metadata_type = metadata_type + self.metadata_type = metadata_type.upper() self.brokers = brokers self.group_id = group_id self.auto_offset_reset = auto_offset_reset @@ -95,7 +95,7 @@ def __init__(self, metadata_type, brokers, group_id, auto_offset_reset, schema_r self.granule_topic = granule_topic_consume if self.metadata_type not in ['COLLECTION', 'GRANULE']: - raise ValueError("metadata_type must be 'COLLECTION' or 'GRANULE'") + raise ValueError("metadata_type of '%s' must be 'COLLECTION' or 'GRANULE'"%(self.metadata_type)) self.logger = ClientLogger.get_logger(self.__class__.__name__, log_level, False) self.logger.info("Initializing " + self.__class__.__name__) @@ -153,7 +153,8 @@ def create_consumer(self, registry_client): metadata_schema = latest_schema.schema.schema_str self.logger.debug("metadata_schema: "+metadata_schema) - metadata_deserializer = AvroDeserializer(metadata_schema, registry_client) + + metadata_deserializer = AvroDeserializer(schema_str=metadata_schema, schema_registry_client=registry_client) conf = { 'bootstrap.servers': self.brokers, 'key.deserializer': StringDeserializer('utf-8'), @@ -199,12 +200,7 @@ def consume(self, metadata_consumer, handler): key = msg.key() value = msg.value() - except KafkaError: - raise - try: handler(key, value) - except Exception as e: - self.logger.error("Message handler failed: {}".format(e)) - break - self.logger.debug("Closing metadata_consumer") - metadata_consumer.close() + finally: + self.logger.debug("Closing metadata_consumer") + metadata_consumer.close() From 2cfdcf3a03e21370fe529401c08520176c1da4be Mon Sep 17 00:00:00 2001 From: Erin Date: Thu, 3 Jun 2021 22:14:57 -0600 Subject: [PATCH 067/100] 1507-Added additional log statement in delete handler if record eventName was a Delete type. --- onestop-python-client/onestop/util/SqsHandlers.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/onestop-python-client/onestop/util/SqsHandlers.py b/onestop-python-client/onestop/util/SqsHandlers.py index 894f8b5..fa3503a 100644 --- a/onestop-python-client/onestop/util/SqsHandlers.py +++ b/onestop-python-client/onestop/util/SqsHandlers.py @@ -24,6 +24,8 @@ def delete(records, log_level='INFO'): logger.info("Ending handler, eventName=%s"%record['eventName']) return + logger.info('Attempting to delete record %s'%record) + bucket = record['s3']['bucket']['name'] s3_key = record['s3']['object']['key'] s3_url = "s3://" + bucket + "/" + s3_key From be034ea6585216b4dc1ad30e33d9271c00b8c966 Mon Sep 17 00:00:00 2001 From: Erin Date: Thu, 3 Jun 2021 22:23:15 -0600 Subject: [PATCH 068/100] 1507-Updated KafkaConsumer unit test to reflect change neglected to mention in last commit that the AvroDeserializer constructor needed more specific parameter names (think it got updated and our order of params was wrong). 
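The same keyword-argument style applies when building the deserializer from the registry; keywords keep the call valid across confluent-kafka releases that reordered the positional parameters. A minimal sketch, using the schema registry URL and consume topic from the configs in these patches:

    from confluent_kafka.schema_registry import SchemaRegistryClient
    from confluent_kafka.schema_registry.avro import AvroDeserializer

    registry_client = SchemaRegistryClient({'url': 'http://onestop-dev-cp-schema-registry:8081'})
    schema_str = registry_client.get_latest_version('psi-granule-input-unknown-value').schema.schema_str
    metadata_deserializer = AvroDeserializer(schema_str=schema_str, schema_registry_client=registry_client)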
--- onestop-python-client/test/unit/test_KafkaConsumer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onestop-python-client/test/unit/test_KafkaConsumer.py b/onestop-python-client/test/unit/test_KafkaConsumer.py index b119e9a..5404c31 100644 --- a/onestop-python-client/test/unit/test_KafkaConsumer.py +++ b/onestop-python-client/test/unit/test_KafkaConsumer.py @@ -126,7 +126,7 @@ def test_create_consumer_calls_AvroDeserializer(self, mock_deserializing_consume deser_consumer = consumer.create_consumer(reg_client) # Verify AvroDeserializer called with expected registry client - mock_avro_deserializer.assert_called_with(ANY, reg_client) + mock_avro_deserializer.assert_called_with(schema_str=ANY, schema_registry_client=reg_client) self.assertIsNotNone(deser_consumer) From 528fa83ad858a423e24521bf707897926ef3870f Mon Sep 17 00:00:00 2001 From: Erin Date: Thu, 3 Jun 2021 22:25:51 -0600 Subject: [PATCH 069/100] 1507-Updated scripts to work with new class constructors that take a dict instead of config location. Removed mocking for launch_delete_handler script since have unit tests that cover this now. --- scripts/launch_delete_handler.py | 106 +++++++++++++------------------ 1 file changed, 43 insertions(+), 63 deletions(-) diff --git a/scripts/launch_delete_handler.py b/scripts/launch_delete_handler.py index 7bb3983..6d000d4 100644 --- a/scripts/launch_delete_handler.py +++ b/scripts/launch_delete_handler.py @@ -1,79 +1,59 @@ -import json -import boto3 import argparse -from moto import mock_s3 -from moto import mock_sqs -from tests.utils import create_delete_message +import os +import yaml + from onestop.WebPublisher import WebPublisher from onestop.util.S3Utils import S3Utils from onestop.util.SqsConsumer import SqsConsumer from onestop.util.SqsHandlers import create_delete_handler - -def mock_init_s3(s3u): - """ Sets up bucket, object, SQS queue, and delete message. 
- - Assumes there are additional keys passed in via config - - :param s3u: S3Utils object - :return: URL of the mock queue created in SQS - """ - boto_client = s3u.connect("s3", None) - bucket = s3u.conf['s3_bucket'] - region = s3u.conf['s3_region'] - key = s3u.conf['s3_key'] - boto_client.create_bucket(Bucket=bucket) - boto_client.put_object(Bucket=bucket, Key=key, Body="foobar") - - sqs_client = boto3.client('sqs', region_name=region) - sqs_queue = sqs_client.create_queue(QueueName=s3u.conf['sqs_name']) - message = create_delete_message(region, bucket, key) - sqs_client.send_message(QueueUrl=sqs_queue['QueueUrl'], MessageBody=json.dumps(message)) - return sqs_queue['QueueUrl'] - +config_dict = {} if __name__ == '__main__': # All command-line arguments have defaults that use test data, with AWS mocking set to true parser = argparse.ArgumentParser(description="Launches SQS delete test") - parser.add_argument('--aws-conf', dest="aws_conf", required=False, default="config/aws-util-config-test.yml", + # Set default config location to the Helm mounted pod configuration location + parser.add_argument('-conf', dest="conf", required=False, default='/etc/config/config.yml', help="AWS config filepath") - parser.add_argument('--osim-conf', dest="osim_conf", required=False, default="config/web-publisher-config-local.yml", - help="OSIM config filepath") - parser.add_argument('-mock', dest="mock", required=False, default=True, help="Use mock AWS or real values") - - parser.add_argument('-cred', dest="cred", required=False, default="config/credentials-template.yml", + parser.add_argument('-cred', dest="cred", required=True, help="Credentials filepath") args = vars(parser.parse_args()) - wp_config = args.pop('osim_conf') - aws_config = args.pop('aws_conf') - cred_config = args.pop('cred') - use_mocks = args.pop('mock') - - web_publisher = WebPublisher(wp_config, cred_config) - s3_utils = S3Utils(aws_config, cred_config) - sqs_consumer = SqsConsumer(aws_config, cred_config) - - if use_mocks is True: - mock_1 = mock_s3() - mock_2 = mock_sqs() - mock_1.start() - mock_2.start() - mock_queue_url = mock_init_s3(s3_utils) - # Need to override the config value here so that sqs_consumer.connect will use the correct url for the queue - sqs_consumer.conf['sqs_url'] = mock_queue_url - - sqs_max_polls = s3_utils.conf['sqs_max_polls'] + # Generate configuration dictionary + conf_loc = args.pop('conf') + with open(conf_loc) as f: + config_dict.update(yaml.load(f, Loader=yaml.FullLoader)) + + # Get credentials from passed in fully qualified path or ENV. 
+ cred_loc = args.pop('cred') + if cred_loc is not None: + with open(cred_loc) as f: + creds = yaml.load(f, Loader=yaml.FullLoader) + registry_username = creds['registry']['username'] + registry_password = creds['registry']['password'] + access_key = creds['sandbox']['access_key'] + access_secret = creds['sandbox']['secret_key'] + else: + print("Using env variables for config parameters") + registry_username = os.environ.get("REGISTRY_USERNAME") + registry_password = os.environ.get("REGISTRY_PASSWORD") + access_key = os.environ.get("ACCESS_KEY") + access_secret = os.environ.get("SECRET_KEY") + + config_dict.update({ + 'registry_username' : registry_username, + 'registry_password' : registry_password, + 'access_key' : access_key, + 'secret_key' : access_secret + }) + + web_publisher = WebPublisher(**config_dict) + s3_utils = S3Utils(**config_dict) + sqs_consumer = SqsConsumer(**config_dict) + + sqs_max_polls = config_dict['sqs_max_polls'] delete_handler = create_delete_handler(web_publisher) + s3_resource = s3_utils.connect('resource', 'sqs', config_dict['s3_region']) + queue = sqs_consumer.connect(s3_resource, config_dict['sqs_name']) - queue = sqs_consumer.connect() - try: - sqs_consumer.receive_messages(queue, sqs_max_polls, delete_handler) - if use_mocks is True: - mock_1.stop() - mock_2.stop() - except Exception as e: - print("Message queue consumption failed: {}".format(e)) - if use_mocks is True: - mock_1.stop() - mock_2.stop() + sqs_consumer.receive_messages(queue, sqs_max_polls, delete_handler) From d43d0673db226a5b57713b9fd052e98c855b82e7 Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 4 Jun 2021 15:55:17 -0600 Subject: [PATCH 070/100] 1507-Updated AvroSerializer call to specify parameter name, since order of params seem to have changed. --- onestop-python-client/onestop/KafkaPublisher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onestop-python-client/onestop/KafkaPublisher.py b/onestop-python-client/onestop/KafkaPublisher.py index 2be275e..a95081d 100644 --- a/onestop-python-client/onestop/KafkaPublisher.py +++ b/onestop-python-client/onestop/KafkaPublisher.py @@ -146,7 +146,7 @@ def create_producer(self, registry_client): topic = self.granule_topic metadata_schema = registry_client.get_latest_version(topic + '-value').schema.schema_str - metadata_serializer = AvroSerializer(metadata_schema, registry_client) + metadata_serializer = AvroSerializer(schema_str=metadata_schema, schema_registry_client=registry_client) conf = {'bootstrap.servers': self.brokers} if self.security_enabled: From 27e7d1be5a2025f88f8b4d0c45a734516edfb77e Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 4 Jun 2021 15:57:44 -0600 Subject: [PATCH 071/100] 1507-Updated AvroSerializer call to specify parameter name, since order of params seem to have changed. 
(forgot intellij's multi select for commits doesn't work) --- onestop-python-client/test/unit/test_KafkaPublisher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onestop-python-client/test/unit/test_KafkaPublisher.py b/onestop-python-client/test/unit/test_KafkaPublisher.py index 1c9497b..f43d3f6 100644 --- a/onestop-python-client/test/unit/test_KafkaPublisher.py +++ b/onestop-python-client/test/unit/test_KafkaPublisher.py @@ -123,7 +123,7 @@ def test_create_producer_calls_AvroSerializer(self, mock_serializing_publisher, publisher.create_producer(reg_client) # Verify AvroSerializer called with expected registry client - mock_avro_serializer.assert_called_with(ANY, reg_client) + mock_avro_serializer.assert_called_with(schema_str=ANY, schema_registry_client=reg_client) @patch('onestop.KafkaPublisher.AvroSerializer') @patch('onestop.KafkaPublisher.SerializingProducer') From 3993106d76a434cb76fbc1e241bd4779fe717e29 Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 4 Jun 2021 16:05:50 -0600 Subject: [PATCH 072/100] 1507-Removed try/catch around code in KafkaPublisher publish_granule so as to get better error reporting. --- onestop-python-client/onestop/KafkaPublisher.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/onestop-python-client/onestop/KafkaPublisher.py b/onestop-python-client/onestop/KafkaPublisher.py index a95081d..a0c66ce 100644 --- a/onestop-python-client/onestop/KafkaPublisher.py +++ b/onestop-python-client/onestop/KafkaPublisher.py @@ -274,13 +274,11 @@ def publish_granule(self, granule_producer, collection_uuid, content_dict): 'discovery': content_dict['discovery'] } - try: - self.logger.debug('Publishing granule with topic='+self.granule_topic+' key='+key+' value='+str(value_dict)) - granule_producer.produce( - topic=self.granule_topic, - value=value_dict, - key=key, - on_delivery=self.delivery_report) - except KafkaError: - raise + self.logger.debug('Publishing granule with topic='+self.granule_topic+' key='+key+' value='+str(value_dict)) + granule_producer.produce( + topic=self.granule_topic, + value=value_dict, + key=key, + on_delivery=self.delivery_report) + granule_producer.poll() From 2ade49bf8dda72244457e422a425d0afd52d5be1 Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 4 Jun 2021 21:16:16 -0600 Subject: [PATCH 073/100] 1507-Removed try/catch around code in KafkaPublisher publish_collection so as to get better error reporting. 
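With the broad try/except gone, per-message failures are reported through the on_delivery callback while hard failures from produce()/poll() propagate to the caller. The class's actual delivery_report is not shown in these patches; an assumed minimal version of that callback shape looks like:

    def delivery_report(err, msg):
        # err is a KafkaError (or None); msg is the confluent_kafka Message that was produced.
        if err is not None:
            print('Delivery failed for key %s: %s' % (msg.key(), err))
        else:
            print('Delivered to %s [%d] at offset %d' % (msg.topic(), msg.partition(), msg.offset()))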
--- onestop-python-client/onestop/KafkaPublisher.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/onestop-python-client/onestop/KafkaPublisher.py b/onestop-python-client/onestop/KafkaPublisher.py index a0c66ce..0ca40d0 100644 --- a/onestop-python-client/onestop/KafkaPublisher.py +++ b/onestop-python-client/onestop/KafkaPublisher.py @@ -214,15 +214,12 @@ def publish_collection(self, collection_producer, collection_uuid, content_dict, 'method': method, 'source': 'unknown', } - try: - self.logger.debug('Publishing collection with topic='+self.collection_topic+' key='+key+' value='+str(value_dict)) - collection_producer.produce( - topic=self.collection_topic, - value=value_dict, - key=key, - on_delivery=self.delivery_report) - except KafkaError: - raise + self.logger.debug('Publishing collection with topic='+self.collection_topic+' key='+key+' value='+str(value_dict)) + collection_producer.produce( + topic=self.collection_topic, + value=value_dict, + key=key, + on_delivery=self.delivery_report) collection_producer.poll() def publish_granule(self, granule_producer, collection_uuid, content_dict): From 9b6dc1b270ae1c77c716f3f042575fe7c197b9ac Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 4 Jun 2021 21:49:12 -0600 Subject: [PATCH 074/100] 1507-Updated helm/*/values.yml for Kafka brokers and schema_registry to what they are on cedar-devs. --- helm/onestop-sqs-consumer/values.yaml | 4 ++-- helm/sme-chart/values.yaml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/helm/onestop-sqs-consumer/values.yaml b/helm/onestop-sqs-consumer/values.yaml index 3af3396..6d0ac4e 100644 --- a/helm/onestop-sqs-consumer/values.yaml +++ b/helm/onestop-sqs-consumer/values.yaml @@ -88,8 +88,8 @@ config: |- NESDIS/SAB: '98e03b47-069a-4f2c-8071-649e8c4254d6' # Kafka config values - brokers: onestop-dev-cp-kafka:9092 - schema_registry: http://onestop-dev-cp-schema-registry:8081 + brokers: cp-cp-kafka:9092 + schema_registry: http://cp-cp-schema-registry:8081 collection_topic_publish: psi-granules-by-collection granule_topic_publish: psi-granule-parsed collection_topic_consume: psi-collection-input-unknown diff --git a/helm/sme-chart/values.yaml b/helm/sme-chart/values.yaml index 795a388..3c72d66 100644 --- a/helm/sme-chart/values.yaml +++ b/helm/sme-chart/values.yaml @@ -45,8 +45,8 @@ config: |- NESDIS/SAB: '98e03b47-069a-4f2c-8071-649e8c4254d6' # Kafka config values - brokers: onestop-dev-cp-kafka:9092 - schema_registry: http://onestop-dev-cp-schema-registry:8081 + brokers: cp-cp-kafka:9092 + schema_registry: http://cp-cp-schema-registry:8081 collection_topic_publish: psi-granules-by-collection granule_topic_publish: psi-granule-parsed collection_topic_consume: psi-collection-input-unknown From 35ed0dc6c199716adeef4d93dda9cf8b0932eafa Mon Sep 17 00:00:00 2001 From: Erin Date: Mon, 7 Jun 2021 10:48:39 -0600 Subject: [PATCH 075/100] 1507-Changed helm/*/values file_identifier_prefix to file_id_prefix (was changed a while ago in the code). 
--- helm/onestop-sqs-consumer/values.yaml | 2 +- helm/sme-chart/values.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/helm/onestop-sqs-consumer/values.yaml b/helm/onestop-sqs-consumer/values.yaml index 6d0ac4e..0baaf1e 100644 --- a/helm/onestop-sqs-consumer/values.yaml +++ b/helm/onestop-sqs-consumer/values.yaml @@ -74,7 +74,7 @@ config: |- collection_id: fdb56230-87f4-49f2-ab83-104cfd073177 access_bucket: https://archive-testing-demo.s3-us-east-2.amazonaws.com #access_bucket: https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com - file_identifier_prefix: "gov.noaa.ncei.csb:" + file_id_prefix: "gov.noaa.ncei.csb:" # COLLECTION or GRANULE metadata_type: GRANULE diff --git a/helm/sme-chart/values.yaml b/helm/sme-chart/values.yaml index 3c72d66..58678a3 100644 --- a/helm/sme-chart/values.yaml +++ b/helm/sme-chart/values.yaml @@ -31,7 +31,7 @@ config: |- collection_id: fdb56230-87f4-49f2-ab83-104cfd073177 access_bucket: https://archive-testing-demo.s3-us-east-2.amazonaws.com #access_bucket: https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com - file_identifier_prefix: "gov.noaa.ncei.csb:" + file_id_prefix: "gov.noaa.ncei.csb:" # COLLECTION or GRANULE metadata_type: GRANULE From 1e7669635a959863186b2ed92fa33b1d23c6cb95 Mon Sep 17 00:00:00 2001 From: Erin Date: Mon, 7 Jun 2021 14:28:07 -0600 Subject: [PATCH 076/100] 1507-Found bug of not updating the s3Utils connect call to new way, updated test to reflect/catch. --- onestop-python-client/onestop/util/SqsHandlers.py | 2 +- onestop-python-client/test/unit/test_SqsHandlers.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/onestop-python-client/onestop/util/SqsHandlers.py b/onestop-python-client/onestop/util/SqsHandlers.py index fa3503a..08f1d05 100644 --- a/onestop-python-client/onestop/util/SqsHandlers.py +++ b/onestop-python-client/onestop/util/SqsHandlers.py @@ -67,9 +67,9 @@ def upload(records, log_level='INFO'): logger.info("Event type: " + rec['eventName']) bucket = rec['s3']['bucket']['name'] logger.info("BUCKET: %s"%bucket) - s3_resource = s3_utils.connect("s3_resource", None) # Fetch the object to get the uuid + s3_resource = s3_utils.connect('resource', 's3', None) object_uuid = s3_utils.get_uuid_metadata(s3_resource, bucket, s3_key) if object_uuid is not None: logger.info("Retrieved object-uuid: %s"%object_uuid) diff --git a/onestop-python-client/test/unit/test_SqsHandlers.py b/onestop-python-client/test/unit/test_SqsHandlers.py index c17b972..cd6a3ad 100644 --- a/onestop-python-client/test/unit/test_SqsHandlers.py +++ b/onestop-python-client/test/unit/test_SqsHandlers.py @@ -242,8 +242,9 @@ def test_upload_handler_happy(self, mock_s3_utils, mock_s3_msg_adapter, mock_wp) self.sqs_consumer.receive_messages(sqs_queue, 1, cb) # Verify get uuid called + mock_s3_utils.connect.assert_called_with('resource', 's3', None) mock_s3_utils.get_uuid_metadata.assert_called_with( - mock_s3_utils.connect('s3_resource', None), + mock_s3_utils.connect(), bucket, key) # Verify uuid not added @@ -285,7 +286,7 @@ def test_upload_handler_adds_uuid(self, mock_s3_utils, mock_s3_msg_adapter, mock # Verify add uuid called mock_s3_utils.add_uuid_metadata.assert_called_with( - mock_s3_utils.connect('s3_resource', None), + mock_s3_utils.connect(), bucket, key) @@ -319,7 +320,7 @@ def test_upload_handler_bucket_as_backup_PATCH(self, mock_s3_utils, mock_s3_msg_ # Verify publish called mock_wp.publish_registry.assert_called_with( 'granule', - 
mock_s3_utils.get_uuid_metadata(mock_s3_utils.connect('s3_resource', None), bucket, key), + mock_s3_utils.get_uuid_metadata(), records_transformed.serialize(), 'PATCH' ) From ede2e556d1ed98e4d8e3fa9c7b6b51489b25566f Mon Sep 17 00:00:00 2001 From: Erin Date: Mon, 7 Jun 2021 15:33:51 -0600 Subject: [PATCH 077/100] 1507-Changed bulid order in CircleCI config, seeing sme/sqs script builds with old code and hoping is because it was being built before the onestop-python-client-build. --- .circleci/config.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index c54ed47..c15d729 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -153,8 +153,8 @@ workflows: jobs: # - cli-test # - cli-build - - onestop-sme-build - - onestop-s3-handler-build - onestop-python-client-build - onestop-python-client-test + - onestop-sme-build + - onestop-s3-handler-build From 6e843f0ed792597395d5059a709329c0483b2dff Mon Sep 17 00:00:00 2001 From: Erin Date: Mon, 7 Jun 2021 16:08:54 -0600 Subject: [PATCH 078/100] 1507-Changed circleCI config to have the onestop-python-client-build require onestop-python-client-test to run and the two script builds to require onestop-python-client-build to run. --- .circleci/config.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index c15d729..a5f0dde 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -153,8 +153,13 @@ workflows: jobs: # - cli-test # - cli-build - - onestop-python-client-build - onestop-python-client-test + - onestop-python-client-build + requires: + - onestop-python-client-test - onestop-sme-build + requires: + - onestop-python-client-build - onestop-s3-handler-build - + requires: + - onestop-python-client-build From 1beb0bc6f9ef339352c73eb6f209208673a09b50 Mon Sep 17 00:00:00 2001 From: Erin Date: Mon, 7 Jun 2021 16:22:22 -0600 Subject: [PATCH 079/100] 1507-Changed circleci config - added colons at end of jobs that had requires field. --- .circleci/config.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index a5f0dde..c8ea89b 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -154,12 +154,12 @@ workflows: # - cli-test # - cli-build - onestop-python-client-test - - onestop-python-client-build + - onestop-python-client-build: requires: - onestop-python-client-test - - onestop-sme-build + - onestop-sme-build: requires: - onestop-python-client-build - - onestop-s3-handler-build + - onestop-s3-handler-build: requires: - onestop-python-client-build From 19dc282ce9aa313bc46431533c5eb583e17f453e Mon Sep 17 00:00:00 2001 From: Erin Date: Mon, 7 Jun 2021 16:34:04 -0600 Subject: [PATCH 080/100] 1507-Making a visual change to onestop-python-client code to test new build requires configuration. --- onestop-python-client/onestop/util/SqsHandlers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onestop-python-client/onestop/util/SqsHandlers.py b/onestop-python-client/onestop/util/SqsHandlers.py index 08f1d05..ce3ca60 100644 --- a/onestop-python-client/onestop/util/SqsHandlers.py +++ b/onestop-python-client/onestop/util/SqsHandlers.py @@ -68,7 +68,7 @@ def upload(records, log_level='INFO'): bucket = rec['s3']['bucket']['name'] logger.info("BUCKET: %s"%bucket) - # Fetch the object to get the uuid + # Fetch the object's uuid from cloud object, if exists. 
s3_resource = s3_utils.connect('resource', 's3', None) object_uuid = s3_utils.get_uuid_metadata(s3_resource, bucket, s3_key) if object_uuid is not None: From 91ba8f3b53b2eea74b80f22fe82d22684154bc7b Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 8 Jun 2021 11:45:34 -0600 Subject: [PATCH 081/100] 1507-Changed SqsHandlers to not serialize the json payload but instead do a json dumps. Was complaining ParsedRecord doesn't have method serialize. Fixed tests, also added size and versionid to test util message since S3MessageAdapter.transform required. --- .../onestop/util/SqsHandlers.py | 13 +++++---- .../test/unit/test_SqsHandlers.py | 28 +++++++------------ onestop-python-client/test/utils.py | 6 +++- 3 files changed, 23 insertions(+), 24 deletions(-) diff --git a/onestop-python-client/onestop/util/SqsHandlers.py b/onestop-python-client/onestop/util/SqsHandlers.py index ce3ca60..9170f8d 100644 --- a/onestop-python-client/onestop/util/SqsHandlers.py +++ b/onestop-python-client/onestop/util/SqsHandlers.py @@ -1,4 +1,7 @@ +import json + from onestop.util.ClientLogger import ClientLogger +from onestop.schemas.util.jsonEncoder import EnumEncoder def create_delete_handler(web_publisher): """ @@ -56,7 +59,7 @@ def create_upload_handler(web_publisher, s3_utils, s3_message_adapter): :param: s3ma: S3MessageAdapter object """ - def upload(records, log_level='INFO'): + def upload(records, log_level='DEBUG'): logger = ClientLogger.get_logger('SqsHandlers.create_upload_handler.upload', log_level, False) logger.info("In create_upload_handler.upload() handler") logger.debug("Records: %s"%records) @@ -80,17 +83,17 @@ def upload(records, log_level='INFO'): object_uuid = s3_utils.add_uuid_metadata(s3_resource, bucket, s3_key) # Convert s3 message to IM message - json_payload = s3_message_adapter.transform(records) + im_message = s3_message_adapter.transform(records) + json_payload = json.dumps(im_message.to_dict(), cls=EnumEncoder) logger.debug('transformed message, json_payload: %s'%json_payload) # Send the message to registry - payload = json_payload.serialize() method = 'PATCH' # Backup location should be patched if not backup within bucket name if "backup" not in bucket: method = 'POST' - logger.debug('web_publisher.publish_registry method using "%s" with payload %s'%(method,payload)) - registry_response = web_publisher.publish_registry("granule", object_uuid, payload, method) + logger.debug('web_publisher.publish_registry method using "%s" with payload %s'%(method,json_payload)) + registry_response = web_publisher.publish_registry("granule", object_uuid, json_payload, method) logger.debug('web_publisher.publish_registry response=%s'%registry_response) logger.debug('web_publisher.publish_registry response json=%s'%registry_response.json()) diff --git a/onestop-python-client/test/unit/test_SqsHandlers.py b/onestop-python-client/test/unit/test_SqsHandlers.py index cd6a3ad..b9e2894 100644 --- a/onestop-python-client/test/unit/test_SqsHandlers.py +++ b/onestop-python-client/test/unit/test_SqsHandlers.py @@ -11,6 +11,7 @@ from onestop.util.SqsConsumer import SqsConsumer from onestop.util.SqsHandlers import create_delete_handler from onestop.util.SqsHandlers import create_upload_handler +from onestop.schemas.util.jsonEncoder import EnumEncoder class test_SqsHandler(unittest.TestCase): @@ -219,8 +220,7 @@ def test_delete_handler_eventName_not_delete_ends_cb(self, mock_wp, mock_respons @mock_sqs @patch('onestop.WebPublisher') @patch('onestop.util.S3Utils') - @patch('onestop.util.S3MessageAdapter') - def 
test_upload_handler_happy(self, mock_s3_utils, mock_s3_msg_adapter, mock_wp): + def test_upload_handler_happy(self, mock_s3_utils, mock_wp): bucket = self.bucket key = self.key queue_name = 'test_queue' @@ -236,9 +236,7 @@ def test_upload_handler_happy(self, mock_s3_utils, mock_s3_msg_adapter, mock_wp) MessageBody=json.dumps(message) ) - records = json.loads(message['Message'])['Records'] - records_transformed = mock_s3_msg_adapter.transform(records) - cb = create_upload_handler(mock_wp, mock_s3_utils, mock_s3_msg_adapter) + cb = create_upload_handler(mock_wp, mock_s3_utils, self.s3_message_adapter) self.sqs_consumer.receive_messages(sqs_queue, 1, cb) # Verify get uuid called @@ -249,13 +247,11 @@ def test_upload_handler_happy(self, mock_s3_utils, mock_s3_msg_adapter, mock_wp) key) # Verify uuid not added mock_s3_utils.add_uuid_metadata.assert_not_called() - # Verify transform called - mock_s3_msg_adapter.transform.assert_called_with(records) - # Verify publish called + # Verify publish called & transform called mock_wp.publish_registry.assert_called_with( 'granule', - mock_s3_utils.get_uuid_metadata(mock_s3_utils.connect('s3_resource', None), bucket, key), - records_transformed.serialize(), + mock_s3_utils.get_uuid_metadata(), + json.dumps(self.s3_message_adapter.transform(json.loads(message['Message'])['Records']).to_dict(), cls=EnumEncoder), 'POST' ) @@ -293,9 +289,8 @@ def test_upload_handler_adds_uuid(self, mock_s3_utils, mock_s3_msg_adapter, mock @mock_sqs @patch('onestop.WebPublisher') @patch('onestop.util.S3Utils') - @patch('onestop.util.S3MessageAdapter') - def test_upload_handler_bucket_as_backup_PATCH(self, mock_s3_utils, mock_s3_msg_adapter, mock_wp): - bucket = "testing_backup_bucket" + def test_upload_handler_bucket_as_backup_PATCH(self, mock_s3_utils, mock_wp): + bucket = "testing_backup_bucket" # backup in bucket means a PATCH should happen. 
key = self.key queue_name = 'test_queue' sqs_resource = self.s3_utils.connect('resource', 'sqs', self.region) @@ -310,10 +305,7 @@ def test_upload_handler_bucket_as_backup_PATCH(self, mock_s3_utils, mock_s3_msg_ MessageBody=json.dumps(message) ) - mock_s3_utils.get_uuid_metadata.return_value = None - records = json.loads(message['Message'])['Records'] - records_transformed = mock_s3_msg_adapter.transform(records) - cb = create_upload_handler(mock_wp, mock_s3_utils, mock_s3_msg_adapter) + cb = create_upload_handler(mock_wp, mock_s3_utils, self.s3_message_adapter) self.sqs_consumer.receive_messages(sqs_queue, 1, cb) @@ -321,7 +313,7 @@ def test_upload_handler_bucket_as_backup_PATCH(self, mock_s3_utils, mock_s3_msg_ mock_wp.publish_registry.assert_called_with( 'granule', mock_s3_utils.get_uuid_metadata(), - records_transformed.serialize(), + json.dumps(self.s3_message_adapter.transform(json.loads(message['Message'])['Records']).to_dict(), cls=EnumEncoder), 'PATCH' ) diff --git a/onestop-python-client/test/utils.py b/onestop-python-client/test/utils.py index 9cb7913..fc124fb 100644 --- a/onestop-python-client/test/utils.py +++ b/onestop-python-client/test/utils.py @@ -26,7 +26,11 @@ def create_delete_message(region, bucket, key): "bucket": {"name": "''' + bucket + '''", "ownerIdentity": {"principalId": "AX8TWPQYA8JEM"}, "arn": "arn:aws:s3:::''' + bucket + '''"}, - "object": {"key": "''' + key + '''", "sequencer": "005FD7D1765F04D8BE"} + "object": {"key": "''' + key + '''", + "sequencer": "005FD7D1765F04D8BE", + "eTag": "44d2452e8bc2c8013e9c673086fbab7a", + "size": 1385, + "versionId": "q6ls_7mhqUbfMsoYiQSiADnHBZQ3Fbzf"} } }] }''', From d29e7a57045827e490040e5da5452eba7ba8c32d Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 8 Jun 2021 13:27:35 -0600 Subject: [PATCH 082/100] 1507-Changed helm*/values.yml onestop and registry urls from onestop* to os*, is url on cedardevs. --- helm/onestop-sqs-consumer/values.yaml | 4 ++-- helm/sme-chart/values.yaml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/helm/onestop-sqs-consumer/values.yaml b/helm/onestop-sqs-consumer/values.yaml index 0baaf1e..bc0f8fb 100644 --- a/helm/onestop-sqs-consumer/values.yaml +++ b/helm/onestop-sqs-consumer/values.yaml @@ -78,8 +78,8 @@ config: |- # COLLECTION or GRANULE metadata_type: GRANULE - registry_base_url: http://onestop-registry:80 - onestop_base_url: http://onestop-search:8080 + registry_base_url: http://os-registry:80 + onestop_base_url: http://os-search:8080 prefixMap: NESDIS/CSB: 'fdb56230-87f4-49f2-ab83-104cfd073177' diff --git a/helm/sme-chart/values.yaml b/helm/sme-chart/values.yaml index 58678a3..3fc6922 100644 --- a/helm/sme-chart/values.yaml +++ b/helm/sme-chart/values.yaml @@ -35,8 +35,8 @@ config: |- # COLLECTION or GRANULE metadata_type: GRANULE - registry_base_url: http://onestop-registry:80 - onestop_base_url: http://onestop-search:8080 + registry_base_url: http://os-registry:80 + onestop_base_url: http://os-search:8080 prefixMap: NESDIS/CSB: 'fdb56230-87f4-49f2-ab83-104cfd073177' From 3beefc398ae2820c13ab2548e53b77323c63b5d0 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 8 Jun 2021 16:11:35 -0600 Subject: [PATCH 083/100] 1507-Added passing in log level to callback method in KafkaConsumer. Adjusted some log statements. 
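With the log level forwarded from consume(), any callback handed to KafkaConsumer.consume() is expected to take a third argument. A short sketch of a conforming handler (the handler name and logger label are illustrative):

    from onestop.util.ClientLogger import ClientLogger

    def handler(key, value, log_level='INFO'):
        logger = ClientLogger.get_logger('example.handler', log_level, False)
        logger.info('Received key=%s' % key)
        logger.debug('Received value=%s' % value)

    # kafka_consumer.consume(metadata_consumer, handler)

Keeping a default on log_level lets existing two-argument call sites keep working when they invoke the handler directly.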
--- onestop-python-client/onestop/KafkaConsumer.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/onestop-python-client/onestop/KafkaConsumer.py b/onestop-python-client/onestop/KafkaConsumer.py index 0481af9..5359a29 100644 --- a/onestop-python-client/onestop/KafkaConsumer.py +++ b/onestop-python-client/onestop/KafkaConsumer.py @@ -97,6 +97,7 @@ def __init__(self, metadata_type, brokers, group_id, auto_offset_reset, schema_r if self.metadata_type not in ['COLLECTION', 'GRANULE']: raise ValueError("metadata_type of '%s' must be 'COLLECTION' or 'GRANULE'"%(self.metadata_type)) + self.log_level = log_level self.logger = ClientLogger.get_logger(self.__class__.__name__, log_level, False) self.logger.info("Initializing " + self.__class__.__name__) @@ -196,11 +197,12 @@ def consume(self, metadata_consumer, handler): self.logger.info('No Messages') continue - self.logger.debug("Message key="+str(msg.key())+" value="+str(msg.value())) key = msg.key() value = msg.value() + self.logger.debug('Message key=%s'%key) + self.logger.debug('Message value=%s'%value) - handler(key, value) + handler(key, value, self.log_level) finally: self.logger.debug("Closing metadata_consumer") metadata_consumer.close() From ffaf33fef5c7d01527e6499986c17b0cb02d650f Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 8 Jun 2021 20:05:55 -0600 Subject: [PATCH 084/100] 1507-Removed try/finally from the KafkaConsumer.consume() since our code shouldn't be able to throw any exceptions, the ones that could happen you want to bubble up (like connection problems). Moved the closing of connection after while polling section, since was closing the connection. --- .../onestop/KafkaConsumer.py | 33 +++++++++---------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/onestop-python-client/onestop/KafkaConsumer.py b/onestop-python-client/onestop/KafkaConsumer.py index 5359a29..18a84cf 100644 --- a/onestop-python-client/onestop/KafkaConsumer.py +++ b/onestop-python-client/onestop/KafkaConsumer.py @@ -189,20 +189,19 @@ def consume(self, metadata_consumer, handler): """ self.logger.info('Consuming from topic') while True: - try: - msg = metadata_consumer.poll(10) - self.logger.debug("Message received: "+str(msg)) - - if msg is None: - self.logger.info('No Messages') - continue - - key = msg.key() - value = msg.value() - self.logger.debug('Message key=%s'%key) - self.logger.debug('Message value=%s'%value) - - handler(key, value, self.log_level) - finally: - self.logger.debug("Closing metadata_consumer") - metadata_consumer.close() + msg = metadata_consumer.poll(10) + self.logger.debug("Message received: "+str(msg)) + + if msg is None: + self.logger.info('No Messages') + continue + + key = msg.key() + value = msg.value() + self.logger.debug('Message key=%s'%key) + self.logger.debug('Message value=%s'%value) + + handler(key, value, self.log_level) + + self.logger.debug("Closing metadata_consumer") + metadata_consumer.close() From 880b8588f3d6cc4b06abe3536072e4e0b6875172 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 8 Jun 2021 20:08:51 -0600 Subject: [PATCH 085/100] 1507-Narrowed down try/except code in the SqsConsumer.consume() so if there's a problem with the callback it gets thrown - also the sqs_message.delete is outside try so if a connection problem that ends everything. 
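The intent is that only the JSON decoding of the SQS body stays inside the try block; a failing callback or a failing delete now surfaces instead of being logged and swallowed. A minimal sketch of the narrowed pattern (illustrative only, not the actual SqsConsumer method):

    import json

    def process_one(sqs_message, cb, log_level='INFO'):
        try:
            body = json.loads(sqs_message.body)      # only parsing is guarded
            content = json.loads(body['Message'])
        except Exception:
            return                                   # malformed message: leave it on the queue
        if 'Records' in content:
            cb(content['Records'], log_level)        # callback errors bubble up to the caller
        sqs_message.delete()                         # delete is outside the guard too

In the sketch the early return stands in for the "continue with the next message" behavior of the real polling loop.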
--- .../onestop/util/SqsConsumer.py | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/onestop-python-client/onestop/util/SqsConsumer.py b/onestop-python-client/onestop/util/SqsConsumer.py index 1972cc6..d784734 100644 --- a/onestop-python-client/onestop/util/SqsConsumer.py +++ b/onestop-python-client/onestop/util/SqsConsumer.py @@ -80,31 +80,31 @@ def receive_messages(self, sqs_queue, sqs_max_polls, cb): self.logger.debug("Messages: %s" % sqs_messages) for sqs_message in sqs_messages: - try: - # Log start time - dt_start = datetime.now(tz=timezone.utc) - self.logger.info("Starting processing message") - self.logger.debug("Message: %s" % sqs_message) - self.logger.debug("Message body: %s" % sqs_message.body) + # Log start time + dt_start = datetime.now(tz=timezone.utc) + self.logger.info("Starting message processing") + self.logger.debug("Message: %s" % sqs_message) + self.logger.debug("Message body: %s" % sqs_message.body) + try: message_body = json.loads(sqs_message.body) self.logger.debug("Message body message: %s" % message_body['Message']) message_content = json.loads(message_body['Message']) - - if 'Records' in message_content: - recs = message_content['Records'] - self.logger.debug('Message "Records": %s' % recs) - cb(recs, self.log_level) - else: - self.logger.info("s3 event message without 'Records' content received.") - - dt_end = datetime.now(tz=timezone.utc) - processing_time = dt_end - dt_start - self.logger.info("Completed processing the message in %s seconds."%(processing_time.microseconds / 1000000)) - - sqs_message.delete() - self.logger.info("The SQS message has been deleted.") except: self.logger.exception( "An exception was thrown while processing a message, but this program will continue. The " "message will not be deleted from the SQS queue. The message was: %s" % sqs_message) + + if 'Records' in message_content: + recs = message_content['Records'] + self.logger.debug('Message "Records": %s' % recs) + cb(recs, self.log_level) + else: + self.logger.info("s3 event message without 'Records' content received.") + + dt_end = datetime.now(tz=timezone.utc) + processing_time = dt_end - dt_start + self.logger.info("Completed processing the message in %s seconds."%(processing_time.microseconds / 1000000)) + + sqs_message.delete() + self.logger.info("The SQS message has been deleted.") From dd1d5f42fc95b7e55d6fc3523ec64e72ff68bd2f Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 8 Jun 2021 20:39:56 -0600 Subject: [PATCH 086/100] 1507-Fixed test for a KafkaConsumer call back that now includes log level. 
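Since consume() now invokes the callback with (key, value, log_level), the test's expectation needs the extra argument as well. A tiny unittest.mock illustration of the updated assertion (not the real test fixture):

    from unittest.mock import Mock

    mock_handler = Mock()
    mock_handler('some-key', {'field': 1}, 'DEBUG')  # what consume() now does
    mock_handler.assert_called_with('some-key', {'field': 1}, 'DEBUG')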
--- onestop-python-client/test/unit/test_KafkaConsumer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onestop-python-client/test/unit/test_KafkaConsumer.py b/onestop-python-client/test/unit/test_KafkaConsumer.py index 5404c31..6106738 100644 --- a/onestop-python-client/test/unit/test_KafkaConsumer.py +++ b/onestop-python-client/test/unit/test_KafkaConsumer.py @@ -281,7 +281,7 @@ def test_consume(self, mock_metadata_consumer, mock_message): # Verify callback function was called once with expected message attributes mock_handler.assert_called_once() - mock_handler.assert_called_with(mock_message_key, mock_message_value) + mock_handler.assert_called_with(mock_message_key, mock_message_value, self.conf_w_security['log_level']) if __name__ == '__main__': unittest.main() \ No newline at end of file From 2d8025a0338a4aba29cd6733275868bf0e5015f8 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 8 Jun 2021 20:40:50 -0600 Subject: [PATCH 087/100] 1507-Fixed a test missed related to commit of changed SqsHandlers to not serialize the json payload but instead do a json dumps. --- onestop-python-client/test/unit/test_SqsHandlers.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/onestop-python-client/test/unit/test_SqsHandlers.py b/onestop-python-client/test/unit/test_SqsHandlers.py index b9e2894..5bba184 100644 --- a/onestop-python-client/test/unit/test_SqsHandlers.py +++ b/onestop-python-client/test/unit/test_SqsHandlers.py @@ -258,8 +258,7 @@ def test_upload_handler_happy(self, mock_s3_utils, mock_wp): @mock_sqs @patch('onestop.WebPublisher') @patch('onestop.util.S3Utils') - @patch('onestop.util.S3MessageAdapter') - def test_upload_handler_adds_uuid(self, mock_s3_utils, mock_s3_msg_adapter, mock_wp): + def test_upload_handler_adds_uuid(self, mock_s3_utils, mock_wp): bucket = self.bucket key = self.key queue_name = 'test_queue' @@ -276,7 +275,7 @@ def test_upload_handler_adds_uuid(self, mock_s3_utils, mock_s3_msg_adapter, mock ) mock_s3_utils.get_uuid_metadata.return_value = None - cb = create_upload_handler(mock_wp, mock_s3_utils, mock_s3_msg_adapter) + cb = create_upload_handler(mock_wp, mock_s3_utils, self.s3_message_adapter) self.sqs_consumer.receive_messages(sqs_queue, 1, cb) From 7e6b72daf60d7cb7f5aa221b0b12c8d82a70e669 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 9 Jun 2021 09:40:20 -0600 Subject: [PATCH 088/100] 1507-Updated scripts to work with new class constructors that take a dict instead of config location. --- scripts/archive_client_integration.py | 96 ++++---- scripts/bucket_automation.py | 89 +++++--- scripts/launch_e2e.py | 197 ++++++---------- scripts/launch_kafka_publisher.py | 18 +- scripts/launch_pyconsumer.py | 135 ++++++----- scripts/sme/sme.py | 213 ++++++++++-------- scripts/sme/smeFunc.py | 75 ++---- .../s3_notification_handler.py | 164 +++++++------- 8 files changed, 495 insertions(+), 492 deletions(-) diff --git a/scripts/archive_client_integration.py b/scripts/archive_client_integration.py index 2831045..be672f8 100644 --- a/scripts/archive_client_integration.py +++ b/scripts/archive_client_integration.py @@ -1,64 +1,74 @@ import argparse -from onestop.util.S3Utils import S3Utils - - -def handler(): - ''' - Simultaneously upload files to main bucket 'noaa-nccf-dev' in us-east-2 and glacier in cross region bucket 'noaa-nccf-dev-archive' in us-west-2. - - :return: str - Returns response from boto3 indicating if upload was successful. 
- ''' - print("Handler...") +import yaml +import os - # config for s3 low level api for us-east-2 - s3 = s3_utils.connect('s3', s3_utils.conf['s3_region']) - bucket_name = s3_utils.conf['s3_bucket'] - - # config for s3 low level api cross origin us-west-2 - s3_cross_region = s3_utils.connect('s3', s3_utils.conf['s3_region2']) - bucket_name_cross_region = s3_utils.conf['s3_bucket2'] - - overwrite = True - - # Add 3 files to bucket - local_files = ["file1.csv", "file2.csv"] - s3_file = None - for file in local_files: - local_file = "tests/data/" + file - # changed the key for testing - s3_file = "public/NESDIS/CSB/" + file - s3_utils.upload_s3(s3, local_file, bucket_name, s3_file, overwrite) +from onestop.util.S3Utils import S3Utils - # Upload file to cross region bucket then transfer to glacier right after - s3_utils.upload_s3(s3_cross_region, local_file, bucket_name_cross_region, s3_file, overwrite) - s3_utils.s3_to_glacier(s3_cross_region, bucket_name_cross_region, s3_file) +config_dict = {} if __name__ == '__main__': + # Example command: python3 archive_client_integration.py -conf /Users/whoever/repo/onestop-clients/scripts/config/combined_template.yml -cred /Users/whoever/repo/onestop-clients/scripts/config/credentials.yml + # python3 archive_client_integration.py -cred /Users/whoever/repo/onestop-clients/scripts/config/credentials.yml parser = argparse.ArgumentParser(description="Launches archive client integration") - parser.add_argument('-conf', dest="conf", required=True, + # Set default config location to the Helm mounted pod configuration location + parser.add_argument('-conf', dest="conf", required=False, default='/etc/config/config.yml', help="AWS config filepath") - parser.add_argument('-cred', dest="cred", required=True, help="Credentials filepath") args = vars(parser.parse_args()) - # Get configuration file path locations + # Generate configuration dictionary conf_loc = args.pop('conf') + with open(conf_loc) as f: + config_dict.update(yaml.load(f, Loader=yaml.FullLoader)) + + # Get credentials from passed in fully qualified path or ENV. cred_loc = args.pop('cred') + if cred_loc is not None: + with open(cred_loc) as f: + creds = yaml.load(f, Loader=yaml.FullLoader) + registry_username = creds['registry']['username'] + registry_password = creds['registry']['password'] + access_key = creds['sandbox']['access_key'] + access_secret = creds['sandbox']['secret_key'] + else: + print("Using env variables for config parameters") + registry_username = os.environ.get("REGISTRY_USERNAME") + registry_password = os.environ.get("REGISTRY_PASSWORD") + access_key = os.environ.get("ACCESS_KEY") + access_secret = os.environ.get("SECRET_KEY") + + config_dict.update({ + 'registry_username' : registry_username, + 'registry_password' : registry_password, + 'access_key' : access_key, + 'secret_key' : access_secret + }) # Upload a test file to s3 bucket - s3_utils = S3Utils(conf_loc, cred_loc) - - handler() - - - - + s3_utils = S3Utils(**config_dict) - + s3 = s3_utils.connect('client', 's3', config_dict['s3_region']) + # config for s3 low level api cross origin us-west-2 + s3_cross_region = s3_utils.connect('client', 's3', config_dict['s3_region2']) + bucket_name_cross_region = config_dict['s3_bucket2'] + overwrite = True + # Files to upload - TODO: User should change these paths. 
+ local_files = ["/scripts/data/file1.csv", "/scripts/data/file2.csv"] + for file in local_files: + print("Uploading file: %s"%file) + # changed the key for testing + s3_file = "public/NESDIS/CSB/" + file + upload = s3_utils.upload_s3(s3, file, config_dict['s3_bucket'], s3_file, overwrite) + if not upload: + raise Exception("Unknown, upload to s3 failed.") + # Upload file to cross region bucket then transfer to glacier right after + upload = s3_utils.upload_s3(s3_cross_region, file, bucket_name_cross_region, s3_file, overwrite) + if not upload: + raise Exception("Unknown, upload to s3 failed.") + s3_utils.s3_to_glacier(s3_cross_region, bucket_name_cross_region, s3_file) diff --git a/scripts/bucket_automation.py b/scripts/bucket_automation.py index a64f11c..5c922ee 100644 --- a/scripts/bucket_automation.py +++ b/scripts/bucket_automation.py @@ -1,7 +1,12 @@ import argparse import json +import os +import yaml + from onestop.util.S3Utils import S3Utils +config_dict = {} + def handler(): ''' Creates bucket with defined key paths @@ -10,43 +15,42 @@ def handler(): Returns boto3 response indicating if bucket creation was successful ''' # connect to low level api - s3 = s3_utils.connect("s3", s3_utils.conf['s3_region']) + s3 = s3_utils.connect('client', 's3', config_dict['s3_region']) # use s3_resource api to check if the bucket exists - s3_resource = s3_utils.connect("s3_resource", s3_utils.conf['s3_region']) + s3_resource = s3_utils.connect('resource', 's3', config_dict['s3_region']) # Create bucket name bucket_name = "noaa-nccf-dev" - # checks to see if the bucket is already created, if it isn't create yet then it will create the bucket, set bucket policy, and create key paths + # Create bucket policy + bucket_policy = { + "Version": "2012-10-17", + "Id": "noaa-nccf-dev-policy", + "Statement": [ + { + "Sid": "PublicRead", + "Effect": "Allow", + "Principal": "*", + "Action": "s3:GetObject", + "Resource": f'arn:aws:s3:::{bucket_name}/public/*' + }] + } + # Convert the policy from JSON dict to string + bucket_policy_str = json.dumps(bucket_policy) + + # checks to see if the bucket is already created, if it isn't create it, then it will create the bucket, set bucket policy, and create key paths if not s3_resource.Bucket(bucket_name) in s3_resource.buckets.all(): """ - Create bucket - need to specify bucket location for every region except us-east-1 -> https://github.com/aws/aws-cli/issues/2603 """ s3.create_bucket(Bucket=bucket_name, - CreateBucketConfiguration={'LocationConstraint': s3_utils.conf['s3_region']}, + CreateBucketConfiguration={'LocationConstraint': config_dict['s3_region']}, ObjectLockEnabledForBucket=True) - # Create bucket policy - bucket_policy = { - "Version": "2012-10-17", - "Id": "noaa-nccf-dev-policy", - "Statement": [ - { - "Sid": "PublicRead", - "Effect": "Allow", - "Principal": "*", - "Action": "s3:GetObject", - "Resource": f'arn:aws:s3:::{bucket_name}/public/*' - }] - } - - # Convert the policy from JSON dict to string - bucket_policy = json.dumps(bucket_policy) - # Set new bucket policy - s3.put_bucket_policy(Bucket=bucket_name, Policy=bucket_policy) + s3.put_bucket_policy(Bucket=bucket_name, Policy=bucket_policy_str) """ - Create Public Key Paths @@ -86,6 +90,9 @@ def handler(): s3.put_object(Bucket=bucket_name, Body='', Key='private/OMAO/') s3.put_object(Bucket=bucket_name, Body='', Key='private/OAR/') + else: + #Set bucket policy + s3.put_bucket_policy(Bucket=bucket_name, Policy=bucket_policy_str) # Set CORS bucket config cors_config = { @@ -109,12 +116,6 @@ def 
handler(): } s3.put_bucket_cors(Bucket=bucket_name, CORSConfiguration=cors_config) - # Convert the policy from JSON dict to string - bucket_policy = json.dumps(bucket_policy) - - #Set new bucket policy - s3.put_bucket_policy(Bucket=bucket_name, Policy=bucket_policy) - """ - Set ACL for public read """ @@ -131,18 +132,42 @@ def handler(): if __name__ == '__main__': parser = argparse.ArgumentParser(description="Launches e2e test") - parser.add_argument('-conf', dest="conf", required=True, + # Set default config location to the Helm mounted pod configuration location + parser.add_argument('-conf', dest="conf", required=False, default='/etc/config/config.yml', help="AWS config filepath") - parser.add_argument('-cred', dest="cred", required=True, help="Credentials filepath") args = vars(parser.parse_args()) - # Get configuration file path locations + # Generate configuration dictionary conf_loc = args.pop('conf') + with open(conf_loc) as f: + config_dict.update(yaml.load(f, Loader=yaml.FullLoader)) + + # Get credentials from passed in fully qualified path or ENV. cred_loc = args.pop('cred') + if cred_loc is not None: + with open(cred_loc) as f: + creds = yaml.load(f, Loader=yaml.FullLoader) + registry_username = creds['registry']['username'] + registry_password = creds['registry']['password'] + access_key = creds['sandbox']['access_key'] + access_secret = creds['sandbox']['secret_key'] + else: + print("Using env variables for config parameters") + registry_username = os.environ.get("REGISTRY_USERNAME") + registry_password = os.environ.get("REGISTRY_PASSWORD") + access_key = os.environ.get("ACCESS_KEY") + access_secret = os.environ.get("SECRET_KEY") + + config_dict.update({ + 'registry_username' : registry_username, + 'registry_password' : registry_password, + 'access_key' : access_key, + 'secret_key' : access_secret + }) # Create S3Utils instance - s3_utils = S3Utils(conf_loc, cred_loc) + s3_utils = S3Utils(**config_dict) handler() \ No newline at end of file diff --git a/scripts/launch_e2e.py b/scripts/launch_e2e.py index 6d60b2c..820fd22 100644 --- a/scripts/launch_e2e.py +++ b/scripts/launch_e2e.py @@ -7,11 +7,12 @@ from onestop.util.S3Utils import S3Utils from onestop.util.S3MessageAdapter import S3MessageAdapter from onestop.WebPublisher import WebPublisher -from onestop.extract.CsbExtractor import CsbExtractor from onestop.schemas.util.jsonEncoder import EnumEncoder +from onestop.util.ClientLogger import ClientLogger +config_dict = {} -def handler(recs): +def handler(recs, log_level): ''' Processes metadata information from sqs message triggered by S3 event and uploads to registry through web publisher (https). Also uploads s3 object to glacier. 
@@ -22,50 +23,48 @@ def handler(recs): IM registry response and boto3 glacier response ''' - print("Handler...") + logger = ClientLogger.get_logger('launch_e2e.handler', log_level, False) + logger.info('In Handler') - # Now get boto client for object-uuid retrieval - object_uuid = None - bucket = None - print(recs) + # If record exists try to get object-uuid retrieval + logger.debug('Records:%s'%recs) if recs is None: - print("No records retrieved") + logger.info('No records retrieved, doing nothing.') + return + + rec = recs[0] + logger.debug('Record: %s'%rec) + bucket = rec['s3']['bucket']['name'] + s3_key = rec['s3']['object']['key'] + logger.info("Getting uuid") + s3_resource = s3_utils.connect('resource', 's3', None) + object_uuid = s3_utils.get_uuid_metadata(s3_resource, bucket, s3_key) + if object_uuid is not None: + logger.info('Retrieved object-uuid: %s'% object_uuid) else: - rec = recs[0] - print(rec) - bucket = rec['s3']['bucket']['name'] - s3_key = rec['s3']['object']['key'] - print("Getting uuid") - # High-level api - s3_resource = s3_utils.connect("s3_resource", None) - object_uuid = s3_utils.get_uuid_metadata(s3_resource, bucket, s3_key) - if object_uuid is not None: - print("Retrieved object-uuid: " + object_uuid) - else: - print("Adding uuid") - s3_utils.add_uuid_metadata(s3_resource, bucket, s3_key) + logger.info('UUID not found, adding uuid to bucket=%s key=%s'%(bucket, s3_key)) + s3_utils.add_uuid_metadata(s3_resource, bucket, s3_key) + s3ma = S3MessageAdapter(**config_dict) im_message = s3ma.transform(recs) - + logger.debug('S3MessageAdapter.transform: %s'%im_message) json_payload = json.dumps(im_message.to_dict(), cls=EnumEncoder) + logger.debug('S3MessageAdapter.transform.json dump: %s'%json_payload) - print(json_payload) - - + wp = WebPublisher(**config_dict) registry_response = wp.publish_registry("granule", object_uuid, json_payload, "POST") - #print(registry_response.json()) + logger.debug('publish_registry response: %s'%registry_response.json()) # Upload to archive file_data = s3_utils.read_bytes_s3(s3_client, bucket, s3_key) - glacier = s3_utils.connect("glacier", cloud_conf['s3_region']) - vault_name = cloud_conf['vault_name'] - + glacier = s3_utils.connect('client', 'glacier', config_dict['s3_region']) + vault_name = config_dict['vault_name'] resp_dict = s3_utils.upload_archive(glacier, vault_name, file_data) - - print("archiveLocation: " + resp_dict['location']) - print("archiveId: " + resp_dict['archiveId']) - print("sha256: " + resp_dict['checksum']) + logger.debug('Upload response: %s'%resp_dict) + logger.info('upload archived location: %s'% resp_dict['location']) + logger.info('archiveId: %s'% resp_dict['archiveId']) + logger.info('sha256: %s'% resp_dict['checksum']) addlocPayload = { "fileLocations": { @@ -82,116 +81,60 @@ def handler(recs): json_payload = json.dumps(addlocPayload, indent=2) # Send patch request next with archive location registry_response = wp.publish_registry("granule", object_uuid, json_payload, "PATCH") - + logger.debug('publish to registry response: %s'% registry_response) + logger.info('Finished publishing to registry.') if __name__ == '__main__': - """ parser = argparse.ArgumentParser(description="Launches e2e test") - parser.add_argument('-conf', dest="conf", required=True, + parser.add_argument('-conf', dest="conf", required=False, default='/etc/config/config.yml', help="AWS config filepath") parser.add_argument('-cred', dest="cred", required=True, help="Credentials filepath") args = vars(parser.parse_args()) - # Get 
configuration file path locations - conf_loc = args.pop('conf') - cred_loc = args.pop('cred') - # Upload a test file to s3 bucket - s3_utils = S3Utils(conf_loc, cred_loc) - # Low-level api ? Can we just use high level revisit me! - s3 = s3_utils.connect("s3", None) - registry_user = os.environ.get("REGISTRY_USERNAME") - registry_pwd = os.environ.get("REGISTRY_PASSWORD") - print(registry_user) - access_key = os.environ.get("AWS_ACCESS") - access_secret = os.environ.get("AWS_SECRET") - print(access_key) - - # High-level api - s3_resource = s3_utils.connect("s3_resource", None) - bucket = cloud_conf['s3_bucket'] - overwrite = True - sqs_max_polls = cloud_conf['sqs_max_polls'] - # Add 3 files to bucket - local_files = ["file1.csv", "file4.csv"] - s3_file = None - for file in local_files: - local_file = "tests/data/" + file - s3_file = "csv/" + file - s3_utils.upload_s3(s3, local_file, bucket, s3_file, overwrite) - # Receive s3 message and MVM from SQS queue - sqs_consumer = SqsConsumer(conf_loc, cred_loc) - s3ma = S3MessageAdapter("scripts/config/csb-data-stream-config.yml", s3_utils) - # Retrieve data from s3 object - #csb_extractor = CsbExtractor() - wp = WebPublisher("config/web-publisher-config-dev.yml", cred_loc) - queue = sqs_consumer.connect() - try: - debug = False - sqs_consumer.receive_messages(queue, sqs_max_polls, handler) - except Exception as e: - print("Message queue consumption failed: {}".format(e)) - """ - parser = argparse.ArgumentParser(description="Launches e2e test") - parser.add_argument('-conf', dest="conf", required=True, - help="AWS config filepath") - - parser.add_argument('-cred', dest="cred", required=True, - help="Credentials filepath") - args = vars(parser.parse_args()) - - # Get configuration file path locations + # Generate configuration dictionary conf_loc = args.pop('conf') - cred_loc = args.pop('cred') - stream_conf_loc = args.pop('cred') - - with open(os.path.abspath(os.path.join(os.path.dirname(__file__), cred_loc))) as f: - cred = yaml.load(f, Loader=yaml.FullLoader) - with open(os.path.abspath(os.path.join(os.path.dirname(__file__), conf_loc))) as f: - cloud_conf = yaml.load(f, Loader=yaml.FullLoader) - with open(os.path.abspath(os.path.join(os.path.dirname(__file__), stream_conf_loc))) as f: - stream_conf = yaml.load(f, Loader=yaml.FullLoader) - - s3_utils = S3Utils(cred['sandbox']['access_key'], - cred['sandbox']['secret_key'], - "DEBUG") - - bucket = cloud_conf['s3_bucket'] - sqs_max_polls = cloud_conf['sqs_max_polls'] - - #Source - access_bucket = stream_conf['access_bucket'] + with open(conf_loc) as f: + config_dict.update(yaml.load(f, Loader=yaml.FullLoader)) - #Onestop related - file_id_prefix = stream_conf['file_identifier_prefix'] - file_format = stream_conf['format'] - headers = stream_conf['headers'] - type = stream_conf['type'] - - # Low-level api ? Can we just use high level revisit me! - s3_client = s3_utils.connect("s3", None) + # Get credentials from passed in fully qualified path or ENV. 
+ cred_loc = args.pop('cred') + if cred_loc is not None: + with open(cred_loc) as f: + creds = yaml.load(f, Loader=yaml.FullLoader) + registry_username = creds['registry']['username'] + registry_password = creds['registry']['password'] + access_key = creds['sandbox']['access_key'] + access_secret = creds['sandbox']['secret_key'] + else: + print("Using env variables for config parameters") + registry_username = os.environ.get("REGISTRY_USERNAME") + registry_password = os.environ.get("REGISTRY_PASSWORD") + access_key = os.environ.get("ACCESS_KEY") + access_secret = os.environ.get("SECRET_KEY") + + config_dict.update({ + 'registry_username' : registry_username, + 'registry_password' : registry_password, + 'access_key' : access_key, + 'secret_key' : access_secret + }) + + s3_utils = S3Utils(**config_dict) + s3_client = s3_utils.connect('client', 's3', config_dict['s3_region']) # Upload test files to s3 bucket local_files = ["file1.csv", "file4.csv"] s3_file = None for file in local_files: - local_file = "data/" + file + local_file = "scripts/data/" + file # s3_file = "csv/" + file - s3_file = "NESDIS/CSB/" + file - if not s3_utils.upload_s3(s3_client, local_file, bucket, s3_file, True): + s3_file = "public/" + file + if not s3_utils.upload_s3(s3_client, local_file, config_dict['s3_bucket'], s3_file, True): exit("Error setting up for e2e: The test files were not uploaded to the s3 bucket therefore the tests cannot continue.") - - # Receive s3 message and MVM from SQS queue - sqs_consumer = SqsConsumer(conf_loc, cred_loc) - s3ma = S3MessageAdapter(access_bucket, headers, type, file_id_prefix, "DEBUG") - wp = WebPublisher("config/web-publisher-config-dev.yml", cred_loc) - - queue = sqs_consumer.connect() - try: - debug = False - sqs_consumer.receive_messages(queue, sqs_max_polls, handler) - - except Exception as e: - print("Message queue consumption failed: {}".format(e)) + sqs_consumer = SqsConsumer(**config_dict) + sqs_resource = s3_utils.connect('resource', 'sqs', config_dict['s3_region']) + queue = sqs_consumer.connect(sqs_resource, config_dict['sqs_name']) + sqs_consumer.receive_messages(queue, config_dict['sqs_max_polls'], handler) diff --git a/scripts/launch_kafka_publisher.py b/scripts/launch_kafka_publisher.py index f4a853d..85283c2 100644 --- a/scripts/launch_kafka_publisher.py +++ b/scripts/launch_kafka_publisher.py @@ -1,17 +1,21 @@ import argparse +import yaml + from onestop.KafkaPublisher import KafkaPublisher if __name__ == '__main__': ''' Uploads collection to Kafka collection topic ''' - parser = argparse.ArgumentParser(description="Launches KafkaPublisher to publish kafkda topics") - parser.add_argument('-conf', dest="conf", required=True, + parser = argparse.ArgumentParser(description="Launches KafkaPublisher to publish kafka topics") + parser.add_argument('-conf', dest="conf", required=False, default='/etc/config/config.yml', help="Config filepath") args = vars(parser.parse_args()) conf_loc = args.pop('conf') + with open(conf_loc) as f: + conf = yaml.load(f, Loader=yaml.FullLoader) # "discovery": # { @@ -22,13 +26,19 @@ # FileIdentifier: gov.noaa.ngdc.mgg.dem:yaquina_bay_p210_30m collection_uuid = '3ee5976e-789a-41d5-9cae-d51e7b92a247' content_dict = {'discovery': {'title': 'My Extra New Title!', - 'fileIdentifier': 'gov.noaa.osim2.mgg.dem:yaquina_bay_p210_30m' + 'fileIdentifier': 'gov.noaa.osim2.mgg.dem:yaquina_bay_p210_30m', + "links": [ + { + "linkFunction": "download", "linkName": "Amazon S3", "linkProtocol": "HTTPS", + "linkUrl": 
"https://s3.amazonaws.com/nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2" + } + ] } } # method one of POST, PUT, PATCH, DELETE method = 'POST' #Update - kafka_publisher = KafkaPublisher(conf_loc) + kafka_publisher = KafkaPublisher(**conf) metadata_producer = kafka_publisher.connect() kafka_publisher.publish_collection(metadata_producer, collection_uuid, content_dict, method) diff --git a/scripts/launch_pyconsumer.py b/scripts/launch_pyconsumer.py index 7850f38..5d22317 100644 --- a/scripts/launch_pyconsumer.py +++ b/scripts/launch_pyconsumer.py @@ -1,13 +1,18 @@ +import argparse import os import yaml +import json from onestop.util.SqsConsumer import SqsConsumer from onestop.util.S3Utils import S3Utils from onestop.util.S3MessageAdapter import S3MessageAdapter from onestop.WebPublisher import WebPublisher +from onestop.util.ClientLogger import ClientLogger +from onestop.schemas.util.jsonEncoder import EnumEncoder +config_dict = {} -def handler(recs): +def handler(recs, log_level): ''' Processes metadata information from sqs message triggered by S3 event and uploads to registry through web publisher (https). Utilizes helm for credentials and conf. @@ -17,77 +22,83 @@ def handler(recs): :return: str IM registry response ''' - print("Handling message...") + logger = ClientLogger.get_logger('launch_pyconsumer.handler', log_level, False) + logger.info('In Handler') # Now get boto client for object-uuid retrieval object_uuid = None if recs is None: - print("No records retrieved") - else: - rec = recs[0] - bucket = rec['s3']['bucket']['name'] - s3_key = rec['s3']['object']['key'] + logger.info('No records retrieved, doing nothing.') + return + + rec = recs[0] + bucket = rec['s3']['bucket']['name'] + s3_key = rec['s3']['object']['key'] - # Fetch the object to get the uuid - object_uuid = s3_utils.get_uuid_metadata(s3_resource, bucket, s3_key) + # Fetch the object to get the uuid + logger.info("Getting uuid") + s3_resource = s3_utils.connect('resource', 's3', None) + object_uuid = s3_utils.get_uuid_metadata(s3_resource, bucket, s3_key) - if object_uuid is not None: - print("Retrieved object-uuid: " + object_uuid) - else: - print("Adding uuid") - s3_utils.add_uuid_metadata(s3_resource, bucket, s3_key) + if object_uuid is not None: + logger.info('Retrieved object-uuid: %s'% object_uuid) + else: + logger.info('UUID not found, adding uuid to bucket=%s key=%s'%(bucket, s3_key)) + s3_utils.add_uuid_metadata(s3_resource, bucket, s3_key) # Convert s3 message to IM message - s3ma = S3MessageAdapter(conf_loc, s3_utils) - json_payload = s3ma.transform(recs) + s3ma = S3MessageAdapter(**config_dict) + im_message = s3ma.transform(recs) + logger.debug('S3MessageAdapter.transform: %s'%im_message) + json_payload = json.dumps(im_message.to_dict(), cls=EnumEncoder) + logger.debug('S3MessageAdapter.transform.json dump: %s'%json_payload) #Send the message to Onestop - wp = WebPublisher(conf_loc, cred_loc) - registry_response = wp.publish_registry("granule", object_uuid, json_payload.serialize(), "POST") - print("RESPONSE: ") - print(registry_response.json()) + wp = WebPublisher(**config_dict) + registry_response = wp.publish_registry("granule", object_uuid, json_payload, "POST") + logger.debug('publish_registry response: %s'%registry_response.json()) if __name__ == '__main__': - conf_loc = "/etc/config/config.yml" - cred_loc = "creds.yml" - with open(os.path.abspath(os.path.join(os.path.dirname(__file__), "creds.yml"))) as f: - cred = yaml.load(f, 
Loader=yaml.FullLoader) - with open(os.path.abspath(os.path.join(os.path.dirname(__file__), "/etc/config/config.yml"))) as f: - conf = yaml.load(f, Loader=yaml.FullLoader) - - registry_user = os.environ.get("REGISTRY_USERNAME") - registry_pwd = os.environ.get("REGISTRY_PASSWORD") - access_key = os.environ.get("ACCESS_KEY") - access_secret = os.environ.get("SECRET_KEY") - - f = open(cred_loc, "w+") - -#write creds to a file to avoid changing the python library - s = """sandbox: - access_key: {key} - secret_key: {secret} - -registry: - username: {user} - password: {pw} - """.format(key=access_key, secret=access_secret, user=registry_user, pw=registry_pwd) - f.write(s) - f.close() - r = open(cred_loc, "r") - - # # Receive s3 message and MVM from SQS queue - s3_utils = S3Utils(cred['sandbox']['access_key'], - cred['sandbox']['secret_key'], - "DEBUG") - sqs_max_polls = conf['sqs_max_polls'] - sqs_consumer = SqsConsumer(conf_loc, cred_loc) - queue = sqs_consumer.connect() - - try: - debug = False - # # Pass in the handler method - sqs_consumer.receive_messages(queue, sqs_max_polls, handler) - - except Exception as e: - print("Message queue consumption failed: {}".format(e)) + parser = argparse.ArgumentParser(description="Launches e2e test") + parser.add_argument('-conf', dest="conf", required=False, default='/etc/config/config.yml', + help="AWS config filepath") + parser.add_argument('-cred', dest="cred", required=True, + help="Credentials filepath") + args = vars(parser.parse_args()) + + # Generate configuration dictionary + conf_loc = args.pop('conf') + with open(conf_loc) as f: + config_dict.update(yaml.load(f, Loader=yaml.FullLoader)) + + # Get credentials from passed in fully qualified path or ENV. + cred_loc = args.pop('cred') + if cred_loc is not None: + with open(cred_loc) as f: + creds = yaml.load(f, Loader=yaml.FullLoader) + registry_username = creds['registry']['username'] + registry_password = creds['registry']['password'] + access_key = creds['sandbox']['access_key'] + access_secret = creds['sandbox']['secret_key'] + else: + print("Using env variables for config parameters") + registry_username = os.environ.get("REGISTRY_USERNAME") + registry_password = os.environ.get("REGISTRY_PASSWORD") + access_key = os.environ.get("ACCESS_KEY") + access_secret = os.environ.get("SECRET_KEY") + + config_dict.update({ + 'registry_username' : registry_username, + 'registry_password' : registry_password, + 'access_key' : access_key, + 'secret_key' : access_secret + }) + + s3_utils = S3Utils(**config_dict) + + # Receive s3 message and MVM from SQS queue + sqs_consumer = SqsConsumer(**config_dict) + sqs_resource = s3_utils.connect('resource', 'sqs', config_dict['s3_region']) + queue = sqs_consumer.connect(sqs_resource, config_dict['sqs_name']) + sqs_consumer.receive_messages(queue, config_dict['sqs_max_polls'], handler) diff --git a/scripts/sme/sme.py b/scripts/sme/sme.py index 6509aa3..12f7859 100644 --- a/scripts/sme/sme.py +++ b/scripts/sme/sme.py @@ -1,6 +1,7 @@ -import argparse import json import os +import yaml +import argparse from onestop.extract.CsbExtractor import CsbExtractor from onestop.KafkaConsumer import KafkaConsumer @@ -9,13 +10,16 @@ from onestop.schemas.geojsonSchemaClasses.org.cedar.schemas.avro.geojson.point import Point from onestop.schemas.geojsonSchemaClasses.point_type import PointType from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.parsed_record import ParsedRecord -from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.checksum_algorithm import 
ChecksumAlgorithm +#from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.checksum_algorithm import ChecksumAlgorithm from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.temporal_bounding import TemporalBounding from onestop.schemas.util.jsonEncoder import EnumEncoder, as_enum, EnumEncoderValue from onestop.KafkaPublisher import KafkaPublisher -from spatial import script_generation, postgres_insert +#from spatial import script_generation, postgres_insert +from onestop.util.ClientLogger import ClientLogger -def handler(key, value): +config_dict = {} + +def handler(key, value, log_level = 'INFO'): ''' Consumes message from psi-input-unknown, extracts geospatial data, uploads new payload to parsed-record topic in kafka, and uploads geospatial data to Postgres @@ -27,99 +31,120 @@ def handler(key, value): :return: str returns response message from kafka ''' - print('Key:', key) - print('Value: ' ,value) # Grabs the contents of the message and turns the dict string into a dictionary using json.loads - try: - content_dict = json.loads(value['content'], object_hook=as_enum) - - parsed_record = ParsedRecord().from_dict(content_dict) - - # Geospatial Extraction - # Extract the bucket key for csb_extractor object initialization - bucket_key = content_dict['discovery']['links'][0]['linkUrl'].split('.com/')[1] - - csb_extractor = CsbExtractor(su, bucket_key) - if csb_extractor.is_csv(bucket_key): - geospatial = csb_extractor.get_spatial_temporal_bounds('LON', 'LAT', 'TIME') - begin_date, end_date = geospatial['temporal'][0], geospatial['temporal'][1] - max_lon, max_lat, min_lon, min_lat = geospatial['geospatial'][2], geospatial['geospatial'][3], \ - geospatial['geospatial'][0], geospatial['geospatial'][1] - coords = csb_extractor.extract_coords(max_lon, max_lat, min_lon, min_lat) - - # Create spatial bounding types based on the given coords - pointType = PointType('Point') - point = Point(coordinates=coords[0], type=pointType) - - # Create temp bounding obj - tempBounding = TemporalBounding(beginDate=begin_date, endDate=end_date) - - # Update parsed record object with geospatial data - parsed_record.discovery.temporalBounding = tempBounding - parsed_record.discovery.spatialBounding = point - - """ - # Insert data into postgres - script = script_generation(coords[0], key) - postgres_insert(script) - """ - - # update content dict - parsed_record.type = value['type'] - content_dict = parsed_record.to_dict() - # reformat Relationship field - relationship_type = content_dict['relationships'][0]['type']['type'] - content_dict['relationships'][0]['type'] = relationship_type - - # reformat File Locations - filelocation_type = content_dict['fileLocations']['type']['type'] - content_dict['fileLocations']['type'] = filelocation_type - + logger = ClientLogger.get_logger('sme.handler', log_level, False) + logger.info('In Handler') + # This is an example for testing purposes. 
+ value = { + "type": "granule", + "content": "{ \"discovery\": {\n \"fileIdentifier\": \"92ade5dc-946d-11ea-abe4-0242ac120004\",\n \"links\": [\n {\n \"linkFunction\": \"download\",\n \"linkName\": \"Amazon S3\",\n \"linkProtocol\": \"HTTPS\",\n \"linkUrl\": \"https://s3.amazonaws.com/nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\"\n }\n ],\n \"parentIdentifier\": \"0fad03df-0805-434a-86a6-7dc42d68480f\",\n \"spatialBounding\": null,\n \"temporalBounding\": {\n \"beginDate\": \"2020-05-12T16:20:15.158Z\", \n \"endDate\": \"2020-05-12T16:21:51.494Z\"\n },\n \"title\": \"HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\"\n },\n \"fileInformation\": {\n \"checksums\": [{\"algorithm\": \"MD5\",\"value\": \"44d2452e8bc2c8013e9c673086fbab7a\"}]\n, \"optionalAttributes\":{}, \"format\": \"HSD\",\n \"name\": \"HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\",\n \"size\": 208918\n },\n \"fileLocations\": {\n \"optionalAttributes\":{}, \"uri\":\"//nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\", \"asynchronous\": false,\n \"deleted\": false,\n \"lastModified\": 1589300890000,\n \"locality\": \"us-east-1\",\n \"restricted\": false,\n \"serviceType\": \"Amazon:AWS:S3\",\n \"type\": {\"type\":\"ACCESS\"},\n \"uri\": \"s3://nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\"\n },\n \"relationships\": [\n {\n \"id\": \"0fad03df-0805-434a-86a6-7dc42d68480f\",\n \"type\": {\"type\":\"COLLECTION\"}\n }\n ]\n }", + "contentType": "application/json", + "method": "PUT", + "source": "unknown", + "operation": "ADD" + } + logger.debug('content: %s'%value['content']) + + content_dict = json.loads(value['content'], object_hook=as_enum) + logger.debug('content_dict: %s'%content_dict) + parsed_record = ParsedRecord().from_dict(content_dict) + + # Geospatial Extraction + bucket_key = content_dict['discovery']['links'][0]['linkUrl'].split('.com/')[1] + logger.info("Bucket key="+bucket_key) + if CsbExtractor.is_csv(bucket_key): + logger.info('Extracting geospatial information') + sm_open_file = su.get_csv_s3(su.connect("session", None), config_dict['s3_bucket'], bucket_key) + geospatial = CsbExtractor.get_spatial_temporal_bounds(sm_open_file, 'LON', 'LAT', 'TIME') + begin_date, end_date = geospatial['temporal'][0], geospatial['temporal'][1] + max_lon, max_lat, min_lon, min_lat = geospatial['geospatial'][2], geospatial['geospatial'][3], \ + geospatial['geospatial'][0], geospatial['geospatial'][1] + coords = CsbExtractor.extract_coords(sm_open_file, max_lon, max_lat, min_lon, min_lat) + + # Create spatial bounding types based on the given coords + pointType = PointType('Point') + point = Point(coordinates=coords[0], type=pointType) content_dict['discovery']['spatialBounding']['type'] = pointType.value - # Transform content_dict to appropiate payload - # cls=EnumEncoderValue argument looks for instances of Enum classes and extracts only the value of the Enum - content_dict = json.dumps(content_dict, cls=EnumEncoderValue) - content_dict = json.loads(content_dict) - - # Produce new information to kafka - kafka_publisher = KafkaPublisher("scripts/config/kafka-publisher-config-dev.yml") - metadata_producer = kafka_publisher.connect() - collection_id = parsed_record.relationships[0].id - kafka_publisher.publish_granule(metadata_producer, collection_id, collection_id, content_dict) - - except: - print('Invalid Format') - + # Create temp 
bounding obj + tempBounding = TemporalBounding(beginDate=begin_date, endDate=end_date) + + # Update parsed record object with geospatial data + parsed_record.discovery.temporalBounding = tempBounding + parsed_record.discovery.spatialBounding = point + + """ + # Insert data into postgres + script = script_generation(coords[0], key) + postgres_insert(script) + """ + else: + logger.info('Record not CSV - Skipping extracting geospatial information') + + # update content dict + parsed_record.type = value['type'] + content_dict = parsed_record.to_dict() + # reformat Relationship field + relationship_type = content_dict['relationships'][0]['type']['type'] + content_dict['relationships'][0]['type'] = relationship_type + + # reformat File Locations + filelocation_type = content_dict['fileLocations']['type']['type'] + content_dict['fileLocations']['type'] = filelocation_type + + # Transform content_dict to appropiate payload + # cls=EnumEncoderValue argument looks for instances of Enum classes and extracts only the value of the Enum + content_dict = json.dumps(content_dict, cls=EnumEncoderValue) + content_dict = json.loads(content_dict) + + # Produce new information to publish to kafka, TODO: Be wary of cyclical publish/consuming here, since the consumer calls this handler. + kafka_publisher = KafkaPublisher(**config_dict) + metadata_producer = kafka_publisher.connect() + collection_id = parsed_record.relationships[0].id + kafka_publisher.publish_granule(metadata_producer, collection_id, content_dict) if __name__ == '__main__': - # This is where helm will mount the config - conf_loc = "/etc/config/config.yml" - # this is where we are about to write the cred yaml - cred_loc = "creds.yml" - - registry_user = os.environ.get("REGISTRY_USERNAME") - registry_pwd = os.environ.get("REGISTRY_PASSWORD") - access_key = os.environ.get("ACCESS_KEY") - access_secret = os.environ.get("SECRET_KEY") - - f = open(cred_loc, "w+") - - # TODO revisit this when we make a standard that all scripts will follow - # write creds to a file to avoid changing the python library - s = """ - sandbox: - access_key: {key} - secret_key: {secret} - registry: - username: {user} - password: {pw} - """.format(key=access_key, secret=access_secret, user=registry_user, pw=registry_pwd) - f.write(s) - f.close() - r = open(cred_loc, "r") - - su = S3Utils(conf_loc, cred_loc) - kafka_consumer = KafkaConsumer(conf_loc) + # Example command: python3 sme.py -conf /Users/whoever/repo/onestop-clients/scripts/config/combined_template.yml -cred /Users/whoever/repo/onestop-clients/scripts/config/credentials.yml + # python3 archive_client_integration.py -cred /Users/whoever/repo/onestop-clients/scripts/config/credentials.yml + parser = argparse.ArgumentParser(description="Launches sme test") + # Set default config location to the Helm mounted pod configuration location + parser.add_argument('-conf', dest="conf", required=False, default='/etc/config/config.yml', + help="AWS config filepath") + parser.add_argument('-cred', dest="cred", required=True, + help="Credentials filepath") + args = vars(parser.parse_args()) + + # Generate configuration dictionary + conf_loc = args.pop('conf') + with open(conf_loc) as f: + config_dict.update(yaml.load(f, Loader=yaml.FullLoader)) + + # Get credentials from passed in fully qualified path or ENV. 
+ cred_loc = args.pop('cred') + if cred_loc is not None: + with open(cred_loc) as f: + creds = yaml.load(f, Loader=yaml.FullLoader) + registry_username = creds['registry']['username'] + registry_password = creds['registry']['password'] + access_key = creds['sandbox']['access_key'] + access_secret = creds['sandbox']['secret_key'] + else: + print("Using env variables for config parameters") + registry_username = os.environ.get("REGISTRY_USERNAME") + registry_password = os.environ.get("REGISTRY_PASSWORD") + access_key = os.environ.get("ACCESS_KEY") + access_secret = os.environ.get("SECRET_KEY") + + config_dict.update({ + 'registry_username' : registry_username, + 'registry_password' : registry_password, + 'access_key' : access_key, + 'secret_key' : access_secret + }) + + su = S3Utils(**config_dict) + + kafka_consumer = KafkaConsumer(**config_dict) metadata_consumer = kafka_consumer.connect() - kafka_consumer.consume(metadata_consumer, lambda k, v: handler(k, v)) \ No newline at end of file +# handler('', '', config_dict['log_level']) # For testing purposes + kafka_consumer.consume(metadata_consumer, handler) \ No newline at end of file diff --git a/scripts/sme/smeFunc.py b/scripts/sme/smeFunc.py index 084e15b..f07c7b6 100644 --- a/scripts/sme/smeFunc.py +++ b/scripts/sme/smeFunc.py @@ -1,8 +1,11 @@ +import yaml import argparse -import json from onestop.KafkaConsumer import KafkaConsumer +from onestop.util.ClientLogger import ClientLogger -def handler(key,value): +config_dict = {} + +def handler(key, value, log_level = 'INFO'): ''' Prints key, value pair of items in topic @@ -13,9 +16,10 @@ def handler(key,value): :return: None ''' - print(key) - print('VALUE-------') - print(value) + logger = ClientLogger.get_logger('smeFunc.handler', log_level, False) + logger.info('In Handler') + logger.info('key=%s value=%s'%(key, value)) + """ if (value['type'] == 'collection' or not bool(value['fileInformation'])): print(value['discovery']['fileIdentifier']) @@ -25,51 +29,20 @@ def handler(key,value): if __name__ == '__main__': - - kafka_consumer = KafkaConsumer("scripts/config/kafka-publisher-config-dev.yml") + # Example command: python3 smeFunc.py -conf /Users/whoever/repo/onestop-clients/scripts/config/combined_template.yml + # python3 smeFunc.py + parser = argparse.ArgumentParser(description="Launches smeFunc test") + # Set default config location to the Helm mounted pod configuration location + parser.add_argument('-conf', dest="conf", required=False, default='/etc/config/config.yml', + help="AWS config filepath") + args = vars(parser.parse_args()) + + # Generate configuration dictionary + conf_loc = args.pop('conf') + with open(conf_loc) as f: + config_dict.update(yaml.load(f, Loader=yaml.FullLoader)) + + kafka_consumer = KafkaConsumer(**config_dict) kafka_consumer.granule_topic_consume = 'psi-granule-parsed' metadata_consumer = kafka_consumer.connect() - kafka_consumer.consume(metadata_consumer, lambda k, v: handler(k, v)) - """ - parser = argparse.ArgumentParser(description="Allows smeFunc to produce or consume messagges from kafkda topics") - parser.add_argument('-cmd', dest="command", required=True, - help="Command (produce/consume)") - - parser.add_argument('-b', dest="bootstrap.servers", required=True, - help="Bootstrap broker(s) (host[:port])") - parser.add_argument('-s', dest="schema.registry.url", required=True, - help="Schema Registry (http(s)://host[:port]") - parser.add_argument('-t', dest="topic", required=True, - help="Topic name") - parser.add_argument('-g', dest="group.id", 
required=False, - help="Consumer group") - parser.add_argument('-o', dest="auto.offset.reset", required=False, - help="offset") - - config = vars(parser.parse_args()) - topic = config.pop('topic') - cmd = config.pop('command') - - if (cmd=="consume"): - consume(config, topic, lambda k, v: handler(k, v)) - - - if (cmd=="produce"): - - #Example content - value = { - "type": "collection", - "content": "Update!", - "contentType": "application/json", - "method": "PUT", - "source": "unknown", - "operation": "ADD" - } - - key = "3ee5976e-789a-41d5-9cae-d51e7b92a247" - - data = {key: value} - - produce(config, topic, data) - """ - + kafka_consumer.consume(metadata_consumer, handler) diff --git a/scripts/sqs-to-registry/s3_notification_handler.py b/scripts/sqs-to-registry/s3_notification_handler.py index 2b26ab5..c2785a0 100644 --- a/scripts/sqs-to-registry/s3_notification_handler.py +++ b/scripts/sqs-to-registry/s3_notification_handler.py @@ -1,55 +1,87 @@ import os import yaml +import json + from onestop.util.SqsConsumer import SqsConsumer from onestop.util.S3Utils import S3Utils from onestop.util.S3MessageAdapter import S3MessageAdapter from onestop.WebPublisher import WebPublisher from onestop.util.SqsHandlers import create_delete_handler from onestop.util.SqsHandlers import create_upload_handler +from onestop.util.ClientLogger import ClientLogger -from datetime import date import argparse +config_dict = {} + +test_message = { + "Type": "Notification", + "MessageId": "e12f0129-0236-529c-aeed-5978d181e92a", + "TopicArn": "arn:aws:sns:" + config_dict['s3_region'] + ":798276211865:cloud-archive-client-sns", + "Subject": "Amazon S3 Notification", + "Message": '''{ + "Records": [{ + "eventVersion": "2.1", "eventSource": "aws:s3", "awsRegion": "''' + config_dict['s3_region'] + '''", + "eventTime": "2020-12-14T20:56:08.725Z", + "eventName": "ObjectRemoved:Delete", + "userIdentity": {"principalId": "AX8TWPQYA8JEM"}, + "requestParameters": {"sourceIPAddress": "65.113.158.185"}, + "responseElements": {"x-amz-request-id": "D8059E6A1D53597A", + "x-amz-id-2": "7DZF7MAaHztZqVMKlsK45Ogrto0945RzXSkMnmArxNCZ+4/jmXeUn9JM1NWOMeKK093vW8g5Cj5KMutID+4R3W1Rx3XDZOio"}, + "s3": { + "s3SchemaVersion": "1.0", "configurationId": "archive-testing-demo-event", + "bucket": {"name": "''' + config_dict['s3_bucket'] + '''", + "ownerIdentity": {"principalId": "AX8TWPQYA8JEM"}, + "arn": "arn:aws:s3:::''' + config_dict['s3_bucket'] + '''"}, + "object": {"key": "123", + "sequencer": "005FD7D1765F04D8BE", + "eTag": "44d2452e8bc2c8013e9c673086fbab7a", + "size": 1385, + "versionId": "q6ls_7mhqUbfMsoYiQSiADnHBZQ3Fbzf"} + } + }] + }''', + "Timestamp": "2020-12-14T20:56:23.786Z", + "SignatureVersion": "1", + "Signature": "MB5P0H5R5q3zOFoo05lpL4YuZ5TJy+f2c026wBWBsQ7mbNQiVxAy4VbbK0U1N3YQwOslq5ImVjMpf26t1+zY1hoHoALfvHY9wPtc8RNlYqmupCaZgtwEl3MYQz2pHIXbcma4rt2oh+vp/n+viARCToupyysEWTvw9a9k9AZRuHhTt8NKe4gpphG0s3/C1FdvrpQUvxoSGVizkaX93clU+hAFsB7V+yTlbKP+SNAqP/PaLtai6aPY9Lb8reO2ZjucOl7EgF5IhBVT43HhjBBj4JqYBNbMPcId5vMfBX8qI8ANIVlGGCIjGo1fpU0ROxSHsltuRjkmErpxUEe3YJJM3Q==", + "SigningCertURL": "https://sns.us-east-2.amazonaws.com/SimpleNotificationService-010a507c1833636cd94bdb98bd93083a.pem", + "UnsubscribeURL": "https://sns.us-east-2.amazonaws.com/?Action=Unsubscribe&SubscriptionArn=arn:aws:sns:us-east-2:798276211865:cloud-archive-client-sns:461222e7-0abf-40c6-acf7-4825cef65cce" +} + +def handler(recs, log_level): + logger = ClientLogger.get_logger('s3_notification_handler.handler', log_level, False) + logger.info('In Handler') -def handler(recs): 
- print("Handling message...") + if recs is None: + logger.info('No records retrieved, doing nothing.') + return - # Now get boto client for object-uuid retrieval - object_uuid = None + rec = recs[0] + logger.info('Record:%s'%rec) - if recs is None: - print("No records retrieved" + date.today()) + if 'ObjectRemoved' in rec['eventName']: + delete_handler(recs) else: - rec = recs[0] - print(rec) - if 'ObjectRemoved' in rec['eventName']: - print("SME - calling delete handler") - print(rec['eventName']) - delete_handler(recs) - else: - print("SME - calling upload handler") - upload_handler(recs) - #copy_handler(recs) - + upload_handler(recs) if __name__ == '__main__': - - parser = argparse.ArgumentParser(description="Launch SQS to Registry consumer") - parser.add_argument('-conf', dest="conf", required=False, - help="Config filepath") - - parser.add_argument('-cred', dest="cred", required=False, + # Example command: python3 archive_client_integration.py -conf /Users/whoever/repo/onestop-clients/scripts/config/combined_template.yml -cred /Users/whoever/repo/onestop-clients/scripts/config/credentials.yml + # python3 archive_client_integration.py -cred /Users/whoever/repo/onestop-clients/scripts/config/credentials.yml + parser = argparse.ArgumentParser(description="Launches archive client integration") + # Set default config location to the Helm mounted pod configuration location + parser.add_argument('-conf', dest="conf", required=False, default='/etc/config/config.yml', + help="AWS config filepath") + parser.add_argument('-cred', dest="cred", required=True, help="Credentials filepath") - args = vars(parser.parse_args()) - cred_loc = args.pop('cred') - #credentials from either file or env - registry_username = None - registry_password = None - access_key = None - access_secret = None + # Generate configuration dictionary + conf_loc = args.pop('conf') + with open(conf_loc) as f: + config_dict.update(yaml.load(f, Loader=yaml.FullLoader)) + # Get credentials from passed in fully qualified path or ENV. 
+ cred_loc = args.pop('cred') if cred_loc is not None: with open(cred_loc) as f: creds = yaml.load(f, Loader=yaml.FullLoader) @@ -64,60 +96,34 @@ def handler(recs): access_key = os.environ.get("ACCESS_KEY") access_secret = os.environ.get("SECRET_KEY") - # default config location mounted in pod - if args.pop('conf') is None: - conf_loc = "/etc/config/config.yml" - else: - conf_loc = args.pop('conf') - - conf = None - with open(conf_loc) as f: - conf = yaml.load(f, Loader=yaml.FullLoader) - - #TODO organize the config - #System - log_level = conf['log_level'] - sqs_max_polls = conf['sqs_max_polls'] - - #Destination - registry_base_url = conf['registry_base_url'] - onestop_base_url = conf['onestop_base_url'] - - #Source - access_bucket = conf['access_bucket'] - sqs_url = conf['sqs_url'] - s3_region = conf['s3_region'] - s3_bucket2 = conf['s3_bucket2'] - s3_region2 = conf['s3_region2'] - - - #Onestop related - prefix_map = conf['prefixMap'] - file_id_prefix = conf['file_identifier_prefix'] - file_format = conf['format'] - headers = conf['headers'] - type = conf['type'] + config_dict.update({ + 'registry_username' : registry_username, + 'registry_password' : registry_password, + 'access_key' : access_key, + 'secret_key' : access_secret + }) + sqs_consumer = SqsConsumer(**config_dict) - sqs_consumer = SqsConsumer(access_key, access_secret, s3_region, sqs_url, log_level) + wp = WebPublisher(**config_dict) - wp = WebPublisher(registry_base_url=registry_base_url, username=registry_username, password=registry_password, - onestop_base_url=onestop_base_url, log_level=log_level) + s3_utils = S3Utils(**config_dict) - s3_utils = S3Utils(access_key, access_secret, log_level) - s3ma = S3MessageAdapter(access_bucket, prefix_map, format, headers, type, file_id_prefix, log_level) + s3ma = S3MessageAdapter(**config_dict) delete_handler = create_delete_handler(wp) upload_handler = create_upload_handler(wp, s3_utils, s3ma) - queue = sqs_consumer.connect() + s3_resource = s3_utils.connect('resource', 'sqs', config_dict['s3_region']) + queue = sqs_consumer.connect(s3_resource, config_dict['sqs_name']) - try: - debug = False - # # Pass in the handler method - #Hack to make this stay up forever - #TODO add feature to client library for polling indefinitely - while True: - sqs_consumer.receive_messages(queue, sqs_max_polls, handler) + # Send a test message +# sqs_client = s3_utils.connect('client', 'sqs' , config_dict['s3_region']) +# sqs_client.send_message( +# QueueUrl='https://sqs.us-east-2.amazonaws.com/798276211865/cloud-archive-client-sqs', +# MessageBody=json.dumps(test_message) +# ) - except Exception as e: - print("Message queue consumption failed: {}".format(e)) + #Hack to make this stay up forever + #TODO add feature to client library for polling indefinitely + while True: + sqs_consumer.receive_messages(queue, config_dict['sqs_max_polls'], handler) From 0fe8853797cdd06987e59c3d5b5b40928c730b1e Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 9 Jun 2021 10:04:23 -0600 Subject: [PATCH 089/100] 1507 - Changed warning about extra constructor arguments to debug statement since no harm in extra params. 
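
For context, here is a minimal sketch of the constructor pattern this patch adjusts. The `ExampleClient` name and the plain `logging` setup are illustrative stand-ins (the real classes build their logger through `ClientLogger`); the point is that unrecognized keyword arguments collected in `**wildargs` are tolerated and now only noted at DEBUG level:

```python
import logging

class ExampleClient:
    """Illustrative stand-in for the onestop-python-client classes touched by this patch."""

    def __init__(self, log_level='INFO', **wildargs):
        self.logger = logging.getLogger(self.__class__.__name__)
        self.logger.setLevel(log_level)
        self.logger.info("Initializing " + self.__class__.__name__)
        if wildargs:
            # Extra parameters do no harm, so report them at debug level only.
            self.logger.debug("Superfluous parameters in constructor call: " + str(wildargs))

# Passing one shared config dict with extra keys no longer produces warnings:
ExampleClient(log_level='DEBUG', sqs_max_polls=2, s3_bucket='archive-testing-demo')
```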
--- onestop-python-client/onestop/KafkaConsumer.py | 2 +- onestop-python-client/onestop/KafkaPublisher.py | 2 +- onestop-python-client/onestop/WebPublisher.py | 2 +- onestop-python-client/onestop/util/S3MessageAdapter.py | 2 +- onestop-python-client/onestop/util/S3Utils.py | 2 +- onestop-python-client/onestop/util/SqsConsumer.py | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/onestop-python-client/onestop/KafkaConsumer.py b/onestop-python-client/onestop/KafkaConsumer.py index 18a84cf..80cacb6 100644 --- a/onestop-python-client/onestop/KafkaConsumer.py +++ b/onestop-python-client/onestop/KafkaConsumer.py @@ -102,7 +102,7 @@ def __init__(self, metadata_type, brokers, group_id, auto_offset_reset, schema_r self.logger.info("Initializing " + self.__class__.__name__) if wildargs: - self.logger.warning("There were extra constructor arguments: " + str(wildargs)) + self.logger.debug("Superfluous parameters in constructor call: " + str(wildargs)) def register_client(self): """ diff --git a/onestop-python-client/onestop/KafkaPublisher.py b/onestop-python-client/onestop/KafkaPublisher.py index 0ca40d0..3144ff0 100644 --- a/onestop-python-client/onestop/KafkaPublisher.py +++ b/onestop-python-client/onestop/KafkaPublisher.py @@ -97,7 +97,7 @@ def __init__(self, metadata_type, brokers, schema_registry, security, collection self.logger.info("Initializing " + self.__class__.__name__) if wildargs: - self.logger.warning("There were extra constructor arguments: " + str(wildargs)) + self.logger.debug("Superfluous parameters in constructor call: " + str(wildargs)) def connect(self): """ diff --git a/onestop-python-client/onestop/WebPublisher.py b/onestop-python-client/onestop/WebPublisher.py index 7b1c6bd..47c3bd3 100644 --- a/onestop-python-client/onestop/WebPublisher.py +++ b/onestop-python-client/onestop/WebPublisher.py @@ -42,7 +42,7 @@ def __init__(self, registry_base_url, registry_username, registry_password, ones self.logger.info("Initializing " + self.__class__.__name__) if kwargs: - self.logger.warning("There were extra constructor arguments: " + str(kwargs)) + self.logger.debug("Superfluous parameters in constructor call: " + str(kwargs)) def publish_registry(self, metadata_type, uuid, payload, method): """ diff --git a/onestop-python-client/onestop/util/S3MessageAdapter.py b/onestop-python-client/onestop/util/S3MessageAdapter.py index 6bd832d..0f6d020 100644 --- a/onestop-python-client/onestop/util/S3MessageAdapter.py +++ b/onestop-python-client/onestop/util/S3MessageAdapter.py @@ -59,7 +59,7 @@ def __init__(self, access_bucket, type, file_id_prefix, collection_id, log_leve self.logger.info("Initializing " + self.__class__.__name__) if wildargs: - self.logger.warning("There were extra constructor arguments: " + str(wildargs)) + self.logger.debug("Superfluous parameters in constructor call: " + str(wildargs)) def transform(self, recs): """ diff --git a/onestop-python-client/onestop/util/S3Utils.py b/onestop-python-client/onestop/util/S3Utils.py index 24a81c3..e654df9 100644 --- a/onestop-python-client/onestop/util/S3Utils.py +++ b/onestop-python-client/onestop/util/S3Utils.py @@ -76,7 +76,7 @@ def __init__(self, access_key, secret_key, log_level = 'INFO', **wildargs): self.logger.info("Initializing " + self.__class__.__name__) if wildargs: - self.logger.warning("There were extra constructor arguments: " + str(wildargs)) + self.logger.debug("Superfluous parameters in constructor call: " + str(wildargs)) def connect(self, type, service_name, region): """ diff --git 
a/onestop-python-client/onestop/util/SqsConsumer.py b/onestop-python-client/onestop/util/SqsConsumer.py index d784734..4f2b6ac 100644 --- a/onestop-python-client/onestop/util/SqsConsumer.py +++ b/onestop-python-client/onestop/util/SqsConsumer.py @@ -30,7 +30,7 @@ def __init__(self, log_level = 'INFO', **wildargs): self.logger.info("Initializing " + self.__class__.__name__) if wildargs: - self.logger.warning("There were extra constructor arguments: " + str(wildargs)) + self.logger.debug("Superfluous parameters in constructor call: " + str(wildargs)) def connect(self, sqs_resource, sqs_queue_name): """ From bb03a987c9494621a6afd814d596341262e53ff2 Mon Sep 17 00:00:00 2001 From: Erin Date: Thu, 10 Jun 2021 13:32:23 -0600 Subject: [PATCH 090/100] 1507-Change all init parameters from metadata_type to their classname_metadata_type, including changing type var used in S3MessageAdapter. --- helm/onestop-sqs-consumer/values.yaml | 6 +++-- helm/sme-chart/values.yaml | 7 +++-- kubernetes/pyconsumer-pod.yaml | 10 ++++--- .../csb-data-stream-config-template.yml | 6 ++++- .../onestop/KafkaConsumer.py | 6 ++--- .../onestop/KafkaPublisher.py | 6 ++--- .../onestop/util/S3MessageAdapter.py | 8 +++--- .../test/unit/test_KafkaConsumer.py | 18 ++++++------- .../test/unit/test_KafkaPublisher.py | 18 ++++++------- .../test/unit/test_SqsHandlers.py | 2 +- .../test/unit/util/test_S3MessageAdapter.py | 26 ++++++++++++------- .../test/unit/util/test_S3Utils.py | 2 +- .../test/unit/util/test_SqsConsumer.py | 2 +- scripts/config/csb-data-stream-config.yml | 6 ++++- scripts/sqs-to-registry/config/e2e.yml | 6 +++-- 15 files changed, 77 insertions(+), 52 deletions(-) diff --git a/helm/onestop-sqs-consumer/values.yaml b/helm/onestop-sqs-consumer/values.yaml index bc0f8fb..f5a24fb 100644 --- a/helm/onestop-sqs-consumer/values.yaml +++ b/helm/onestop-sqs-consumer/values.yaml @@ -70,14 +70,16 @@ config: |- #CSB stream config format: csv headers: UNIQUE_ID,FILE_UUID,LON,LAT,DEPTH,TIME,PLATFORM_NAME,PROVIDER - type: COLLECTION collection_id: fdb56230-87f4-49f2-ab83-104cfd073177 access_bucket: https://archive-testing-demo.s3-us-east-2.amazonaws.com #access_bucket: https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com file_id_prefix: "gov.noaa.ncei.csb:" # COLLECTION or GRANULE - metadata_type: GRANULE + kafka_consumer_metadata_type: GRANULE + kafka_publisher_metadata_type: GRANULE + s3_message_adapter_metadata_type: COLLECTION + registry_base_url: http://os-registry:80 onestop_base_url: http://os-search:8080 diff --git a/helm/sme-chart/values.yaml b/helm/sme-chart/values.yaml index 3fc6922..6016adc 100644 --- a/helm/sme-chart/values.yaml +++ b/helm/sme-chart/values.yaml @@ -27,14 +27,17 @@ config: |- #CSB stream config format: csv headers: UNIQUE_ID,FILE_UUID,LON,LAT,DEPTH,TIME,PLATFORM_NAME,PROVIDER - type: COLLECTION collection_id: fdb56230-87f4-49f2-ab83-104cfd073177 access_bucket: https://archive-testing-demo.s3-us-east-2.amazonaws.com #access_bucket: https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com file_id_prefix: "gov.noaa.ncei.csb:" # COLLECTION or GRANULE - metadata_type: GRANULE + kafka_consumer_metadata_type: GRANULE + kafka_producer_metadata_type: GRANULE + web_publisher_metadata_type: GRANULE + s3_message_adapter_metadata_type: COLLECTION + registry_base_url: http://os-registry:80 onestop_base_url: http://os-search:8080 diff --git a/kubernetes/pyconsumer-pod.yaml b/kubernetes/pyconsumer-pod.yaml index 6943403..e6ac5c5 100644 --- a/kubernetes/pyconsumer-pod.yaml +++ b/kubernetes/pyconsumer-pod.yaml 
@@ -70,7 +70,6 @@ data: csb: format: csv headers: UNIQUE_ID,FILE_UUID,LON,LAT,DEPTH,TIME,PLATFORM_NAME,PROVIDER - type: COLLECTION collection_id: fdb56230-87f4-49f2-ab83-104cfd073177 registry_base_url: https://cedardevs.org/ access_bucket: https://archive-testing-demo.s3-us-east-2.amazonaws.com @@ -79,10 +78,13 @@ data: # Web Publisher web: - # COLLECTION or GRANULE - metadata_type: granule registry_base_url: https://cedardevs.org/onestop/registry-api onestop_base_url: https://cedardevs.org/onestop/search-api security: - enabled: True \ No newline at end of file + enabled: True + + # COLLECTION or GRANULE + kafka_consumer_metadata_type: GRANULE + kafka_publisher_metadata_type: GRANULE + s3_message_adapter_metadata_type: COLLECTION \ No newline at end of file diff --git a/onestop-python-client/config/csb-data-stream-config-template.yml b/onestop-python-client/config/csb-data-stream-config-template.yml index 8c2d4de..07ab823 100644 --- a/onestop-python-client/config/csb-data-stream-config-template.yml +++ b/onestop-python-client/config/csb-data-stream-config-template.yml @@ -1,6 +1,10 @@ +# COLLECTION or GRANULE +kafka_consumer_metadata_type: COLLECTION +kafka_publisher_metadata_type: COLLECTION +s3_message_adapter_metadata_type: COLLECTION + format: csv headers: UNIQUE_ID,FILE_UUID,LON,LAT,DEPTH,TIME,PLATFORM_NAME,PROVIDER -type: COLLECTION collection_id: fdb56230-87f4-49f2-ab83-104cfd073177 #registry_base_url: https://internal-a683c98a66fb011eaa4230e0d5e5657f-369075387.us-east-1.elb.amazonaws.com registry_base_url: http://localhost/onestop/api/registry diff --git a/onestop-python-client/onestop/KafkaConsumer.py b/onestop-python-client/onestop/KafkaConsumer.py index 80cacb6..b15b529 100644 --- a/onestop-python-client/onestop/KafkaConsumer.py +++ b/onestop-python-client/onestop/KafkaConsumer.py @@ -51,11 +51,11 @@ class KafkaConsumer: asynchronously polls for messages in the connected topic, results vary depending on the handler function that is passed into it """ - def __init__(self, metadata_type, brokers, group_id, auto_offset_reset, schema_registry, security, collection_topic_consume, granule_topic_consume, log_level = 'INFO', **wildargs): + def __init__(self, kafka_consumer_metadata_type, brokers, group_id, auto_offset_reset, schema_registry, security, collection_topic_consume, granule_topic_consume, log_level = 'INFO', **wildargs): """ Attributes ---------- - metadata_type: str + kafka_consumer_metadata_type: str type of metadata (COLLECTION or GRANULE) brokers: str brokers (kubernetes service) @@ -79,7 +79,7 @@ def __init__(self, metadata_type, brokers, group_id, auto_offset_reset, schema_r What log level to use for this class """ - self.metadata_type = metadata_type.upper() + self.metadata_type = kafka_consumer_metadata_type.upper() self.brokers = brokers self.group_id = group_id self.auto_offset_reset = auto_offset_reset diff --git a/onestop-python-client/onestop/KafkaPublisher.py b/onestop-python-client/onestop/KafkaPublisher.py index 3144ff0..15a5d3b 100644 --- a/onestop-python-client/onestop/KafkaPublisher.py +++ b/onestop-python-client/onestop/KafkaPublisher.py @@ -52,11 +52,11 @@ class KafkaPublisher: Publish granule to granule topic """ - def __init__(self, metadata_type, brokers, schema_registry, security, collection_topic_publish, granule_topic_publish, log_level='INFO', **wildargs): + def __init__(self, kafka_publisher_metadata_type, brokers, schema_registry, security, collection_topic_publish, granule_topic_publish, log_level='INFO', **wildargs): """ Attributes ---------- 
- metadata_type: str + kafka_publisher_metadata_type: str type of metadata (COLLECTION or GRANULE) brokers: str brokers (kubernetes service) @@ -77,7 +77,7 @@ def __init__(self, metadata_type, brokers, schema_registry, security, collection granule_topic: str granule topic you want to produce to """ - self.metadata_type = metadata_type.upper() + self.metadata_type = kafka_publisher_metadata_type.upper() self.brokers = brokers self.schema_registry = schema_registry self.security_enabled = security['enabled'] diff --git a/onestop-python-client/onestop/util/S3MessageAdapter.py b/onestop-python-client/onestop/util/S3MessageAdapter.py index 0f6d020..cb8ffe0 100644 --- a/onestop-python-client/onestop/util/S3MessageAdapter.py +++ b/onestop-python-client/onestop/util/S3MessageAdapter.py @@ -35,13 +35,13 @@ class S3MessageAdapter: transform(recs) transforms sqs message triggered by s3 event to correct format for publishing to IM registry """ - def __init__(self, access_bucket, type, file_id_prefix, collection_id, log_level = 'INFO', **wildargs): + def __init__(self, access_bucket, s3_message_adapter_metadata_type, file_id_prefix, collection_id, log_level = 'INFO', **wildargs): """ Parameters ---------- access_bucket: str access bucket to put in the links field when transformed. - type: str + s3_message_adapter_metadata_type: str COLLECTION or GRANULE file_id_prefix: str File prefix returned as fileIdentifier @@ -52,7 +52,7 @@ def __init__(self, access_bucket, type, file_id_prefix, collection_id, log_leve """ self.access_bucket = access_bucket - self.type = type + self.metadata_type = s3_message_adapter_metadata_type.upper() self.file_id_prefix = file_id_prefix self.collection_id = collection_id self.logger = ClientLogger.get_logger(self.__class__.__name__, log_level, False) @@ -91,7 +91,7 @@ def transform(self, recs): fileInformation = FileInformation(name=file_name, size=file_size, checksums=[checkSum], optionalAttributes={}) # Relationship - relationshipType = RelationshipType(type=self.type) + relationshipType = RelationshipType(type=self.metadata_type) relationship = Relationship(id=self.collection_id, type=relationshipType) # File Location diff --git a/onestop-python-client/test/unit/test_KafkaConsumer.py b/onestop-python-client/test/unit/test_KafkaConsumer.py index 6106738..4a5345f 100644 --- a/onestop-python-client/test/unit/test_KafkaConsumer.py +++ b/onestop-python-client/test/unit/test_KafkaConsumer.py @@ -13,7 +13,7 @@ class test_KafkaConsumer(unittest.TestCase): def setUp(cls): print("Set it up!") cls.conf_w_security = { - "metadata_type" : "GRANULE", + "kafka_consumer_metadata_type" : "GRANULE", "brokers" : "onestop-dev-cp-kafka:9092", "group_id" : "sme-test", "auto_offset_reset" : "earliest", @@ -41,7 +41,7 @@ def tearDown(self): def test_init_happy_nonconditional_params(self): consumer = KafkaConsumer(**self.conf_w_security) - self.assertEqual(consumer.metadata_type, self.conf_w_security['metadata_type']) + self.assertEqual(consumer.metadata_type, self.conf_w_security['kafka_consumer_metadata_type']) self.assertEqual(consumer.brokers, self.conf_w_security['brokers']) self.assertEqual(consumer.group_id, self.conf_w_security['group_id']) self.assertEqual(consumer.auto_offset_reset, self.conf_w_security['auto_offset_reset']) @@ -67,11 +67,11 @@ def test_init_security_disabled(self): def test_init_metadata_type_valid(self): consumer = KafkaConsumer(**self.conf_w_security) - self.assertEqual(consumer.metadata_type, self.conf_w_security['metadata_type']) + 
self.assertEqual(consumer.metadata_type, self.conf_w_security['kafka_consumer_metadata_type']) def test_init_metadata_type_invalid(self): wrong_metadata_type_config = dict(self.conf_w_security) - wrong_metadata_type_config['metadata_type'] = "invalid_type" + wrong_metadata_type_config['kafka_consumer_metadata_type'] = "invalid_type" self.assertRaises(ValueError, KafkaConsumer, **wrong_metadata_type_config) @@ -118,7 +118,7 @@ def test_register_client_wo_security(self, mock_client): @patch('onestop.KafkaConsumer.DeserializingConsumer') def test_create_consumer_calls_AvroDeserializer(self, mock_deserializing_consumer, mock_avro_deserializer): conf_w_security_collection = dict(self.conf_w_security) - conf_w_security_collection['metadata_type'] = "COLLECTION" + conf_w_security_collection['kafka_consumer_metadata_type'] = "COLLECTION" consumer = KafkaConsumer(**conf_w_security_collection) reg_client = consumer.register_client() @@ -135,7 +135,7 @@ def test_create_consumer_calls_AvroDeserializer(self, mock_deserializing_consume def test_create_consumer_collection_w_security(self, mock_deserializing_consumer, mock_avro_deserializer): conf_w_security_collection = dict(self.conf_w_security) topic = conf_w_security_collection['collection_topic_consume'] - conf_w_security_collection['metadata_type'] = 'COLLECTION' + conf_w_security_collection['kafka_consumer_metadata_type'] = 'COLLECTION' consumer = KafkaConsumer(**conf_w_security_collection) reg_client = MagicMock() @@ -166,7 +166,7 @@ def test_create_consumer_collection_w_security(self, mock_deserializing_consumer def test_create_consumer_collection_wo_security(self, mock_deserializing_consumer, mock_avro_deserializer): conf_wo_security_collection = dict(self.conf_wo_security) topic = conf_wo_security_collection['collection_topic_consume'] - conf_wo_security_collection['metadata_type'] = 'COLLECTION' + conf_wo_security_collection['kafka_consumer_metadata_type'] = 'COLLECTION' consumer = KafkaConsumer(**conf_wo_security_collection) reg_client = MagicMock() @@ -193,7 +193,7 @@ def test_create_consumer_collection_wo_security(self, mock_deserializing_consume def test_create_consumer_granule_w_security(self, mock_deserializing_consumer, mock_avro_deserializer): conf_w_security_granule = dict(self.conf_w_security) topic = conf_w_security_granule['granule_topic_consume'] - conf_w_security_granule['metadata_type'] = 'GRANULE' + conf_w_security_granule['kafka_consumer_metadata_type'] = 'GRANULE' consumer = KafkaConsumer(**conf_w_security_granule) reg_client = MagicMock() @@ -224,7 +224,7 @@ def test_create_consumer_granule_w_security(self, mock_deserializing_consumer, m def test_create_consumer_granule_wo_security(self, mock_deserializing_consumer, mock_avro_deserializer): conf_wo_security_granule = dict(self.conf_wo_security) exp_topic = conf_wo_security_granule['granule_topic_consume'] - conf_wo_security_granule['metadata_type'] = 'GRANULE' + conf_wo_security_granule['kafka_consumer_metadata_type'] = 'GRANULE' consumer = KafkaConsumer(**conf_wo_security_granule) reg_client = MagicMock() diff --git a/onestop-python-client/test/unit/test_KafkaPublisher.py b/onestop-python-client/test/unit/test_KafkaPublisher.py index f43d3f6..6357a3c 100644 --- a/onestop-python-client/test/unit/test_KafkaPublisher.py +++ b/onestop-python-client/test/unit/test_KafkaPublisher.py @@ -14,7 +14,7 @@ class test_KafkaPublisher(unittest.TestCase): def setUp(cls): print("Set it up!") cls.conf_w_security = { - "metadata_type" : "GRANULE", + "kafka_publisher_metadata_type" : 
"GRANULE", "brokers" : "onestop-dev-cp-kafka:9092", "schema_registry" : "http://onestop-dev-cp-schema-registry:8081", "security" : { @@ -40,7 +40,7 @@ def tearDown(self): def test_init_happy_nonconditional_params(self): publisher = KafkaPublisher(**self.conf_w_security) - self.assertEqual(publisher.metadata_type, self.conf_w_security['metadata_type']) + self.assertEqual(publisher.metadata_type, self.conf_w_security['kafka_publisher_metadata_type']) self.assertEqual(publisher.brokers, self.conf_w_security['brokers']) self.assertEqual(publisher.schema_registry, self.conf_w_security['schema_registry']) self.assertEqual(publisher.security_enabled, self.conf_w_security['security']['enabled']) @@ -64,11 +64,11 @@ def test_init_security_disabled(self): def test_init_metadata_type_valid(self): publisher = KafkaPublisher(**self.conf_w_security) - self.assertEqual(publisher.metadata_type, self.conf_w_security['metadata_type']) + self.assertEqual(publisher.metadata_type, self.conf_w_security['kafka_publisher_metadata_type']) def test_init_metadata_type_invalid(self): wrong_metadata_type_config = dict(self.conf_w_security) - wrong_metadata_type_config['metadata_type'] = "invalid_type" + wrong_metadata_type_config['kafka_publisher_metadata_type'] = "invalid_type" self.assertRaises(ValueError, KafkaPublisher, **wrong_metadata_type_config) @@ -115,7 +115,7 @@ def test_register_client_wo_security(self, mock_client): @patch('onestop.KafkaPublisher.SerializingProducer') def test_create_producer_calls_AvroSerializer(self, mock_serializing_publisher, mock_avro_serializer): conf_w_security_collection = dict(self.conf_w_security) - conf_w_security_collection['metadata_type'] = "COLLECTION" + conf_w_security_collection['kafka_publisher_metadata_type'] = "COLLECTION" publisher = KafkaPublisher(**conf_w_security_collection) reg_client = publisher.register_client() @@ -130,7 +130,7 @@ def test_create_producer_calls_AvroSerializer(self, mock_serializing_publisher, def test_create_producer_collection_w_security(self, mock_serializing_producer, mock_avro_serializer): conf_w_security_collection = dict(self.conf_w_security) topic = conf_w_security_collection['collection_topic_publish'] - conf_w_security_collection['metadata_type'] = 'COLLECTION' + conf_w_security_collection['kafka_publisher_metadata_type'] = 'COLLECTION' publisher = KafkaPublisher(**conf_w_security_collection) reg_client = MagicMock() @@ -157,7 +157,7 @@ def test_create_producer_collection_w_security(self, mock_serializing_producer, def test_create_producer_collection_wo_security(self, mock_serializing_producer, mock_avro_serializer): conf_wo_security_collection = dict(self.conf_wo_security) topic = conf_wo_security_collection['collection_topic_publish'] - conf_wo_security_collection['metadata_type'] = 'COLLECTION' + conf_wo_security_collection['kafka_publisher_metadata_type'] = 'COLLECTION' publisher = KafkaPublisher(**conf_wo_security_collection) reg_client = MagicMock() @@ -180,7 +180,7 @@ def test_create_producer_collection_wo_security(self, mock_serializing_producer, def test_create_producer_granule_w_security(self, mock_serializing_producer, mock_avro_serializer): conf_w_security_granule = dict(self.conf_w_security) topic = conf_w_security_granule['granule_topic_publish'] - conf_w_security_granule['metadata_type'] = 'GRANULE' + conf_w_security_granule['kafka_publisher_metadata_type'] = 'GRANULE' publisher = KafkaPublisher(**conf_w_security_granule) reg_client = MagicMock() @@ -207,7 +207,7 @@ def test_create_producer_granule_w_security(self, 
mock_serializing_producer, moc def test_create_producer_granule_wo_security(self, mock_serializing_producer, mock_avro_serializer): conf_wo_security_granule = dict(self.conf_wo_security) exp_topic = conf_wo_security_granule['granule_topic_publish'] - conf_wo_security_granule['metadata_type'] = 'GRANULE' + conf_wo_security_granule['kafka_publisher_metadata_type'] = 'GRANULE' publisher = KafkaPublisher(**conf_wo_security_granule) reg_client = MagicMock() diff --git a/onestop-python-client/test/unit/test_SqsHandlers.py b/onestop-python-client/test/unit/test_SqsHandlers.py index 5bba184..231e6cf 100644 --- a/onestop-python-client/test/unit/test_SqsHandlers.py +++ b/onestop-python-client/test/unit/test_SqsHandlers.py @@ -22,7 +22,7 @@ def setUp(self): 'access_key': 'test_access_key', 'secret_key': 'test_secret_key', 'access_bucket': 'https://archive-testing-demo.s3-us-east-2.amazonaws.com', - 'type': 'COLLECTION', + 's3_message_adapter_metadata_type': 'COLLECTION', 'file_id_prefix': 'gov.noaa.ncei.csb:', 'collection_id': 'fdb56230-87f4-49f2-ab83-104cfd073177', 'registry_base_url': 'http://localhost/onestop/api/registry', diff --git a/onestop-python-client/test/unit/util/test_S3MessageAdapter.py b/onestop-python-client/test/unit/util/test_S3MessageAdapter.py index 925be2e..4eb277f 100644 --- a/onestop-python-client/test/unit/util/test_S3MessageAdapter.py +++ b/onestop-python-client/test/unit/util/test_S3MessageAdapter.py @@ -6,6 +6,7 @@ class S3MessageAdapterTest(unittest.TestCase): s3ma = None + config_dict = None recs1 = \ [{ @@ -52,24 +53,34 @@ class S3MessageAdapterTest(unittest.TestCase): def setUp(self): print("Set it up!") - config_dict = { + self.config_dict = { 'access_key': 'test_access_key', 'secret_key': 'test_secret_key', 'access_bucket': 'https://archive-testing-demo.s3-us-east-2.amazonaws.com', - 'type': 'COLLECTION', + 's3_message_adapter_metadata_type': 'COLLECTION', 'file_id_prefix': 'gov.noaa.ncei.csb:', 'collection_id': 'fdb56230-87f4-49f2-ab83-104cfd073177', 'log_level': 'DEBUG' } - self.s3_utils = S3Utils(**config_dict) - self.s3ma = S3MessageAdapter(**config_dict) + self.s3_utils = S3Utils(**self.config_dict) + self.s3ma = S3MessageAdapter(**self.config_dict) self.region = 'us-east-2' def tearDown(self): print("Tear it down!") + def test_metadata_type_lowercase(self): + metadata_type = 'collection' + uppercase_metadata_type = metadata_type.upper() + config = dict(self.config_dict) + config['s3_message_adapter_metadata_type'] = metadata_type + + s3MA = S3MessageAdapter(**config) + + self.assertEqual(uppercase_metadata_type, s3MA.metadata_type) + @mock_s3 def test_transform(self): s3 = self.s3_utils.connect('client', 's3', self.region) @@ -90,11 +101,8 @@ def test_transform(self): @mock_s3 def test_extra_parameters_constructor(self): - testParams = {"access_bucket": "blah1", - "type": "blah2", - "file_id_prefix": "blah3", - "collection_id": "blah4", - "extra": "extra value"} + testParams = dict(self.config_dict) + testParams['extra'] = 'extra value' self.assertRaises(Exception, S3MessageAdapter(**testParams)) if __name__ == '__main__': diff --git a/onestop-python-client/test/unit/util/test_S3Utils.py b/onestop-python-client/test/unit/util/test_S3Utils.py index 6508837..830a1d8 100644 --- a/onestop-python-client/test/unit/util/test_S3Utils.py +++ b/onestop-python-client/test/unit/util/test_S3Utils.py @@ -21,7 +21,7 @@ def setUp(self): 'access_key': 'test_access_key', 'secret_key': 'test_secret_key', 'access_bucket': 'https://archive-testing-demo.s3-us-east-2.amazonaws.com', - 
'type': 'COLLECTION', + 'metadata_type': 'COLLECTION', 'file_id_prefix': 'gov.noaa.ncei.csb:', 'collection_id': 'fdb56230-87f4-49f2-ab83-104cfd073177', 'log_level': 'DEBUG' diff --git a/onestop-python-client/test/unit/util/test_SqsConsumer.py b/onestop-python-client/test/unit/util/test_SqsConsumer.py index ef50b20..03ee897 100644 --- a/onestop-python-client/test/unit/util/test_SqsConsumer.py +++ b/onestop-python-client/test/unit/util/test_SqsConsumer.py @@ -13,7 +13,7 @@ class SqsConsumerTest(unittest.TestCase): 's3_region': 'us-east-2', 's3_bucket': 'archive-testing-demo', 'sqs_url': 'https://sqs.us-east-2.amazonaws.com/798276211865/cloud-archive-client-sqs', - 'type': 'COLLECTION', + 'metadata_type': 'COLLECTION', 'file_id_prefix': 'gov.noaa.ncei.csb:', 'collection_id': 'fdb56230-87f4-49f2-ab83-104cfd073177', 'registry_base_url': 'http://localhost/onestop/api/registry', diff --git a/scripts/config/csb-data-stream-config.yml b/scripts/config/csb-data-stream-config.yml index 2d25328..f110852 100644 --- a/scripts/config/csb-data-stream-config.yml +++ b/scripts/config/csb-data-stream-config.yml @@ -1,6 +1,10 @@ +# COLLECTION or GRANULE +kafka_consumer_metadata_type: COLLECTION +kafka_publisher_metadata_type: COLLECTION +s3_message_adapter_metadata_type: COLLECTION + format: csv headers: UNIQUE_ID,FILE_UUID,LON,LAT,DEPTH,TIME,PLATFORM_NAME,PROVIDER -type: COLLECTION collection_id: fdb56230-87f4-49f2-ab83-104cfd073177 registry_base_url: https://internal-a683c98a66fb011eaa4230e0d5e5657f-369075387.us-east-1.elb.amazonaws.com access_bucket: https://archive-testing-demo.s3-us-east-2.amazonaws.com diff --git a/scripts/sqs-to-registry/config/e2e.yml b/scripts/sqs-to-registry/config/e2e.yml index 4c2c800..a2bdcfc 100644 --- a/scripts/sqs-to-registry/config/e2e.yml +++ b/scripts/sqs-to-registry/config/e2e.yml @@ -14,14 +14,16 @@ s3_bucket2: noaa-nccf-dev-archive #CSB stream config format: csv headers: UNIQUE_ID,FILE_UUID,LON,LAT,DEPTH,TIME,PLATFORM_NAME,PROVIDER -type: COLLECTION collection_id: fdb56230-87f4-49f2-ab83-104cfd073177 access_bucket: https://archive-testing-demo.s3-us-east-2.amazonaws.com #access_bucket: https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com file_identifier_prefix: "gov.noaa.ncei.csb:" # COLLECTION or GRANULE -metadata_type: granule +kafka_consumer_metadata_type: GRANULE +kafka_publisher_metadata_type: GRANULE +s3_message_adapter_metadata_type: COLLECTION + registry_base_url: http://onestop-registry:80 onestop_base_url: http://onestop-search:8080 From c34b33891daae02fb81e0a9fca5a11d38e79050f Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 11 Jun 2021 13:24:55 -0600 Subject: [PATCH 091/100] 1507-Added metadata_type validation to S3MessageAdapter and tests. 
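
The diff below adds the new check; a short illustrative call shows the intended behavior (the configuration values are the same placeholders used in the unit tests):

```python
from onestop.util.S3MessageAdapter import S3MessageAdapter

# An unrecognized metadata type should now fail fast during construction.
try:
    S3MessageAdapter(
        access_bucket='https://archive-testing-demo.s3-us-east-2.amazonaws.com',
        s3_message_adapter_metadata_type='invalid_type',
        file_id_prefix='gov.noaa.ncei.csb:',
        collection_id='fdb56230-87f4-49f2-ab83-104cfd073177')
except ValueError as err:
    print(err)  # metadata_type of 'INVALID_TYPE' must be 'COLLECTION' or 'GRANULE'
```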
--- .../onestop/util/S3MessageAdapter.py | 3 +++ .../test/unit/util/test_S3MessageAdapter.py | 11 +++++++++++ 2 files changed, 14 insertions(+) diff --git a/onestop-python-client/onestop/util/S3MessageAdapter.py b/onestop-python-client/onestop/util/S3MessageAdapter.py index cb8ffe0..9b74bb3 100644 --- a/onestop-python-client/onestop/util/S3MessageAdapter.py +++ b/onestop-python-client/onestop/util/S3MessageAdapter.py @@ -58,6 +58,9 @@ def __init__(self, access_bucket, s3_message_adapter_metadata_type, file_id_pre self.logger = ClientLogger.get_logger(self.__class__.__name__, log_level, False) self.logger.info("Initializing " + self.__class__.__name__) + if self.metadata_type not in ['COLLECTION', 'GRANULE']: + raise ValueError("metadata_type of '%s' must be 'COLLECTION' or 'GRANULE'"%(self.metadata_type)) + if wildargs: self.logger.debug("Superfluous parameters in constructor call: " + str(wildargs)) diff --git a/onestop-python-client/test/unit/util/test_S3MessageAdapter.py b/onestop-python-client/test/unit/util/test_S3MessageAdapter.py index 4eb277f..93dfed2 100644 --- a/onestop-python-client/test/unit/util/test_S3MessageAdapter.py +++ b/onestop-python-client/test/unit/util/test_S3MessageAdapter.py @@ -71,6 +71,17 @@ def setUp(self): def tearDown(self): print("Tear it down!") + def test_init_metadata_type_valid(self): + publisher = S3MessageAdapter(**self.config_dict) + + self.assertEqual(publisher.metadata_type, self.config_dict['s3_message_adapter_metadata_type']) + + def test_init_metadata_type_invalid(self): + wrong_metadata_type_config = dict(self.config_dict) + wrong_metadata_type_config['s3_message_adapter_metadata_type'] = "invalid_type" + + self.assertRaises(ValueError, S3MessageAdapter, **wrong_metadata_type_config) + def test_metadata_type_lowercase(self): metadata_type = 'collection' uppercase_metadata_type = metadata_type.upper() From 1566e1c1e9fd80873b277ba5c43e7ed18f802382 Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 11 Jun 2021 14:29:49 -0600 Subject: [PATCH 092/100] 1507-Adjusted log messages in KafkaConsumer and KafkaPublisher create methods. Mostly added, removed a redundant one.Adjusted one in deliver reports from error to info. 
--- onestop-python-client/onestop/KafkaConsumer.py | 3 +-- onestop-python-client/onestop/KafkaPublisher.py | 7 +++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/onestop-python-client/onestop/KafkaConsumer.py b/onestop-python-client/onestop/KafkaConsumer.py index b15b529..c064dd9 100644 --- a/onestop-python-client/onestop/KafkaConsumer.py +++ b/onestop-python-client/onestop/KafkaConsumer.py @@ -170,9 +170,8 @@ def create_consumer(self, registry_client): conf['ssl.key.location'] = self.security_keyLoc conf['ssl.certificate.location'] = self.security_certLoc - self.logger.debug("conf: "+str(conf)) + self.logger.debug("Deserializing conf: "+str(conf)) metadata_consumer = DeserializingConsumer(conf) - self.logger.debug("topic: "+str(topic)) metadata_consumer.subscribe([topic]) return metadata_consumer diff --git a/onestop-python-client/onestop/KafkaPublisher.py b/onestop-python-client/onestop/KafkaPublisher.py index 15a5d3b..da0e16d 100644 --- a/onestop-python-client/onestop/KafkaPublisher.py +++ b/onestop-python-client/onestop/KafkaPublisher.py @@ -144,8 +144,11 @@ def create_producer(self, registry_client): if self.metadata_type == "GRANULE": topic = self.granule_topic + self.logger.debug("topic: "+str(topic)) metadata_schema = registry_client.get_latest_version(topic + '-value').schema.schema_str + self.logger.debug("metadata_schema: "+metadata_schema) + metadata_serializer = AvroSerializer(schema_str=metadata_schema, schema_registry_client=registry_client) conf = {'bootstrap.servers': self.brokers} @@ -156,7 +159,7 @@ def create_producer(self, registry_client): conf['ssl.certificate.location'] = self.security_certLoc conf['value.serializer'] = metadata_serializer - + self.logger.debug("Serializing conf: "+str(conf)) metadata_producer = SerializingProducer(conf) return metadata_producer @@ -172,7 +175,7 @@ def delivery_report(self, err, msg): if err is not None: self.logger.error('Message delivery failed: {}'.format(err)) else: - self.logger.error('Message delivered to {} [{}]'.format(msg.topic(), msg.partition())) + self.logger.info('Message delivered to {} [{}]'.format(msg.topic(), msg.partition())) @staticmethod def get_collection_key_from_uuid(collection_uuid): From 3946bf1bf36fd5957dbab2b8cc3cec273ab65721 Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 11 Jun 2021 14:30:47 -0600 Subject: [PATCH 093/100] 1507-KafkaPublisher consolidated create method's config into one spot. 
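
In rough outline, this change means create_producer() assembles a single configuration dict and hands it to SerializingProducer once. A simplified sketch follows (the helper name and the security dict keys are illustrative; the real method also derives the topic and Avro serializer from the schema registry client):

```python
from confluent_kafka import SerializingProducer

def build_producer(brokers, metadata_serializer, security):
    """Assemble the consolidated producer conf in one spot (sketch only)."""
    conf = {
        'bootstrap.servers': brokers,
        'value.serializer': metadata_serializer,
    }
    if security.get('enabled'):
        conf['security.protocol'] = 'SSL'
        conf['ssl.ca.location'] = security['caLoc']
        conf['ssl.key.location'] = security['keyLoc']
        conf['ssl.certificate.location'] = security['certLoc']
    return SerializingProducer(conf)
```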
--- onestop-python-client/onestop/KafkaPublisher.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/onestop-python-client/onestop/KafkaPublisher.py b/onestop-python-client/onestop/KafkaPublisher.py index da0e16d..9206fe0 100644 --- a/onestop-python-client/onestop/KafkaPublisher.py +++ b/onestop-python-client/onestop/KafkaPublisher.py @@ -150,7 +150,9 @@ def create_producer(self, registry_client): self.logger.debug("metadata_schema: "+metadata_schema) metadata_serializer = AvroSerializer(schema_str=metadata_schema, schema_registry_client=registry_client) - conf = {'bootstrap.servers': self.brokers} + conf = { + 'bootstrap.servers': self.brokers, + 'value.serializer': metadata_serializer} if self.security_enabled: conf['security.protocol'] = 'SSL' @@ -158,7 +160,6 @@ def create_producer(self, registry_client): conf['ssl.key.location'] = self.security_keyLoc conf['ssl.certificate.location'] = self.security_certLoc - conf['value.serializer'] = metadata_serializer self.logger.debug("Serializing conf: "+str(conf)) metadata_producer = SerializingProducer(conf) return metadata_producer From 9ce570e366506bdd74035aabfbf85bc81eb57872 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 15 Jun 2021 09:23:15 -0600 Subject: [PATCH 094/100] 1507-To the scripts readme added more information about helm, build information, and tried to organize by the automated process vs manual. Second pass to come in next story. --- scripts/README.md | 156 ++++++++++++++++++++++++++++++---------------- 1 file changed, 104 insertions(+), 52 deletions(-) diff --git a/scripts/README.md b/scripts/README.md index e0276cb..4773928 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -1,60 +1,112 @@ -#Scripts +# Using onestop-python-client ## Table of Contents -* [Quickstart](#quickstart) -* [Kubectl Pod Verification](#kubectl-pod-verification) -* [Load Data](#load-data) -* [Updating Containers](#updating-containers) +* [Setup](#setup) + * [Helm](#helm) + * [Use Helm to Create a Script Container](#use-helm-to-create-a-script-container) + * [Using Helm Config File](#using-helm-config-file) + * [Helm Pulling of Image](#helm-pulling-of-image) + * [Startup Helm Script Container](#startup-helm-script-container) + * [Manually Setup Environment](#manually-setup-environment) +* [Building](#building) + * [Rebuilding Code or Scripts](#rebuilding-code-or-scripts) + * [Rebuilding Containers](#rebuilding-containers) +* [Load Data into OneStop](#load-data-into-onestop) + * [onestop-test-data repository](#onestop-test-data-repositoryhttpsgithubcomcedardevsonestop-test-data) + * [osim-deployment repository](#osim-deployment-repositoryhttpsgithubcomcedardevsosim-deployment) +* [OneStop Quickstart](https://cedardevs.github.io/onestop/developer/quickstart) -This directory contains scripts that use the onestop-python-library to send data to a OneStop. - -## Quickstart -- Install conda (miniconda works). -- Restart terminal or source files to recognize conda commands. 
-- Create a new conda environment and activate it - - `conda create -n onestop-clients python=3` - - `conda activate onestop-clients` - - `pip install setuptools` - -- Install any libraries needed by your sme script - - Ex: `pip install PyYaml` - -- Build the latest onestop-python-client - - `pip uninstall onestop-python-client-cedardevs` - - `pip install ./onestop-python-client` (run from root of this repository) - -- Input credentials for helm in the file `helm/onestop-sqs-consumer/values.yaml` - - Then: - - `helm uninstall sme` - - `helm install sme helm/onestop-sqs-consumer` - -## Kubectl Pod Verification -- Verify onestop-client pod is running, copy the pod name. - - `kubectl get pods` - -- Exec into it - - `kubectl exec -it -- sh` where the is listed in `kubectl get pods` - -- Check logs - - `kubectl logs ` - -## Load Data -There are several repositories to aid in loading data into a OneStop. Please read the appropriate repository's readme for accurate and up to date usage information. +## Setup +To use onestop-python-client there are two options: helm or manually. + +### Helm +#### Use Helm to Create a Script Container +We use helm to pull a OneStop-Clients image (specified in `helm//values.yml`) and deploy a kubernetes container that can communicate to the configured OneStop. It also copies over the onestop-python-client and scripts directories to the container. + +Those configuration values are in this repo under `helm//values.yml`. Our helm is configured to create a configuration file in the script container at `/etc/confif/confif.yml` from the appropriate values.yml. You can use this or create your own configuration file and put it in the script container. Our scripts are configured to use the command-line parameter `conf` or will look for the helm configuration file that isn't specified. + +#### Using Helm Config File +If you are going to use the helm generated configuration file then you should probably edit the conf section in the helm values.yaml file for the container you will have helm create (Ex. 1helm/onestop-sqs-consumer/values.yaml1). + * *_metadata_type - should be granule or collection, depending on what you are sending/receiving. + * schema_registry, registry_base_url, and onestop_base_url - set to what you are communicating with, especially if not on cedar-devs talking to its OneStop. + * AWS section - there's several config values for AWS you probably need to change, many are set to testing values. + * Kafka section - There is a whole Kafka section that if you are using kafka you might need to adjust this. This isn't perhaps the most preferred way to submit to OneStop. + * log_level - If you are troubleshooting or just want to see a more granular log level set this to DEBUG. + +#### Helm Pulling of Image +When you run the helm install command helm pulls the specified image from the repository that is indicated in the helm values yaml file. 
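+
+For example, the image that helm pulls is declared in that values file; the exact keys can vary by chart, but the section looks roughly like this (repository and tag shown are illustrative):
+
+```yaml
+image:
+  repository: cedardevs/onestop-python-client
+  tag: latest
+```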
+ +#### Startup Helm Script Container +The helm install command, done from the root of this repository, will use the charts in the helm directory to create a container called `sme` using the helm charts and configuration information in this repo fom `helm/onestop-sqs-consumer` + * cd to the root of this repository + * `helm uninstall sme` + * `helm install sme helm/onestop-sqs-consumer` + +To check on the container run this and look for the pod with the : + +`kubectl get pods` +``` +(base) ~/repo/onestop-clients 07:00 PM$ kubectl get pods +NAME READY STATUS RESTARTS AGE +sme-onestop-sqs-consumer-5c678675f7-q2s7h 0/1 Pending 0 26s +``` +If it isn't in a 'Running' state within 10 seconds then something is probably wrong. If it hasn't crashed yet, CrashBackLoop state, then it is probably a timeout problem trying to connect to a resource. -- To load data locally you will need a OneStop running locally. This is an example of how to do that, more info in the OneStop repository. - - `skaffold dev --status-check false` - -- To load test collections from onestop-test-data repository (read the README for more information) to your local OneStop: - - `./upload.sh demo http://localhost/onestop/api/registry` - -- From the osim-deployment repository there is a staging-scripts directory with scripts for loading some data: - - `./copyS3objects.sh -max_files=5 copy-config/archive-testing-demo-csb.sh` +Once the container is running, which should only be a matter of seconds, you can "ssh" into the container via this command. -## Updating Containers -- If the onestop-python-client code changes then run: - - `docker build . -t cedardevs/onestop-python-client:latest` +NOTE: you need to have the container name listed in the `kubectl get pods` command results for this command: -- If just the scripts change - - `docker build ./scripts/sqs-to-registry -t cedardevs/onestop-s3-handler` +`kubectl exec --stdin --tty sme-onestop-sqs-consumer-5c678675f7-kmpvn -- /bin/bash` + +### Manually Setup Environment +* Install conda (miniconda works). +* Restart terminal or source files to recognize conda commands. +* Create a new conda environment and activate it (not convinced you need this) + * `conda create -n onestop-clients python=3` + * `conda activate onestop-clients` + * `pip install setuptools` + +* Install any libraries needed by your script + * Ex: `pip install PyYaml` + + `pip install ./onestop-python-client` + + To test the import, try this and it shouldn't give an error: + + ``` + $ python3 + >>> import onestop_client + ``` + +## Building +Building locally is not necessary if you are using the images that we build automatically. Currently, we build an image via docker files with the tag 'latest' when *any* commits, even branches, are made to github and trigger CircleCI. +You might want to do this is to make code changes, build them, and then run your python script against that pip installed onestop-python-client locally. + +### Rebuilding Code or Scripts +* Install the latest onestop-python-client into directory + + `pip uninstall onestop-python-client-cedardevs` + + `pip install ./onestop-python-client` (run from root of this repository) + +### Rebuilding Containers +* If the onestop-python-client code changes then run: + + `docker build . 
+
+* If just the scripts change
+
+  `docker build ./scripts/sqs-to-registry -t cedardevs/onestop-s3-handler`
+
+  `docker build ./scripts/sme/ -t cedardevs/onestop-sme:latest`
+
+## Load Data into OneStop
+There are several repositories to aid in loading data into a OneStop. Please read the appropriate repository's readme for accurate and up to date usage information.
+### [onestop-test-data repository](https://github.com/cedardevs/onestop-test-data)
+  `./upload.sh demo http://localhost/onestop/api/registry`
+### [osim-deployment repository](https://github.com/cedardevs/osim-deployment)
+  From the osim-deployment repository there is a staging-scripts directory with scripts for loading some data:
+
+  `./copyS3objects.sh -max_files=5 copy-config/archive-testing-demo-csb.sh`

From b79a4c0aa5a5f43612b98c256546c6f298c20a1d Mon Sep 17 00:00:00 2001
From: Erin
Date: Tue, 15 Jun 2021 14:44:46 -0600
Subject: [PATCH 095/100] 1509-added cedar devs image into images directory for GH pages.

---
 docs/images/cedar_devs_logo.png | Bin 0 -> 14294 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 docs/images/cedar_devs_logo.png

diff --git a/docs/images/cedar_devs_logo.png b/docs/images/cedar_devs_logo.png
new file mode 100644
index 0000000000000000000000000000000000000000..cd4b7fc4ead9678c5cd174d63fc583448e048603
GIT binary patch
literal 14294
[... binary image data omitted ...]
zmfX`jiJgp}vgP0J{J(x>f>%AU%G*EHH&K?v~_vesc z<>ZyWy|r?uRZH9(-dg`pK7scyYwmGp$Ow;;-0$;d@OpTVEZIYrsUquImUIpsOdZqLknzO34}k2CM;-Kw!?oSv?fASCxb#sWd80U3$5`fl8#xD@rQ(a2TBX zzwWyCE67w+Q)BKOr&8%u!h$E~KSPVB?yTr6XIZKZ0Yd-@n8Y0enPb@#ODFoJ(yz$X zGIYk}8>JIxOpJ_N9`Pej*)x+bbR=X4+^xMTtmo8y7-C@MH!v>GaN z08sWI4~bkF={bGz)XX+s+oX})4+CKUg-Ge`aR3|&_s4iwty7ys3XxPG6ZC%4tWJCu zNsG!k@aFd^kH5Zk$JSV2g7J2Tpwlas8J9c@skQf;%qBn} z6a4Y#5n#RZ$&qTpYB6^o{h3Ci0Z(p!Vuygt|5_{%}-qoXyhWf2o^L_e9|AsaEfnE zI2`e39ME$wJU3zUuBWawA{q{fmH#zIFTE2=tzqb%U#(LEDy_<@OzBFo(&YjxZOOF2 zX`g2uyt&;O%mFyV5bIGWUKH4xolhQURySF-@?&f09(47z$Z5q~5tm%jToN~$v(;Uf zxpDOs^_7-h&tx)z6VB8AB1bK+V941^i6J-EMIIh3{^YAuiAR2dP~YPGquvNLVs zgozRrO1a1kK$Bfup-~{~*mX>{knPhugc}W^Y~^ zCpY)qT2gqYun}r4L*dj0>7|?7Z8-6dpV!h=zOUiYcnQ>cti{`xX4O7OFBxgq#Y}Hz zt$h?h3ynl;gEhccWE{NyzAK!Fbb5mT|1B_SI=%PIcRg9WslJ|mHqTsPPpCfB5$!ce z+9Xap>=-Fz&?CF{Y<+UpxtnRWNDJx+T*4p^DapaCH~13%}Lhi{bPW zc)dY4qTFCDWi2^m9NGEDAj3e#(Cz~DuqUq^O8ZX8xBE#3o8wgbAeN%j>VWzO4VC-~ z{`ZkFkw`UM)h}+!SwLuz59;$9(yhHusZbi}RPP?p3+Awd7q$cjgXz6w)YtNUzW-WD zZb_R~>yr61T(1Xt6fT!*KL7cJf0)a`cuzRR>JOnKFqjM@&MsMftL`)n>FshWT0w;1 zld4>G=(%ssA9RI@_)xgCK^}7Z%I%7~2ktGyvk5CaoNQQPHk(J>OY`}BfS1RA35Ui( z8qsPg{uKXeb2XTBr20d+$LGQa8+lIv1hfjRdIR<9>kZJ+kt=OpDuW%var&rzMw{n z+Zf~_5}`!fP~3Q5TqW5b92_h(6U-?lv&rsBvJ$`Ec=W|cwz4;)gtW+UaKwDbM4!E5`9->evfcm0_LYM&dW7vc+s?w}X zfIdQ2c_A?$$5K#O)LseRp!)ljc)Y#0L{Mu2+R@K)vs1g$s;R=L&o^6y_ zj~BtKP0^tI0%!y`pa1^+l^$-7OYB9B2B$g?;t0aR!{CfW#?$m(VnFc;iPu z=?jul6E?He>@{kg8fTB^$a3XVif@+`+N!+;tLDd}5|MW`dW}^l6|Kbs@4+_nfpg*p z@#)fg4@zWG_Z>zJ#h5k7qa;mAMEg$gUBj9g@>XSgWwNc>MI+Jjrc=$HjSWpUoxDz2 zF?l^c9%u8zMIZnIEK2AFMU!&>t>13l?e6Zj^&?!4af3VpnM?*^C&fm3g;JJK!>P-; zO3qA;PBWqvFgOgZrOCZtI{np|W42cc082J4UVx53eJ#_=CXA*X5|$)DYcW~UHrciA z8g5!yak|c2=16S}1_PL$I(=I3Waf@mS=$Mxz1#On*N{Bx53xMu Date: Thu, 17 Jun 2021 10:39:35 -0600 Subject: [PATCH 096/100] 1509-moved public-user.md to cli docs and created main readme page for cli. Added TOC --- docs/cli/README.md | 7 ++++ .../quickstart.md => developer-quickstart.md} | 7 +++- ...uickstart.md => public-user-quickstart.md} | 33 +++++++++++++++---- docs/cli/{public-user => }/scdr-files.md | 2 -- docs/public-user.md | 17 ---------- 5 files changed, 39 insertions(+), 27 deletions(-) create mode 100644 docs/cli/README.md rename docs/cli/{developer/quickstart.md => developer-quickstart.md} (95%) rename docs/cli/{public-user/quickstart.md => public-user-quickstart.md} (78%) rename docs/cli/{public-user => }/scdr-files.md (98%) delete mode 100644 docs/public-user.md diff --git a/docs/cli/README.md b/docs/cli/README.md new file mode 100644 index 0000000..7b4521b --- /dev/null +++ b/docs/cli/README.md @@ -0,0 +1,7 @@ +

+
+ +## Table of contents +* [Public User Quickstart](public-user-quickstart) +* [SCDR Files](scdr-files) +* [Developer](developer-quickstart) \ No newline at end of file diff --git a/docs/cli/developer/quickstart.md b/docs/cli/developer-quickstart.md similarity index 95% rename from docs/cli/developer/quickstart.md rename to docs/cli/developer-quickstart.md index 7fde3b9..e2d70dc 100644 --- a/docs/cli/developer/quickstart.md +++ b/docs/cli/developer-quickstart.md @@ -1,4 +1,9 @@ -# OneStop CLI tool for developers + +
+# OneStop CLI tool for Developers + +## Table of Contents + The `onestop-cli` tool provides a convenient command line interface for the OneStop search API. This tool is partly generated from the OpenAPI spec in the search module. We have added custom middleware for convenient syntax for frequently used filters and queries. diff --git a/docs/cli/public-user/quickstart.md b/docs/cli/public-user-quickstart.md similarity index 78% rename from docs/cli/public-user/quickstart.md rename to docs/cli/public-user-quickstart.md index ceae673..5a93c30 100644 --- a/docs/cli/public-user/quickstart.md +++ b/docs/cli/public-user-quickstart.md @@ -1,13 +1,35 @@ + +
# OneStop CLI tool +## Table of Contents +* [OneStop Info](#onestop-info) +* [Installation](#installation) + * [Requirements](#requirements) + * [Install and run using a docker container (golang not required)](#install-and-run-using-a-docker-container-golang-not-required) + * [Download and use as Go package](#download-and-use-as-go-package) +* [Configuration](#configuration) +* [Usage](#usage) + * [Verbose](#verbose) + * [Run against a test or local API](#run-against-a-test-or-local-api) + * [Get](#get) + * [Search](#search) + * [by identifier](#by-identifier) + * [by parent identifier](#by-parent-identifier) + * [by date](#by-date) + * [by geometry](#by-geometry) + * [combinations](#combinations) + The `onestop-cli` tool provides a convenient command line interface for the OneStop search API. +There are three primary methods to search the data contained within onestop via the command line interface (CLI) tool. +If you're new to the OneStop ecosystem, you should interact with one or both of the running instances of the UI to get some familiarity with basic search features. Then if you choose to be more of a power user, we encourage you to explore the ins and outs of our API and CLI. -Read the [OneStop OpenAPI spec 2.0.0](https://app.swaggerhub.com/apis/cedardevs/one-stop_search_api/2.0.0). -Or check the [OneStop OpenAPI spec 2.4.0](https://app.swaggerhub.com/apis/cedarbot/OneStop/2.4.0). +## OneStop Info +* [OneStop OpenAPI spec 2.0.0](https://app.swaggerhub.com/apis/cedardevs/one-stop_search_api/2.0.0) +* [OneStop OpenAPI spec 2.4.0](https://app.swaggerhub.com/apis/cedarbot/OneStop/2.4.0) ## Installation - -### Requirements - +### Requirements Either golang, or docker. Direct downloads of binaries will be available in the future. @@ -157,6 +179,3 @@ Longhand query, including the `--verbose` flag to provide more logging: For complex query and filter structure, refer to the [short hand documentation](https://github.com/danielgtaylor/openapi-cli-generator/tree/master/shorthand). Note: As it is now, you cannot combine the flags with json shorthand. e.g. This will not work - `onestop searchcollection --area="POLYGON(( 22.686768 34.051522, 30.606537 34.051522, 30.606537 41.280903, 22.686768 41.280903, 22.686768 34.051522 ))" --query="satellite" filters[]{ type:datetime, after:2017-01-01T00:00:00Z, before:2017-02-01T00:00:00Z} ` - -
- diff --git a/docs/cli/public-user/scdr-files.md b/docs/cli/scdr-files.md similarity index 98% rename from docs/cli/public-user/scdr-files.md rename to docs/cli/scdr-files.md index b54ca8d..a01e1de 100644 --- a/docs/cli/public-user/scdr-files.md +++ b/docs/cli/scdr-files.md @@ -4,8 +4,6 @@ ## scdr-files configurations -Config locations - - Users can supply a configuration to map scdr-file type short names to OneStop IDs. See default config in [default config](cli/scdr-files-config.yaml). File can be yaml or json, but must be named "scdr-files-config" and placed in one of the following locations- project directory, current working directory, /etc/scdr-files/, or $HOME/.scdr-files. diff --git a/docs/public-user.md b/docs/public-user.md deleted file mode 100644 index c473edb..0000000 --- a/docs/public-user.md +++ /dev/null @@ -1,17 +0,0 @@ - -
- -# Public User Navigation Guide -As a public user of OneStop-cli, there are three primary methods to search the data contained within onestop via the command line interface (CLI) tool. If you're new to the OneStop ecosystem, you should interact with one or both of the running instances of the UI to get some familiarity with basic search features. Then if you choose to be more of a power user, we encourage you to explore the ins and outs of our API and and CLI. - -Take a look at our navigational suggestions below, or simply click the `Next` link at the bottom of the page to start at the top and work your way down. - -## Table of Contents -* Command Line Interface - - [Developer Quick Start](cli/developer/quickstart.md) - - public user - - [Quick Start](cli/public-user/quickstart.md) - - [scdr-files](cli/public-user/scdr-files.md) - -
- \ No newline at end of file From 0d782faa44c0ad9b473c244e580a0cdde85353c8 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 23 Jun 2021 15:40:54 -0600 Subject: [PATCH 097/100] 1509-Removed log_level from credentials-template.yml since that isn't credential information. Updated comment at top. --- onestop-python-client/config/credentials-template.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/onestop-python-client/config/credentials-template.yml b/onestop-python-client/config/credentials-template.yml index f94c70b..0fe300c 100644 --- a/onestop-python-client/config/credentials-template.yml +++ b/onestop-python-client/config/credentials-template.yml @@ -1,4 +1,4 @@ -#Copy me as credentials.yml and update with values and exclude the file from git +# This is a template to use for confidential information. Do not edit this file, but copy it to a different location. #NESDIS-SANDBOX sandbox: access_key: access_key_value_here @@ -8,5 +8,3 @@ sandbox: registry: username: rw_user password: rw_user_pwd - -log_level: INFO \ No newline at end of file From 45bb70a105b8dbbc0e12779c472ea549bc80651d Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 23 Jun 2021 16:34:09 -0600 Subject: [PATCH 098/100] 1509-In docs folder created files for onestop-python-client, helm, and scripts. Removed old now obsolete readmes from other folders. --- docs/README.md | 43 ++++++-- docs/build-pipeline.md | 42 ++++++++ docs/helm.md | 43 ++++++++ docs/onestop-python-client.md | 80 ++++++++++++++ docs/scripts.md | 66 ++++++++++++ onestop-python-client/README.md | 180 -------------------------------- onestop-python-client/setup.py | 2 +- scripts/README.md | 112 -------------------- scripts/sme/README.md | 39 ------- 9 files changed, 268 insertions(+), 339 deletions(-) create mode 100644 docs/build-pipeline.md create mode 100644 docs/helm.md create mode 100644 docs/onestop-python-client.md create mode 100644 docs/scripts.md delete mode 100644 onestop-python-client/README.md delete mode 100644 scripts/README.md delete mode 100644 scripts/sme/README.md diff --git a/docs/README.md b/docs/README.md index a8f99a1..088a4ec 100644 --- a/docs/README.md +++ b/docs/README.md @@ -17,13 +17,42 @@

## Table of contents +* [onestop-python-client](#onestop-python-client) +* [Python Scripts](#python-scripts) +* [Helm](#helm) +* [CLI](#cli) +* [Build Pipeline and Test Execution](build-pipeline) -- [Project Overview](#project-overview) -- [Navigating The Documentation](#navigating-the-documentation) - - [By User Type](#by-user-type) - - [By Project Component](#by-project-component) -- [External Documentation](#docs/cli/developer/quickstart.md) +This OneStop-clients project is a collection of clients to aid in communicating with OneStop and directly with the cloud. -## Project Overview -OneStop-clients is an open-sourced commandline interface and subject matter consumer clients ... +## [onestop-python-client](onestop-python-client) +The onestop-python-client is a tool for subject matter experts (SME) to publish and consume metadata to and from OneStop as well as directly to the cloud. +This would enable someone to feed data into OneStop, have OneStop digest it, and then read it out via a python script. +[onestop-python-client](onestop-python-client) - More details. + +Additional information: +* [onestop-test-data repository readme](https://github.com/cedardevs/onestop-test-data/blob/master/README.md) - loading test data into OneStop. +* [OneStop documentation](https://cedardevs.github.io/onestop/) - OneStop documentation. + +## [Python Scripts](scripts) +There are some sample python scripts that use the onestop-python-client in the scripts directory. + +[python scripts](scripts) - More details. + +## [Helm](helm) +There is a helm directory full of helm charts to create different kubernetes containers which each contain from this repository the onestop-python-client code and the scripts directory. + They have python installed so that a SME user could execute scripts from within. + +[Helm](helm) - More details. + +## [CLI](cli) +The CLI is an open-sourced commandline interface for OneStop's search API. + +* [Developer Quickstart](cli/developer-quickstart) + +* [Public User Quickstart](cli/public-user-quickstart) + +* [SCDR Files](cli/scdr-files) + +[CLI](cli) - More details. \ No newline at end of file diff --git a/docs/build-pipeline.md b/docs/build-pipeline.md new file mode 100644 index 0000000..9f22549 --- /dev/null +++ b/docs/build-pipeline.md @@ -0,0 +1,42 @@ + +
+ +# Build Pipeline and Test Execution + +## Table of Contents +* [CircleCI](#circleci) +* [Building Manually](#building-manually) +* [Test Execution](#test-execution) + +## CircleCI +Currently, this project uses CircleCI to build the multiple images needed. If you example the circleci configuration file you will see what tests it executes and images it builds with what tags. + +## Building Manually +* If you change the onestop-python-client code then run this, from the project root: + +``` +docker build . -t cedardevs/onestop-python-client:latest +``` + +* If you modify just the scripts then run this (only need to do the one relevant for your script), from the project root: + +``` +docker build ./scripts/sqs-to-registry -t cedardevs/onestop-s3-handler:latest +``` + +``` +docker build ./scripts/sme/ -t cedardevs/onestop-sme:latest +``` + +## Test Execution +To execute the onestop-python-client tests via python's unittest execute this from the onestop-python-client directory: + +``` +python3 -m unittest discover +``` + +If you wish to run a specific test file, here's an example: + +``` +python -m unittest test/unit/util/test_S3MessageAdapter.py +``` \ No newline at end of file diff --git a/docs/helm.md b/docs/helm.md new file mode 100644 index 0000000..328cc8e --- /dev/null +++ b/docs/helm.md @@ -0,0 +1,43 @@ + +
+ +# Helm + +## Table of Contents +* [Intro](#intro) +* [Helm Configuration](#helm-configuration) +* [Create and Start the Script Container](#create-and-start-the-script-container) + +## Intro +This project has a helm directory which is set up to pull a onestop-python-client image (specified in the image section in `helm//values.yml`) and create a kubernetes container with that image inside. The container should be able to communicate to the configured OneStop stack (specified in the conf section in `helm//values.yml`). + It also copies the onestop-python-client and scripts directories into the container. + +## Helm Configuration +The helm charts are setup to create a configuration file from the template at `helm//values.yml` and copy it to `/etc/config/config.yml` within the container. You don't have to use this file but most likely one will be necessary in a location where the scripts can access it. + +Please see the [onestop-python-client configuration](onestop-python-client#configuration) section for configuration information. + +Please see the [scripts](scripts) documentation for information on how to pass in a configuration file via CLI and execute the scripts. + +## Create and Start the Script Container +The helm install command, done from the root of this repository, will use the charts in the helm directory to create the specified container. + +In this example we will create the `sme` using the helm charts and configuration information in this repo from `helm/onestop-sqs-consumer` +1. cd to the root of this project +1. `helm uninstall sme` +1. `helm install sme helm/onestop-sqs-consumer` + +To check the container status execute `kubectl get pods` and look for the pod with the expected name, as defined by the `name` field in the `helm//Chart.yaml`: + +``` +(base) ~/repo/onestop-clients 07:00 PM$ kubectl get pods +NAME READY STATUS RESTARTS AGE +sme-onestop-sqs-consumer-5c678675f7-q2s7h 0/1 Pending 0 26s +``` +If it isn't in a 'Running' state within about 10 seconds then something is probably wrong. If it hasn't crashed yet (indicated by a STATUS of CrashBackLoop) then one possibility is a connection timeout trying to connect to a resource. + +Once the container is running you can exec into the container (much like "sshing") via this command, use the NAME from the `kubectl get pods` command: + +``` +kubectl exec --stdin --tty sme-onestop-sqs-consumer-5c678675f7-q2s7h -- /bin/bash +``` diff --git a/docs/onestop-python-client.md b/docs/onestop-python-client.md new file mode 100644 index 0000000..e447bc1 --- /dev/null +++ b/docs/onestop-python-client.md @@ -0,0 +1,80 @@ + +
+ +# OneStop Clients + +## Table of Contents +* [Prerequisites](#prerequisites) +* [Credentials](#credentials) +* [Configuration](#configuration) +* [Usage](#usage) +* [How to manually publish a new version of this client](#how-to-manually-publish-a-new-version-of-this-client) + +This python package provides an API to connect to OneStop's event stream (aka Inventory Manager). There are several utility modules in the onestop-python-client for posting to Registry or using kafka publishing/consuming to OneStop. There are also some cloud specific utility classes. + +## Prerequisites +If you need to bring up the OneStop stack, see the [OneStop quickstart documentation](https://github.com/cedardevs/onestop/blob/master/docs/developer/quickstart.md#quick-start-kubernetes--helm--skaffold) + +## Credentials +Copy the `onestop-python-client/config/credentials-template.yml` to a file and fill out the information you will need. If you are using a helm container then copy it to that container. + +## Configuration +Here are some configuration values and what they represent. You don't need everything, it depends on what onestop-python-client classes you are using. +If you are using the helm generated configuration file then look in the [helm configuration section](helm#helm-configuration) for what file to modify. + +* _metadata_type - should be granule or collection, depending on what you are sending/receiving. +* schema_registry, registry_base_url, and onestop_base_url - set to what you are communicating with, especially if not on cedar-devs talking to its OneStop. +* AWS section - there's several config values for AWS you probably need to change, many are set to testing values. +* Kafka section - There is a whole Kafka section that if you are using kafka you might need to adjust this. This isn't perhaps the most preferred way to submit to OneStop. [OneStop Kafka Topics](https://github.com/cedardevs/onestop/blob/master/kafka-common/src/main/java/org/cedar/onestop/kafka/common/constants/Topics.java) are defined here on how they get named if you do need to listen to a topic. It isn't created until information is published to it (be it via OneStop or these scripts). +* log_level - If you are troubleshooting or just want to see a more granular log level set this to DEBUG. + +## Usage +Once you have the OneStop stack (or your own kafka broker + schema registry) running, you are ready to install this package and start consuming messages. + +The `onestop_client` can be downloaded via pip, like so- + +`python3 -m pip install onestop-python-client-cedardevs` + +To test the import, try- + +``` +$ python3 +>>> import onestop_client +``` + +Look here for more information on executing [scripts](scripts). + +## How to manually publish a new version of this client +See the [build pipeline](build-pipeline) for how these images are automatically published. + +First you will need to setup your credentials. Create $HOME/.pypirc and update it with the cedardevs username, pw, and token. It will look like the following- +``` +[pypi] + username = __token__ + password = +``` +You'll need a couple tools to create the distribution and then publish it. To install these tools, run the following command- + +``` +python3 -m pip install --user --upgrade setuptools wheel twine +``` +Note: make sure the version on the setup file is changed + +To build the new distribution- +``` +python3 setup.py sdist bdist_wheel +``` + +That should create/update the dist/ directory. 
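+
+As a quick check, the dist/ listing after a successful build looks something like this (the exact filenames depend on the version set in setup.py, so these are illustrative):
+
+```
+$ ls dist/
+onestop-python-client-cedardevs-0.2.5.tar.gz
+onestop_python_client_cedardevs-0.2.5-py3-none-any.whl
+```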
+ +Now to push that to the PyPi repo- + +``` +python3 -m twine upload dist/* +``` + +#### Install onestop-python-client-cedardevs package + +``` +pip install onestop-python-client-cedardevs +``` diff --git a/docs/scripts.md b/docs/scripts.md new file mode 100644 index 0000000..43db8ca --- /dev/null +++ b/docs/scripts.md @@ -0,0 +1,66 @@ + +
+ +# Python Scripts for onestop-python-client + +## Table of Contents +* [Usage](#usage) +* [Setup](#setup) + * [Helm](#helm) + * [Manually Setup Python Environment](#manually-setup-python-environment) +* [Load Data into OneStop](#load-data-into-onestop) + * [onestop-test-data repository](#onestop-test-data-repositoryhttpsgithubcomcedardevsonestop-test-data) + * [osim-deployment repository](#osim-deployment-repositoryhttpsgithubcomcedardevsosim-deployment) +* [OneStop Quickstart](https://cedardevs.github.io/onestop/developer/quickstart) + +## Usage +Depending on what the script's imports are you may have to install some dependencies via `pip install ...`. +Once ready to execute a script go to the root directory of this project. An example command might be: + +`python scripts/sme/sme.py -cred cred.yml` + +NOTE: + * For some scripts you need a credentials file manually and specify the relative location on the command-line via `-cred` + * The default configuration is set to the location helm will create it, `/etc/config/config.yml`. If you need to specify a different one use the `-conf` command line argument. [Configuration](helm) information is spelled out for helm, since some values you may have to modify if using helm. + +## Setup +To use the onestop-python-client there are two options: +* Use our [Helm](helm) charts (Preferred and easiest way) +* Or manually set up your python environment + +### Helm +It is recommended to use our helm charts to create the script container. Go [here](helm) for more information. + +### Manually Setup Python Environment +* Install conda (miniconda works). +* Restart terminal or source files to recognize conda commands. +* Create a new conda environment and activate it (not convinced you need this) + * `conda create -n onestop-clients python=3` + * `conda activate onestop-clients` + * `pip install setuptools` + +* Install any libraries needed by your script + * Ex: `pip install PyYaml` + +* Install onestop-python-client: + 1. `pip uninstall onestop-python-client-cedardevs` + 1. [Build the onestop-python-client](build-pipeline) if you have modified the code, otherwise it will access the image on github. + 1. `pip install ./onestop-python-client` + + To test the import, try this. It shouldn't give an error: + + ``` + $ python3 + >>> import onestop_client + ``` + +## Load Data into OneStop +There are several repositories to aid in loading data into a OneStop. Please read the appropriate repository's readme for accurate and up to date usage information. + +### [onestop-test-data repository](https://github.com/cedardevs/onestop-test-data) + `./upload.sh demo http://localhost/onestop/api/registry` + +### [osim-deployment repository](https://github.com/cedardevs/osim-deployment) + From the osim-deployment repository there is a staging-scripts directory with scripts for loading some data: + + `./copyS3objects.sh -max_files=5 copy-config/archive-testing-demo-csb.sh` diff --git a/onestop-python-client/README.md b/onestop-python-client/README.md deleted file mode 100644 index 77986a6..0000000 --- a/onestop-python-client/README.md +++ /dev/null @@ -1,180 +0,0 @@ -# OneStop Clients - -This python package provides an API to connect to OneStop's event stream (aka Inventory Manager). At this early stage there is only a single module for consuming messages from the kafka brokers that back OneStop. 
-## AWS Credentials -Copy credentials-template.yml to credentials.yml and insert your ACCESS_KEY and SECRET_KEY - -## KafkaPublisher -Relies on fastavro <1.0 and confluent-kafka <1.5 - -## prerequisites -You will need a kafka broker and a schema-registry running to test this package. To bring up the OneStop stack, see the [OneStop quickstart documentation](https://github.com/cedardevs/onestop/blob/master/docs/developer/quickstart.md#quick-start-kubernetes--helm--skaffold) - -## usage -Once you have the OneStop stack (or your own kafka broker + schema registry) running, you are ready to install the package and start consuming messages. - -The `onestop_client` can be downloaded via pip, like so- - -`python3 -m pip install onestop-python-client-cedardevs` - -To test the import, try- - -``` -$ python3 ->>> import onestop_client -``` - -Now we are ready to try a script. Our first example, [smeFunc.py](#examples/smeFunc.py), imports our onestop_client package, and passes to it the id, topic, and message handler function. Our library then handles the work to connect to kafka and deserialize the message. - -Here is how to run it in k8s so that it can connect to the kafka broker and schema registry- -``` -kubectl apply -f examples/pyconsumer-pod.yml -``` - -At the moment, that pod will tail -f /dev/null to stay open so you can exec into the container with - -` -kubectl exec -it pod/pyconsumer -- bash -` -# In the container -Manually add smeFunc.py -Install requests library ->pip install requests - -# In the cluster load some test data into the cluster -./upload.sh IM /Users/dneufeld/repos/onestop-test-data/DEM http://localhost/registry - -#Test it out using cli args -python smeFunc.py -cmd consume -b onestop-dev-cp-kafka:9092 -s http://onestop-dev-cp-schema-registry:8081 -t psi-registry-collection-parsed-changelog -g sme-test -o earliest - - -python smeFunc.py -cmd produce -b onestop-dev-cp-kafka:9092 -s http://onestop-dev-cp-schema-registry:8081 -t psi-collection-input-unknown - -Or you can use env vars available so you can run this - -``` -python ./smeFunc.py -b $KAFKA_BROKERS -s $SCHEMA_REGISTRY -t $TOPIC -g $GROUP_ID -o $OFFSET -``` - -# packaing and publishing new version -======= -The general purpose of this python package is to provide an API to connect to OneStop's event stream (aka Inventory Manager). -This would enable someone to feed data into OneStop, have OneStop digest it, and then read it out via a python script, such as the example [smeFunc.py](#examples/smeFunc.py). -See the OneStop readme for an example of loading test data into OneStop. -At this early stage there is only a single module for consuming messages from the kafka brokers that back OneStop. - -## Prerequisites -1. Since you will need a kafka broker and a schema-registry running you will need OneStop and start it up - [OneStop quickstart documentation](https://github.com/cedardevs/onestop/blob/master/docs/developer/quickstart.md#quick-start-kubernetes--helm--skaffold) - Setup and start up the OneStop stack - -2. Install this python-client and other dependencies via pip - `pip install -r requirements.txt` - - To test the import, try this and it shouldn't give an error: - - ``` - $ python3 - >>> import onestop_client - ``` - -Now you are ready to start consuming messages. 
- -## Load Test Data -If you need to load test data then look in the OneStop repo's [OneStop quickstart documentation](https://github.com/cedardevs/onestop/blob/master/docs/developer/quickstart.md#quick-start-kubernetes--helm--skaffold) -for information on loading test data. - -## Example - -Our first example, [smeFunc.py](#examples/smeFunc.py), imports our onestop_client package, and passes to it the id, topic, and message handler function. -Our library then handles the work to connect to kafka and deserialize the message. - -1. Here is how to run it in k8s so that the python script can connect to the kafka broker and schema registry: - ``` - kubectl apply -f examples/pyconsumer-pod.yml - ``` - -1. Run this so you can exec the python script within the container: - - ``` - kubectl exec -it pyconsumer bash - ``` - -1. Then there should be environment variables (you can verify via `echo $OFFSET`) available so you can run this: - - ``` - python ./smeFunc.py -b $KAFKA_BROKERS -s $SCHEMA_REGISTRY -t $TOPIC -g $GROUP_ID -o $OFFSET - ``` - - If not some sensible defaults are in pyconsumer-pod.yml: - - ``` - python ./smeFunc.py -b onestop-dev-cp-kafka:9092 -s http://onestop-dev-cp-schema-registry:8081 -t psi-registry-granule-parsed-changelo21` -g sme-test -o earliest - ``` - - NOTE: - If an error prints out of `ERROR Message handler failed: 'NoneType' object is not subscriptable` that implies the data it was traversing does not have one of the requested values. - - Example: If this was in the python script you ran `print(value['fileInformation']['name'])` but the data does not have a value of `fileInformation` it will throw that error. - - To fix this you can simply remove ['fileInformation'] - -## How to publish a new version of this client ->>>>>>> master:python-client/README.md -First you will need to setup your credentials. Create $HOME/.pypirc and update it with the cedardevs username, pw, and token. It will look like the following- -``` -[pypi] - username = __token__ - password = -``` -You'll need a couple tools to create the distribution and then publish it. To install these tools, run the following command- - -``` -python3 -m pip install --user --upgrade setuptools wheel twine -``` -Note: make sure the version on the setup file is changed - -To build the new distribution- -``` -python3 setup.py sdist bdist_wheel -``` - -That should create/update the dist/ directory. - -Now to push that to the PyPi repo- - -``` -python3 -m twine upload dist/* -``` - -#### Install onestop-python-client-cedardevs package - -``` -pip install onestop-python-client-cedardevs -``` - -importing onestop-python-client-cedardevs package - -producer module have the following functions to import - produce: initiate sending a message to Kafka - list_topics: Request list of topics from cluster - produce_raw_message: Uses user's inputs to construct a structured input value - produce_and_publish_raw_collection: raw collection input value and key to initiate sending message to Kafka - produce_and_publish_raw_granule: raw granule input value and key to initiate sending message to Kafka - - ``` - -from onestop.producer import ... - -``` - -consumer module have the following functions to import: - consume: consume messages from a given topic - -``` - -from onestop.consumer import ... 
- -``` - -##Docker -docker build --tag cedardevs/onestop-pyconsumer:latest -docker push cedardevs/onestop-pyconsumer:latest \ No newline at end of file diff --git a/onestop-python-client/setup.py b/onestop-python-client/setup.py index 19ff9fd..8707d5e 100644 --- a/onestop-python-client/setup.py +++ b/onestop-python-client/setup.py @@ -1,6 +1,6 @@ import setuptools -with open("README.md", "r") as fh: +with open("../docs/onestop-python-client/README.md", "r") as fh: long_description = fh.read() setuptools.setup( diff --git a/scripts/README.md b/scripts/README.md deleted file mode 100644 index 4773928..0000000 --- a/scripts/README.md +++ /dev/null @@ -1,112 +0,0 @@ -# Using onestop-python-client - -## Table of Contents -* [Setup](#setup) - * [Helm](#helm) - * [Use Helm to Create a Script Container](#use-helm-to-create-a-script-container) - * [Using Helm Config File](#using-helm-config-file) - * [Helm Pulling of Image](#helm-pulling-of-image) - * [Startup Helm Script Container](#startup-helm-script-container) - * [Manually Setup Environment](#manually-setup-environment) -* [Building](#building) - * [Rebuilding Code or Scripts](#rebuilding-code-or-scripts) - * [Rebuilding Containers](#rebuilding-containers) -* [Load Data into OneStop](#load-data-into-onestop) - * [onestop-test-data repository](#onestop-test-data-repositoryhttpsgithubcomcedardevsonestop-test-data) - * [osim-deployment repository](#osim-deployment-repositoryhttpsgithubcomcedardevsosim-deployment) -* [OneStop Quickstart](https://cedardevs.github.io/onestop/developer/quickstart) - -## Setup -To use onestop-python-client there are two options: helm or manually. - -### Helm -#### Use Helm to Create a Script Container -We use helm to pull a OneStop-Clients image (specified in `helm//values.yml`) and deploy a kubernetes container that can communicate to the configured OneStop. It also copies over the onestop-python-client and scripts directories to the container. - -Those configuration values are in this repo under `helm//values.yml`. Our helm is configured to create a configuration file in the script container at `/etc/confif/confif.yml` from the appropriate values.yml. You can use this or create your own configuration file and put it in the script container. Our scripts are configured to use the command-line parameter `conf` or will look for the helm configuration file that isn't specified. - -#### Using Helm Config File -If you are going to use the helm generated configuration file then you should probably edit the conf section in the helm values.yaml file for the container you will have helm create (Ex. 1helm/onestop-sqs-consumer/values.yaml1). - * *_metadata_type - should be granule or collection, depending on what you are sending/receiving. - * schema_registry, registry_base_url, and onestop_base_url - set to what you are communicating with, especially if not on cedar-devs talking to its OneStop. - * AWS section - there's several config values for AWS you probably need to change, many are set to testing values. - * Kafka section - There is a whole Kafka section that if you are using kafka you might need to adjust this. This isn't perhaps the most preferred way to submit to OneStop. - * log_level - If you are troubleshooting or just want to see a more granular log level set this to DEBUG. - -#### Helm Pulling of Image -When you run the helm install command helm pulls the specified image from the repository that is indicated in the helm values yaml file. 
- -#### Startup Helm Script Container -The helm install command, done from the root of this repository, will use the charts in the helm directory to create a container called `sme` using the helm charts and configuration information in this repo fom `helm/onestop-sqs-consumer` - * cd to the root of this repository - * `helm uninstall sme` - * `helm install sme helm/onestop-sqs-consumer` - -To check on the container run this and look for the pod with the : - -`kubectl get pods` -``` -(base) ~/repo/onestop-clients 07:00 PM$ kubectl get pods -NAME READY STATUS RESTARTS AGE -sme-onestop-sqs-consumer-5c678675f7-q2s7h 0/1 Pending 0 26s -``` -If it isn't in a 'Running' state within 10 seconds then something is probably wrong. If it hasn't crashed yet, CrashBackLoop state, then it is probably a timeout problem trying to connect to a resource. - -Once the container is running, which should only be a matter of seconds, you can "ssh" into the container via this command. - -NOTE: you need to have the container name listed in the `kubectl get pods` command results for this command: - -`kubectl exec --stdin --tty sme-onestop-sqs-consumer-5c678675f7-kmpvn -- /bin/bash` - -### Manually Setup Environment -* Install conda (miniconda works). -* Restart terminal or source files to recognize conda commands. -* Create a new conda environment and activate it (not convinced you need this) - * `conda create -n onestop-clients python=3` - * `conda activate onestop-clients` - * `pip install setuptools` - -* Install any libraries needed by your script - * Ex: `pip install PyYaml` - - `pip install ./onestop-python-client` - - To test the import, try this and it shouldn't give an error: - - ``` - $ python3 - >>> import onestop_client - ``` - -## Building -Building locally is not necessary if you are using the images that we build automatically. Currently, we build an image via docker files with the tag 'latest' when *any* commits, even branches, are made to github and trigger CircleCI. -You might want to do this is to make code changes, build them, and then run your python script against that pip installed onestop-python-client locally. - -### Rebuilding Code or Scripts -* Install the latest onestop-python-client into directory - - `pip uninstall onestop-python-client-cedardevs` - - `pip install ./onestop-python-client` (run from root of this repository) - -### Rebuilding Containers -* If the onestop-python-client code changes then run: - - `docker build . -t cedardevs/onestop-python-client:latest` - -* If just the scripts change - - `docker build ./scripts/sqs-to-registry -t cedardevs/onestop-s3-handler` - - `docker build ./scripts/sme/ -t cedardevs/onestop-sme:latest` - -## Load Data into OneStop -There are several repositories to aid in loading data into a OneStop. Please read the appropriate repository's readme for accurate and up to date usage information. 
- -### [onestop-test-data repository](https://github.com/cedardevs/onestop-test-data) - `./upload.sh demo http://localhost/onestop/api/registry` - -### [osim-deployment repository](https://github.com/cedardevs/osim-deployment) - From the osim-deployment repository there is a staging-scripts directory with scripts for loading some data: - - `./copyS3objects.sh -max_files=5 copy-config/archive-testing-demo-csb.sh` diff --git a/scripts/sme/README.md b/scripts/sme/README.md deleted file mode 100644 index 72a4c2b..0000000 --- a/scripts/sme/README.md +++ /dev/null @@ -1,39 +0,0 @@ -# SME Script - -## AWS Credentials -Populate values for ACCESS_KEY and SECRET_KEY in credentials.yml - -## Helm Values -Update values in onestop-sqs-consumer/vaules.yaml - -## Prerequisites -You will need a kafka broker and a schema-registry running to test this package. To bring up the OneStop stack, see the [OneStop quickstart documentation](https://github.com/cedardevs/onestop/blob/master/docs/developer/quickstart.md#quick-start-kubernetes--helm--skaffold) - -### Start up kubernetes clusters using skaffold - -``skaffold dev --status-check=false --force=false`` - -### Load test data to expose Kafka Topics -```./upload.sh IM COOPS/ localhost/onestop/api/registry``` - -### Install onestop-python-client repo into directory - -``pip install ./onestop-python-client `` - -## Usage - -### Upload CSB Data to first topic (psi-granule-input-unknown) -```python launch_e2e.py -conf config/aws-util-config-dev.yml -cred config/credentials-template.yml``` - -### Start up sme container -```helm install sme helm/onestop-sqs-consumer``` - -### Exec into sme container and run extraction code - -```kubectl exec -it -- bash``` - -```python sme.py``` - - -### Look at newly added data in parsed-granule-input topic -```python smeFunc.py``` From 7e41832c6eac488e74613a9fbdf912bd73513798 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 23 Jun 2021 16:40:21 -0600 Subject: [PATCH 099/100] 1509-Updated onestop-python-client setup with correct readme location. --- onestop-python-client/setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onestop-python-client/setup.py b/onestop-python-client/setup.py index 8707d5e..d907f1f 100644 --- a/onestop-python-client/setup.py +++ b/onestop-python-client/setup.py @@ -1,6 +1,6 @@ import setuptools -with open("../docs/onestop-python-client/README.md", "r") as fh: +with open("../docs/onestop-python-client.md", "r") as fh: long_description = fh.read() setuptools.setup( @@ -9,7 +9,7 @@ author="CEDARDEVS", author_email="cedar.cires@colorado.edu", description="A python package for processing messages from the NOAA OneStop event stream (aka Inventory Manager).", - long_description="This package provides subject matter experts an API to interact with the kafka topics backing OneStop.", + long_description="This package provides subject matter experts an API to interact with OneStop via kafka, cloud, and REST.", long_description_content_type="text/markdown", url="https://github.com/cedardevs/onestop-clients", packages=setuptools.find_packages(exclude=("tests",)), From 4fe81b1a2afed48f40b8f4acf43bb5fe2f560dd8 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 23 Jun 2021 16:47:45 -0600 Subject: [PATCH 100/100] 1509-Removed reading readme from onestop-python-client, wasn't being used anyways. Docker cannot see one directory above working directory. 
--- onestop-python-client/setup.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/onestop-python-client/setup.py b/onestop-python-client/setup.py index d907f1f..5754bba 100644 --- a/onestop-python-client/setup.py +++ b/onestop-python-client/setup.py @@ -1,8 +1,5 @@ import setuptools -with open("../docs/onestop-python-client.md", "r") as fh: - long_description = fh.read() - setuptools.setup( name="onestop-python-client-cedardevs", version="0.2.5",