From 4e12e1dd793df67b88fef38a24a84d5e0f12cd04 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 13 Apr 2021 13:21:42 -0600 Subject: [PATCH 01/49] Changed S3Utils, S3MessageAdapter classes' constructors(adjusted documentation) to take dictionary with extra parameters allowed as well as methods within this class not to reference config but the variable that was set. Adjusted effected tests. --- .../config/aws-util-config-dev.yml | 2 + .../onestop/util/S3MessageAdapter.py | 110 ++++++------- onestop-python-client/onestop/util/S3Utils.py | 113 +++++++------ .../tests/SqsHandlersTest.py | 40 +++-- .../tests/util/S3MessageAdapterTest.py | 41 ++++- .../tests/util/S3UtilsTest.py | 151 +++++++++--------- scripts/launch_e2e.py | 45 ++++-- scripts/launch_pyconsumer.py | 12 +- 8 files changed, 291 insertions(+), 223 deletions(-) diff --git a/onestop-python-client/config/aws-util-config-dev.yml b/onestop-python-client/config/aws-util-config-dev.yml index ee1ad95..c30683e 100644 --- a/onestop-python-client/config/aws-util-config-dev.yml +++ b/onestop-python-client/config/aws-util-config-dev.yml @@ -3,9 +3,11 @@ log_level: INFO # AWS config values sqs_url: https://sqs.us-east-2.amazonaws.com/798276211865/cloud-archive-client-sqs +sqs_name: 'foobar' sqs_max_polls: 2 s3_region: "us-east-2" s3_bucket: archive-testing-demo +s3_key: 'ABI-L1b-RadF/2019/298/15/OR_ABI-L1b-RadF-M6C15_G16_s20192981500369_e20192981510082_c20192981510166.nc' #AWS config values for 2nd vault in different region vault_name: archive-vault-new diff --git a/onestop-python-client/onestop/util/S3MessageAdapter.py b/onestop-python-client/onestop/util/S3MessageAdapter.py index d640b77..1dda78c 100644 --- a/onestop-python-client/onestop/util/S3MessageAdapter.py +++ b/onestop-python-client/onestop/util/S3MessageAdapter.py @@ -1,10 +1,4 @@ -import yaml from onestop.util.ClientLogger import ClientLogger -""" -from onestop.info.ImMessage import ImMessage -from onestop.info.FileMessage import FileMessage -from onestop.info.Link import Link -""" from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.parsed_record import ParsedRecord, Publishing, ErrorEvent from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.file_location import FileLocation,FileLocationType @@ -14,81 +8,67 @@ from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.discovery import Discovery, Link - class S3MessageAdapter: """ A class used to extract information from sqs messages that have been triggered by s3 events and transform it into correct format for publishing to IM Registry Attributes ---------- - conf: yaml file - csb-data-stream-config.yml - s3_utils: S3Utils object - used to access objects inside of s3 buckets - logger: ClientLogger object - utilizes python logger library and creates logging for our specific needs - logger.info: ClientLogger object - logging statement that occurs when the class is instantiated - prefix_mapping: Dict - contains mapping of various line offices and their associated collection id + access_bucket: str + Cloud bucket to put in the links field when transformed. + type: str + COLLECTION or GRANULE + file_id_prefix: str + File prefix returned as fileIdentifier + collection_id: str + Collection this data belongs to. Returned as parent identifier. 
+ log_level: str + The log level to use for this class (Defaults to 'INFO') - Methods - ------- - collection_id_map(s3_key) - given an s3 key that contains one of the NESDIS line offices in its path, it will provide the corresponding collection id - - transform(recs) - transforms sqs message triggered by s3 event to correct format for publishing to IM registry - """ - def __init__(self, conf_loc, s3_utils): - """ - - :param conf_loc: yaml file - csb-data-stream-config.yml - :param s3_utils: S3Utils object - used to access objects inside of s3 buckets - - Other Attributes - ---------------- logger: ClientLogger object utilizes python logger library and creates logging for our specific needs logger.info: ClientLogger object logging statement that occurs when the class is instantiated - prefix_mapping: Dict - contains mapping of various line offices and their associated collection id - - """ - with open(conf_loc) as f: - self.conf = yaml.load(f, Loader=yaml.FullLoader) - - self.logger = ClientLogger.get_logger(self.__class__.__name__, self.conf['log_level'], False) - self.logger.info("Initializing " + self.__class__.__name__) - self.s3_utils = s3_utils - self.prefix_mapping = self.conf['prefixMap'] - - def collection_id_map(self, s3_key): + Methods + ------- + transform(recs) + transforms sqs message triggered by s3 event to correct format for publishing to IM registry + """ + def __init__(self, access_bucket, type, file_id_prefix, collection_id, log_level = 'INFO', **wildargs): """ - Given an s3 key that contains one of the NESDIS line offices in its path, it will provide the corresponding collection id + Parameters + ---------- + access_bucket: str + access bucket to put in the links field when transformed. + type: str + COLLECTION or GRANULE + file_id_prefix: str + File prefix returned as fileIdentifier + collection_id: str + Collection this data belongs to. Returned as parent identifier. + log_level: str + Log level for when logging in class. 
- :param s3_key: str - key path of object in s3 bucket - - :return: str - associated line office collection id """ - # Looks through our prefix map and returns appropriate collection id - for key in self.prefix_mapping: - if key in s3_key: - return self.prefix_mapping[key] + self.access_bucket = access_bucket + self.type = type + self.file_id_prefix = file_id_prefix + self.collection_id = collection_id + self.logger = ClientLogger.get_logger(self.__class__.__name__, log_level, False) + self.logger.info("Initializing " + self.__class__.__name__) + if wildargs: + self.logger.error("There were extra constructor arguments: " + str(wildargs)) def transform(self, recs): """ Transforms sqs message triggered by s3 event to correct format for publishing to IM registry - :param recs: dict - sqs event message + Parameters: + ---------- + recs: dict + sqs event message to transform :return: ParsedRecord Object The Parsed Record class is an avro schema generated class @@ -111,8 +91,8 @@ def transform(self, recs): fileInformation = FileInformation(name=file_name, size=file_size, checksums=[checkSum], optionalAttributes={}) # Relationship - relationshipType = RelationshipType(type=self.conf['type']) - relationship = Relationship(id=self.conf['collection_id'], type=relationshipType) + relationshipType = RelationshipType(type=self.type) + relationship = Relationship(id=self.collection_id, type=relationshipType) # File Location fileLocationType = FileLocationType(type='ARCHIVE') @@ -127,12 +107,12 @@ def transform(self, recs): publishing = Publishing(isPrivate=True) # Discovery - access_obj_uri = self.conf['access_bucket'] + "/" + s3_key + access_obj_uri = self.access_bucket + "/" + s3_key link1 = Link(linkName="Amazon S3", linkUrl=access_obj_uri, linkProtocol="HTTPS", linkFunction="download") link2 = Link(linkName="Amazon S3", linkUrl=s3_obj_uri, linkProtocol="Amazon:AWS:S3", linkFunction="download") # To Change? Come back to this later - parent_identifier = self.conf['collection_id'] - file_identifier = self.conf['file_identifier_prefix'] + file_name[:-4] + parent_identifier = self.collection_id + file_identifier = self.file_id_prefix + file_name[:-4] # Initializing most fields to their default values in the avro schema so that it doesn't cause an error in Kafka discovery = Discovery(links=[link1, link2], title=file_name, parentIdentifier=parent_identifier, diff --git a/onestop-python-client/onestop/util/S3Utils.py b/onestop-python-client/onestop/util/S3Utils.py index 7bb0fbe..60fb876 100644 --- a/onestop-python-client/onestop/util/S3Utils.py +++ b/onestop-python-client/onestop/util/S3Utils.py @@ -1,5 +1,5 @@ import logging -import yaml + import uuid import boto3 import botocore @@ -15,69 +15,70 @@ class S3Utils: Attributes ---------- - conf: yaml file - aws-util-config-dev.yml - cred: yaml file - credentials.yml - logger: ClientLogger object - utilizes python logger library and creates logging for our specific needs - logger.info: ClientLogger object - logging statement that occurs when the class is instantiated + access_key: str + Cloud access key + + secret_key: str + Cloud secret key + + log_level: str + The log level to use for this class (Defaults to 'INFO') + + logger: ClientLogger object + Creates logging for us to log to. 
Methods ------- - connect(client_type, region) - connects to a boto3 client + connect(client_type, region) + connects to a boto3 client - objectkey_exists(bucket, s3_key) - checks to see if a s3 key path exists in a particular bucket + objectkey_exists(bucket, s3_key) + checks to see if a s3 key path exists in a particular bucket - get_uuid_metadata(boto_client, bucket, s3_key) - returns metadata uuid of an s3 object if it has one, otherwise prints that one does not exist + get_uuid_metadata(boto_client, bucket, s3_key) + returns metadata uuid of an s3 object if it has one, otherwise prints that one does not exist - add_uuid_metadata(boto_client, bucket, s3_key) - adds metadata uuid to an s3 object + add_uuid_metadata(boto_client, bucket, s3_key) + adds metadata uuid to an s3 object - upload_s3(boto_client, local_file, bucket, s3_key, overwrite) - uploads a file to s3 bucket + upload_s3(boto_client, local_file, bucket, s3_key, overwrite) + uploads a file to s3 bucket - get_csv_s3(boto_client, bucket, key) - gets a csv file from s3 bucket using smart open library + get_csv_s3(boto_client, bucket, key) + gets a csv file from s3 bucket using smart open library - read_bytes_s3(boto_client, bucket, key) - returns raw information of s3 object + read_bytes_s3(boto_client, bucket, key) + returns raw information of s3 object - upload_archive(boto_client, vault_name, src_data) - Add an archive to an Amazon S3 Glacier vault. The upload occurs synchronously. + upload_archive(boto_client, vault_name, src_data) + Add an archive to an Amazon S3 Glacier vault. The upload occurs synchronously. - s3_to_glacier(boto_client, bucket_name, key) - Changes storage class of s3 object from s3 -> glacier. Utilizes s3 client type + s3_to_glacier(boto_client, bucket_name, key) + Changes storage class of s3 object from s3 -> glacier. Utilizes s3 client type - s3_to_glacier_object_lock(boto_client, bucket_name, key, object_lock_mode, object_lock_retention) - Changes storage class of s3 object from s3 -> glacier and places it in object lock mode. Utilizes s3 client type + s3_to_glacier_object_lock(boto_client, bucket_name, key, object_lock_mode, object_lock_retention) + Changes storage class of s3 object from s3 -> glacier and places it in object lock mode. 
Utilizes s3 client type - s3_restore(boto_client, bucket_name, key, days) - Restores an object in S3 glacier back to S3 for specified amount of days + s3_restore(boto_client, bucket_name, key, days) + Restores an object in S3 glacier back to S3 for specified amount of days - retrieve_inventory(boto_client, vault_name) - Initiate an Amazon Glacier inventory-retrieval job + retrieve_inventory(boto_client, vault_name) + Initiate an Amazon Glacier inventory-retrieval job - retrieve_inventory_results(vault_name, boto_client, job_id) - Retrieve the results of an Amazon Glacier inventory-retrieval job + retrieve_inventory_results(vault_name, boto_client, job_id) + Retrieve the results of an Amazon Glacier inventory-retrieval job """ conf = None - def __init__(self, conf_loc, cred_loc): - - with open(conf_loc) as f: - self.conf = yaml.load(f, Loader=yaml.FullLoader) - - with open(cred_loc) as f: - self.cred = yaml.load(f, Loader=yaml.FullLoader) - - self.logger = ClientLogger.get_logger(self.__class__.__name__, self.conf['log_level'], False) + def __init__(self, access_key, secret_key, log_level = 'INFO', **wildargs): + self.access_key = access_key + self.secret_key = secret_key + self.logger = ClientLogger.get_logger(self.__class__.__name__, log_level, False) self.logger.info("Initializing " + self.__class__.__name__) + if wildargs: + self.logger.error("There were extra constructor arguments: " + str(wildargs)) + def connect(self, client_type, region): """ Connects to a boto3 client @@ -92,21 +93,29 @@ def connect(self, client_type, region): """ if client_type == "s3": - boto = boto3.client("s3", aws_access_key_id=self.cred['sandbox']['access_key'], - aws_secret_access_key=self.cred['sandbox']['secret_key'], region_name=region) + boto = boto3.client( + "s3", + aws_access_key_id=self.access_key, + aws_secret_access_key=self.secret_key, + region_name=region) if client_type == "s3_resource": - boto = boto3.resource("s3", region_name=region, aws_access_key_id=self.cred['sandbox']['access_key'], - aws_secret_access_key=self.cred['sandbox']['secret_key'] ) + boto = boto3.resource( + "s3", + region_name=region, + aws_access_key_id=self.access_key, + aws_secret_access_key=self.secret_key) if client_type == "glacier": - boto = boto3.client("glacier", region_name=region, aws_access_key_id=self.cred['sandbox']['access_key'], - aws_secret_access_key=self.cred['sandbox']['secret_key']) + boto = boto3.client( + "glacier", + region_name=region,aws_access_key_id=self.access_key, + aws_secret_access_key=self.secret_key) if client_type == "session": boto = boto3.Session( - aws_access_key_id=self.cred['sandbox']['access_key'], - aws_secret_access_key=self.cred['sandbox']['secret_key'], + aws_access_key_id=self.access_key, + aws_secret_access_key=self.secret_key, ) return boto diff --git a/onestop-python-client/tests/SqsHandlersTest.py b/onestop-python-client/tests/SqsHandlersTest.py index 12323ef..bbe4210 100644 --- a/onestop-python-client/tests/SqsHandlersTest.py +++ b/onestop-python-client/tests/SqsHandlersTest.py @@ -1,7 +1,7 @@ import json import unittest import boto3 - +import yaml from moto import mock_s3 from moto import mock_sqs from tests.utils import abspath_from_relative, create_delete_message @@ -54,9 +54,22 @@ class SqsHandlerTest(unittest.TestCase): def setUp(self): print("Set it up!") + + with open(abspath_from_relative(__file__, "../config/csb-data-stream-config-template.yml")) as f: + self.stream_conf = yaml.load(f, Loader=yaml.FullLoader) + with open(abspath_from_relative(__file__, 
"../config/aws-util-config-dev.yml")) as f: + self.cloud_conf = yaml.load(f, Loader=yaml.FullLoader) + with open(abspath_from_relative(__file__, "../config/credentials-template.yml")) as f: + self.cred = yaml.load(f, Loader=yaml.FullLoader) + self.wp = WebPublisher(self.wp_config, self.cred_config) - self.su = S3Utils(self.aws_config, self.cred_config) - self.s3ma = S3MessageAdapter(self.csb_config, self.su) + self.su = S3Utils(self.cred['sandbox']['access_key'], + self.cred['sandbox']['secret_key'], + "DEBUG") + self.s3ma = S3MessageAdapter(self.stream_conf['access_bucket'], + self.stream_conf['type'], + self.stream_conf['file_identifier_prefix'], + self.stream_conf['collection_id']) def tearDown(self): print("Tear it down!") @@ -64,19 +77,21 @@ def tearDown(self): @mock_s3 @mock_sqs def init_s3(self): - bucket = self.su.conf['s3_bucket'] - key = self.su.conf['s3_key'] + bucket = self.cloud_conf['s3_bucket'] + key = self.cloud_conf['s3_key'] boto_client = self.su.connect("s3", None) boto_client.create_bucket(Bucket=bucket) boto_client.put_object(Bucket=bucket, Key=key, Body="foobar") - sqs_client = boto3.client('sqs', region_name=self.su.conf['s3_region']) - sqs_queue = sqs_client.create_queue(QueueName=self.su.conf['sqs_name']) + sqs_client = boto3.client('sqs', region_name=self.cloud_conf['s3_region']) + sqs_queue = sqs_client.create_queue(QueueName=self.cloud_conf['sqs_name']) self.sqs = SqsConsumer(self.aws_config, self.cred_config) - message = create_delete_message(self.su.conf['s3_region'], bucket, key) + message = create_delete_message(self.cloud_conf['s3_region'], bucket, key) sqs_client.send_message(QueueUrl=sqs_queue['QueueUrl'], MessageBody=json.dumps(message)) - return sqs_queue['QueueUrl'] + sqs_queue['QueueUrl'] + @mock_s3 + @mock_sqs def delete_handler_wrapper(self, recs): handler = create_delete_handler(self.wp) result = handler(recs) @@ -85,5 +100,8 @@ def delete_handler_wrapper(self, recs): @mock_sqs def test_delete_handler(self): mock_queue_url = self.init_s3() - sqs_queue = boto3.resource('sqs', region_name=self.su.conf['s3_region']).Queue(mock_queue_url) - self.sqs.receive_messages(sqs_queue, self.su.conf['sqs_max_polls'], self.delete_handler_wrapper) + sqs_queue = boto3.resource('sqs', region_name=self.stream_conf['s3_region']).Queue(mock_queue_url) + self.sqs.receive_messages(sqs_queue, self.stream_conf['sqs_max_polls'], self.delete_handler_wrapper) + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/onestop-python-client/tests/util/S3MessageAdapterTest.py b/onestop-python-client/tests/util/S3MessageAdapterTest.py index 41a8f9d..a960737 100644 --- a/onestop-python-client/tests/util/S3MessageAdapterTest.py +++ b/onestop-python-client/tests/util/S3MessageAdapterTest.py @@ -1,4 +1,6 @@ import unittest +import yaml + from moto import mock_s3 from tests.utils import abspath_from_relative from onestop.util.S3Utils import S3Utils @@ -51,22 +53,35 @@ class S3MessageAdapterTest(unittest.TestCase): def setUp(self): print("Set it up!") - self.s3_utils = S3Utils(abspath_from_relative(__file__, "../../config/aws-util-config-dev.yml"), - abspath_from_relative(__file__, "../../config/credentials-template.yml")) - self.s3ma = S3MessageAdapter(abspath_from_relative(__file__, "../../config/csb-data-stream-config-template.yml"), - self.s3_utils) + + with open(abspath_from_relative(__file__, "../../config/csb-data-stream-config-template.yml")) as f: + self.stream_conf = yaml.load(f, Loader=yaml.FullLoader) + with open(abspath_from_relative(__file__, 
"../../config/aws-util-config-dev.yml")) as f: + self.cloud_conf = yaml.load(f, Loader=yaml.FullLoader) + with open(abspath_from_relative(__file__, "../../config/credentials-template.yml")) as f: + self.cred = yaml.load(f, Loader=yaml.FullLoader) + + self.s3_utils = S3Utils(self.cred['sandbox']['access_key'], + self.cred['sandbox']['secret_key'], + "DEBUG") + self.s3ma = S3MessageAdapter(self.stream_conf['access_bucket'], + self.stream_conf['type'], + self.stream_conf['file_identifier_prefix'], + self.stream_conf['collection_id']) + + self.region = self.cloud_conf['s3_region'] + self.bucket = self.cloud_conf['s3_bucket'] def tearDown(self): print("Tear it down!") def test_parse_config(self): - self.assertFalse(self.s3ma.conf['collection_id']==None) - + self.assertFalse(self.stream_conf['collection_id'] == None) @mock_s3 def test_transform(self): - s3 = self.s3_utils.connect('s3', self.s3_utils.conf['s3_region']) - location = {'LocationConstraint': self.s3_utils.conf['s3_region']} + s3 = self.s3_utils.connect('s3', self.region) + location = {'LocationConstraint': self.region} bucket = 'nesdis-ncei-csb-dev' key = 'csv/file1.csv' key2 = 'csv/file2.csv' @@ -81,4 +96,14 @@ def test_transform(self): print(payload) self.assertTrue(payload!=None) + @mock_s3 + def test_extra_parameters_constructor(self): + testParams = {"access_bucket": "blah1", + "type": "blah2", + "file_id_prefix": "blah3", + "collection_id": "blah4", + "extra": "extra value"} + self.assertRaises(Exception, S3MessageAdapter(**testParams)) +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/onestop-python-client/tests/util/S3UtilsTest.py b/onestop-python-client/tests/util/S3UtilsTest.py index 34850ad..acb0af4 100644 --- a/onestop-python-client/tests/util/S3UtilsTest.py +++ b/onestop-python-client/tests/util/S3UtilsTest.py @@ -1,126 +1,130 @@ import csv import unittest import uuid +import yaml + from moto import mock_s3 from moto import mock_glacier - from tests.utils import abspath_from_relative from onestop.util.S3Utils import S3Utils class S3UtilsTest(unittest.TestCase): - su = None def setUp(self): print("Set it up!") - self.su = S3Utils(abspath_from_relative(__file__, "../../config/aws-util-config-dev.yml"), - abspath_from_relative(__file__, "../../config/credentials.yml")) - def tearDown(self): - print("Tear it down!") - # Remove files from bucket + with open(abspath_from_relative(__file__, "../../config/csb-data-stream-config-template.yml")) as f: + self.stream_conf = yaml.load(f, Loader=yaml.FullLoader) + with open(abspath_from_relative(__file__, "../../config/aws-util-config-dev.yml")) as f: + self.cloud_conf = yaml.load(f, Loader=yaml.FullLoader) + with open(abspath_from_relative(__file__, "../../config/credentials-template.yml")) as f: + self.cred = yaml.load(f, Loader=yaml.FullLoader) - def test_parse_config(self): - self.assertFalse(self.su.conf['sqs_url']==None) + self.s3_utils = S3Utils(self.cred['sandbox']['access_key'], + self.cred['sandbox']['secret_key'], + "DEBUG") + + self.region = self.cloud_conf['s3_region'] + self.region2 = self.region + self.bucket = self.cloud_conf['s3_bucket'] @mock_s3 def test_get_uuid_metadata(self): - boto_client = self.su.connect("s3_resource", None) + boto_client = self.s3_utils.connect("s3_resource", None) s3_key = "csv/file1.csv" - bucket = self.su.conf['s3_bucket'] - region = self.su.conf['s3_region'] - location = {'LocationConstraint': region} - boto_client.create_bucket(Bucket=bucket, CreateBucketConfiguration=location) + + location = 
{'LocationConstraint': self.region} + boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) obj_uuid = str(uuid.uuid4()) - boto_client.Object(bucket, s3_key).put(Bucket=bucket, Key=s3_key, Body="my_body", Metadata={'object-uuid': obj_uuid}) + boto_client.Object(self.bucket, s3_key).put(Bucket=self.bucket, Key=s3_key, Body="my_body", Metadata={'object-uuid': obj_uuid}) - self.assertFalse(self.su.get_uuid_metadata(boto_client, bucket, s3_key) == None) + self.assertFalse(self.s3_utils.get_uuid_metadata(boto_client, self.bucket, s3_key) == None) @mock_s3 def test_add_uuid_metadata(self): - region = self.su.conf['s3_region'] - boto_client = self.su.connect("s3_resource", region) + boto_client = self.s3_utils.connect("s3_resource", self.region) s3_key = "csv/file1.csv" - bucket = self.su.conf['s3_bucket'] - location = {'LocationConstraint': region} - boto_client.create_bucket(Bucket=bucket, CreateBucketConfiguration=location) - boto_client.Object(bucket, s3_key).put(Bucket=bucket, Key=s3_key, Body="my_body") + location = {'LocationConstraint': self.region} + boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) + boto_client.Object(self.bucket, s3_key).put(Bucket=self.bucket, Key=s3_key, Body="my_body") - self.assertTrue(self.su.add_uuid_metadata(boto_client, bucket, s3_key)) + self.assertTrue(self.s3_utils.add_uuid_metadata(boto_client, self.bucket, s3_key)) @mock_s3 def test_add_file_s3(self): - boto_client = self.su.connect("s3", None) + boto_client = self.s3_utils.connect("s3", None) local_file = abspath_from_relative(__file__, "../data/file4.csv") s3_key = "csv/file4.csv" - bucket = self.su.conf['s3_bucket'] - region = self.su.conf['s3_region'] - location = {'LocationConstraint': region} - boto_client.create_bucket(Bucket=bucket, CreateBucketConfiguration=location) + location = {'LocationConstraint': self.region} + boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) overwrite = True - self.assertTrue(self.su.upload_s3(boto_client, local_file, bucket, s3_key, overwrite)) + self.assertTrue(self.s3_utils.upload_s3(boto_client, local_file, self.bucket, s3_key, overwrite)) + @mock_s3 def test_get_csv_s3(self): - boto_client = self.su.connect("session", None) + boto_session = self.s3_utils.connect("session", None) + s3 = self.s3_utils.connect('s3', self.cloud_conf['s3_region']) + location = {'LocationConstraint': self.region} s3_key = "csv/file1.csv" - bucket = self.su.conf['s3_bucket'] - sm_open_file = self.su.get_csv_s3(boto_client, bucket, s3_key) + s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) + s3.put_object(Bucket=self.bucket, Key=s3_key, Body="body") + + sm_open_file = self.s3_utils.get_csv_s3(boto_session, self.bucket, s3_key) # print("reading csv:" + line.decode('utf-8')) csv_reader = csv.DictReader(sm_open_file) for row in csv_reader: print(str(row["LON"])) + @mock_s3 def test_read_bytes_s3(self): - boto_client = self.su.connect("s3", None) + boto_client = self.s3_utils.connect("s3", None) s3_key = "csv/file1.csv" - bucket = self.su.conf['s3_bucket'] - self.assertTrue(self.su.read_bytes_s3(boto_client, bucket, s3_key)) + boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration={'LocationConstraint': self.region}) + boto_client.put_object(Bucket=self.bucket, Key=s3_key, Body="body") + + self.assertTrue(self.s3_utils.read_bytes_s3(boto_client, self.bucket, s3_key)) @mock_s3 def test_add_files(self): - boto_client = self.su.connect("s3", None) + boto_client = 
self.s3_utils.connect("s3", None) local_files = ["file1_s3.csv", "file2.csv", "file3.csv"] - bucket = self.su.conf['s3_bucket'] - region = self.su.conf['s3_region'] - location = {'LocationConstraint': region} - boto_client.create_bucket(Bucket=bucket, CreateBucketConfiguration=location) + location = {'LocationConstraint': self.region} + boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) overwrite = True - s3_file = None + for file in local_files: local_file = abspath_from_relative(__file__, "../data/" + file) s3_file = "csv/" + file - self.assertTrue(self.su.upload_s3(boto_client, local_file, bucket, s3_file, overwrite)) + self.assertTrue(self.s3_utils.upload_s3(boto_client, local_file, self.bucket, s3_file, overwrite)) @mock_s3 @mock_glacier def test_s3_cross_region(self): print('Cross Region Vault Upload ------------- ') key = "csv/file1.csv" - # grabs te region and bucket name from the config file - region = self.su.conf['s3_region'] - bucket = self.su.conf['s3_bucket'] # makes connection to low level s3 client - s3 = self.su.connect('s3', region) - location = {'LocationConstraint': region} - s3.create_bucket(Bucket=bucket, CreateBucketConfiguration=location) - s3.put_object(Bucket=bucket, Key=key, Body="body") + s3 = self.s3_utils.connect('s3', self.region) + location = {'LocationConstraint': self.region} + s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) + s3.put_object(Bucket=self.bucket, Key=key, Body="body") # Reads object data and stores it into a variable - file_data = self.su.read_bytes_s3(s3, bucket, key) + file_data = self.s3_utils.read_bytes_s3(s3, self.bucket, key) # Redirecting upload to vault in second region - glacier = self.su.connect("glacier", self.su.conf['s3_region2']) - vault_name = self.su.conf['vault_name'] + glacier = self.s3_utils.connect("glacier", self.region2) + vault_name = self.cloud_conf['vault_name'] glacier.create_vault(vaultName=vault_name) print('vault name: ' + str(vault_name)) - print('region name: ' + str(self.su.conf['s3_region2'])) + print('region name: ' + str(self.region2)) print('-------file data---------') print(file_data) - response = self.su.upload_archive(glacier, vault_name, file_data) + response = self.s3_utils.upload_archive(glacier, vault_name, file_data) self.assertTrue(response['archiveId']!=None) @@ -134,18 +138,15 @@ def test_s3_to_glacier(self): print("S3 to Glacier---------") key = "csv/file1_s3.csv" - # grabs te region and bucket name from the config file - region = self.su.conf['s3_region'] - bucket = self.su.conf['s3_bucket'] # Create boto3 low level api connection - s3 = self.su.connect('s3', region) - location = {'LocationConstraint': region} - s3.create_bucket(Bucket=bucket, CreateBucketConfiguration=location) - s3.put_object(Bucket=bucket, Key=key, Body="body") + s3 = self.s3_utils.connect('s3', self.region) + location = {'LocationConstraint': self.region} + s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) + s3.put_object(Bucket=self.bucket, Key=key, Body="body") # Using the S3 util class invoke the change of storage class - response = self.su.s3_to_glacier(s3, bucket, key) + response = self.s3_utils.s3_to_glacier(s3, self.bucket, key) print(response['ResponseMetadata']['HTTPHeaders']['x-amz-storage-class']) # Assert 'x-amz-storage-class': 'GLACIER' @@ -157,18 +158,16 @@ def test_s3_restore(self): Uses high level api to restore object from glacier to s3 """ - region = self.su.conf['s3_region2'] - bucket = self.su.conf['s3_bucket'] key = 
"csv/file1_s3.csv" days = 3 # use high level api - s3 = self.su.connect('s3_resource', region) - location = {'LocationConstraint': region} - s3.create_bucket(Bucket=bucket, CreateBucketConfiguration=location) - s3.Object(bucket, key).put(Bucket=bucket, Key=key, Body="body") + s3 = self.s3_utils.connect('s3_resource', self.region2) + location = {'LocationConstraint': self.region2} + s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) + s3.Object(self.bucket, key).put(Bucket=self.bucket, Key=key, Body="body") - self.assertTrue(self.su.s3_restore(s3, bucket, key, days) != None) + self.assertTrue(self.s3_utils.s3_restore(s3, self.bucket, key, days) != None) @mock_glacier def test_retrieve_inventory(self): @@ -178,12 +177,12 @@ def test_retrieve_inventory(self): # Using glacier api initiates job and returns archive results # Connect to your glacier vault for retrieval - glacier = self.su.connect("glacier", self.su.conf['s3_region2']) - vault_name = self.su.conf['vault_name'] + glacier = self.s3_utils.connect("glacier", self.region2) + vault_name = self.cloud_conf['vault_name'] glacier.create_vault(vaultName=vault_name) - response = self.su.retrieve_inventory(glacier, vault_name) + response = self.s3_utils.retrieve_inventory(glacier, vault_name) self.assertTrue(response['jobId']!= None) ''' @@ -203,7 +202,13 @@ def test_retrieve_inventory_results(self, jobid): self.assertTrue(inventory != None) ''' - + @mock_s3 + def test_extra_parameters_constructor(self): + testParams = {"access_key": "blah", + "secret_key": "blah", + "log_level": "DEBUG", + "extra": "extra value"} + self.assertRaises(Exception, S3Utils(**testParams)) if __name__ == '__main__': unittest.main() \ No newline at end of file diff --git a/scripts/launch_e2e.py b/scripts/launch_e2e.py index 2d5b79b..6d60b2c 100644 --- a/scripts/launch_e2e.py +++ b/scripts/launch_e2e.py @@ -1,6 +1,8 @@ import argparse import json import os +import yaml + from onestop.util.SqsConsumer import SqsConsumer from onestop.util.S3Utils import S3Utils from onestop.util.S3MessageAdapter import S3MessageAdapter @@ -55,8 +57,8 @@ def handler(recs): # Upload to archive file_data = s3_utils.read_bytes_s3(s3_client, bucket, s3_key) - glacier = s3_utils.connect("glacier", s3_utils.conf['s3_region']) - vault_name = s3_utils.conf['vault_name'] + glacier = s3_utils.connect("glacier", cloud_conf['s3_region']) + vault_name = cloud_conf['vault_name'] resp_dict = s3_utils.upload_archive(glacier, vault_name, file_data) @@ -106,9 +108,9 @@ def handler(recs): # High-level api s3_resource = s3_utils.connect("s3_resource", None) - bucket = s3_utils.conf['s3_bucket'] + bucket = cloud_conf['s3_bucket'] overwrite = True - sqs_max_polls = s3_utils.conf['sqs_max_polls'] + sqs_max_polls = cloud_conf['sqs_max_polls'] # Add 3 files to bucket local_files = ["file1.csv", "file4.csv"] s3_file = None @@ -141,18 +143,35 @@ def handler(recs): # Get configuration file path locations conf_loc = args.pop('conf') cred_loc = args.pop('cred') + stream_conf_loc = args.pop('cred') - # Upload a test file to s3 bucket - s3_utils = S3Utils(conf_loc, cred_loc) + with open(os.path.abspath(os.path.join(os.path.dirname(__file__), cred_loc))) as f: + cred = yaml.load(f, Loader=yaml.FullLoader) + with open(os.path.abspath(os.path.join(os.path.dirname(__file__), conf_loc))) as f: + cloud_conf = yaml.load(f, Loader=yaml.FullLoader) + with open(os.path.abspath(os.path.join(os.path.dirname(__file__), stream_conf_loc))) as f: + stream_conf = yaml.load(f, Loader=yaml.FullLoader) - # Low-level 
api ? Can we just use high level revisit me! - s3_client = s3_utils.connect("s3", None) + s3_utils = S3Utils(cred['sandbox']['access_key'], + cred['sandbox']['secret_key'], + "DEBUG") - bucket = s3_utils.conf['s3_bucket'] + bucket = cloud_conf['s3_bucket'] + sqs_max_polls = cloud_conf['sqs_max_polls'] - sqs_max_polls = s3_utils.conf['sqs_max_polls'] + #Source + access_bucket = stream_conf['access_bucket'] - # Add 3 files to bucket + #Onestop related + file_id_prefix = stream_conf['file_identifier_prefix'] + file_format = stream_conf['format'] + headers = stream_conf['headers'] + type = stream_conf['type'] + + # Low-level api ? Can we just use high level revisit me! + s3_client = s3_utils.connect("s3", None) + + # Upload test files to s3 bucket local_files = ["file1.csv", "file4.csv"] s3_file = None for file in local_files: @@ -162,9 +181,11 @@ def handler(recs): if not s3_utils.upload_s3(s3_client, local_file, bucket, s3_file, True): exit("Error setting up for e2e: The test files were not uploaded to the s3 bucket therefore the tests cannot continue.") + + # Receive s3 message and MVM from SQS queue sqs_consumer = SqsConsumer(conf_loc, cred_loc) - s3ma = S3MessageAdapter("config/csb-data-stream-config.yml", s3_utils) + s3ma = S3MessageAdapter(access_bucket, headers, type, file_id_prefix, "DEBUG") wp = WebPublisher("config/web-publisher-config-dev.yml", cred_loc) queue = sqs_consumer.connect() diff --git a/scripts/launch_pyconsumer.py b/scripts/launch_pyconsumer.py index f9dbcf6..7850f38 100644 --- a/scripts/launch_pyconsumer.py +++ b/scripts/launch_pyconsumer.py @@ -1,4 +1,6 @@ import os +import yaml + from onestop.util.SqsConsumer import SqsConsumer from onestop.util.S3Utils import S3Utils from onestop.util.S3MessageAdapter import S3MessageAdapter @@ -49,6 +51,10 @@ def handler(recs): if __name__ == '__main__': conf_loc = "/etc/config/config.yml" cred_loc = "creds.yml" + with open(os.path.abspath(os.path.join(os.path.dirname(__file__), "creds.yml"))) as f: + cred = yaml.load(f, Loader=yaml.FullLoader) + with open(os.path.abspath(os.path.join(os.path.dirname(__file__), "/etc/config/config.yml"))) as f: + conf = yaml.load(f, Loader=yaml.FullLoader) registry_user = os.environ.get("REGISTRY_USERNAME") registry_pwd = os.environ.get("REGISTRY_PASSWORD") @@ -71,8 +77,10 @@ def handler(recs): r = open(cred_loc, "r") # # Receive s3 message and MVM from SQS queue - s3_utils = S3Utils(conf_loc, cred_loc) - sqs_max_polls = s3_utils.conf['sqs_max_polls'] + s3_utils = S3Utils(cred['sandbox']['access_key'], + cred['sandbox']['secret_key'], + "DEBUG") + sqs_max_polls = conf['sqs_max_polls'] sqs_consumer = SqsConsumer(conf_loc, cred_loc) queue = sqs_consumer.connect() From 47d9d335752ac6169849547d941e75958f94ddc7 Mon Sep 17 00:00:00 2001 From: Erin Date: Thu, 15 Apr 2021 17:05:13 -0600 Subject: [PATCH 02/49] 1500-WebPublisher adjusted some documentation wording and added test_WebPublisher_unit as unit test until create folder structure for integration vs unit tests. 
--- onestop-python-client/onestop/WebPublisher.py | 28 ++-- .../tests/test_WebPublisher_unit.py | 145 ++++++++++++++++++ 2 files changed, 159 insertions(+), 14 deletions(-) create mode 100644 onestop-python-client/tests/test_WebPublisher_unit.py diff --git a/onestop-python-client/onestop/WebPublisher.py b/onestop-python-client/onestop/WebPublisher.py index 55ca06c..d944f8f 100644 --- a/onestop-python-client/onestop/WebPublisher.py +++ b/onestop-python-client/onestop/WebPublisher.py @@ -8,28 +8,28 @@ class WebPublisher: Attributes ---------- registry_base_url: str - url for registry endpoint + URL for registry endpoint registry_username: str - username for posting metadata to registry + Registry username where credentials needed registry_password: str - password for posting metadata to registry + Registry password where credentials needed onestop_base_url: str - url for onestop endpoint + URL for OneStop endpoint logger.info: str logging level Methods ------- publish_registry(metadata_type, uuid, payload, method) - Publish to registry with either POST,PUT, OR PATCH methods + Publish an item to registry with either POST, PUT, OR PATCH methods delete_registry(metadata_type, uuid) - Deletes item from registry + Delete an item from registry search_registry(metadata_type, uuid) - Searches for an item in registry given its metadata type and uuid + Search for an item in registry given its metadata type and uuid search_onestop(metadata_type, payload) - Acquires the item, collection or granule, from OneStop + Search for an item in OneStop given its metadata type and payload search criteria get_granules_onestop(self, uuid) - Acquires granules from OneStop given the uuid + Search for a granule in OneStop given its uuid """ conf = None @@ -84,12 +84,12 @@ def publish_registry(self, metadata_type, uuid, payload, method): def delete_registry(self, metadata_type, uuid): """ - Deletes item from registry + Delete an item from registry :param metadata_type: str metadata type (GRANULE/COLLECTION) :param uuid: str - uuid you want to publish with + uuid you want to delete :return: str response message indicating if delete was successful @@ -105,7 +105,7 @@ def delete_registry(self, metadata_type, uuid): def search_registry(self, metadata_type, uuid): """ - Searches for an item in registry given its metadata type and uuid + Search for an item in registry given its metadata type and uuid :param metadata_type: str metadata type (GRANULE/COLLECTION) @@ -126,7 +126,7 @@ def search_registry(self, metadata_type, uuid): def search_onestop(self, metadata_type, payload): """ - Searches for an item in OneStop given its metadata type and payload search criteria. + Search for an item in OneStop given its metadata type and payload search criteria. 
:param metadata_type: str metadata type (GRANULE/COLLECTION) @@ -147,7 +147,7 @@ def search_onestop(self, metadata_type, payload): def get_granules_onestop(self, uuid): """ - Searches for a granule in OneStop given its uuid + Search for a granule in OneStop given its uuid :param uuid: str uuid you want search for diff --git a/onestop-python-client/tests/test_WebPublisher_unit.py b/onestop-python-client/tests/test_WebPublisher_unit.py new file mode 100644 index 0000000..3e987fb --- /dev/null +++ b/onestop-python-client/tests/test_WebPublisher_unit.py @@ -0,0 +1,145 @@ +import json +import unittest + +from unittest.mock import ANY +from unittest import mock +from moto import mock_s3 +from onestop.WebPublisher import WebPublisher + +class WebPublisherTest(unittest.TestCase): + username="admin" + password="a_password" + uuid = "9f0a5ff2-fcc0-5bcb-a225-024b669c9bba" + registry_base_url = "https://localhost/onestop/api/registry" + registry_full_url_granule = registry_base_url + "/metadata/granule/" + uuid + registry_full_url_collection = registry_base_url + "/metadata/collection/" + uuid + onestop_base_url = "https://localhost/onestop/api/search" + + payloadDict = { + "fileInformation": { + "name": "file2.csv", + "size": 1385, + "checksums": [{ + "algorithm": "MD5", + "value": "44d2452e8bc2c8013e9c673086fbab7a" + }] + }, + "relationships": [ + {"type": "COLLECTION", + "id": "fdb56230-87f4-49f2-ab83-104cfd073177" + } + ], + "fileLocations": { + "nesdis-ncei-csb-dev/csv/file2.csv": { + "uri": "https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com/csv/file2.csv", + "type": "ACCESS", + "restricted": False, + "serviceType": "HTTPS", + "asynchronous": False + } + }, + "discovery": { + "title": "file2.csv", + "parentIdentifier": "fdb56230-87f4-49f2-ab83-104cfd073177", + "fileIdentifier": "gov.noaa.ncei.csb:file2" + } + } + + addlocDict = { + "fileLocations": { + "Crt3a-Hq2SGUp8n8QSRNpFIf59kmMONqaKlJ_7-Igd8ijMM62deLdtVkiYwlaePbC4JNCsfeg5i-DWDmwxLIx9V-OGgiQp_CZ0rEFXIZxM_ZPyGu7TTv8wwos5SvAI6xDURhzoCH-w": { + "uri": "/282856304593/vaults/noaa-nesdis-ncei-vault-test/archives/Crt3a-Hq2SGUp8n8QSRNpFIf59kmMONqaKlJ_7-Igd8ijMM62deLdtVkiYwlaePbC4JNCsfeg5i-DWDmwxLIx9V-OGgiQp_CZ0rEFXIZxM_ZPyGu7TTv8wwos5SvAI6xDURhzoCH-w", + "type": "ACCESS", + "restricted": True, + "serviceType": "Amazon:AWS:Glacier", + "asynchronous": True + } + } + } + + + def setUp(self): + print("Set it up!") + + self.wp = WebPublisher(self.registry_base_url, + self.username, + self.password, + self.onestop_base_url, + 'DEBUG') + + def tearDown(self): + print("Tear it down!") + + def mocked_requests_patch(*args, **kwargs): + class MockResponse: + def __init__(self, json_data, status_code): + self.json_data = json_data + self.status_code = status_code + + def json(self): + return self.json_data + + print ("args: "+str(args)+" kwargs: "+str(kwargs)) + + return MockResponse({"key1":"value1"}, 200) + + @mock_s3 + @mock.patch('requests.post', side_effect=mocked_requests_patch) + def test_publish(self, mock_get): + payload = json.dumps(self.payloadDict) + self.wp.publish_registry("granule", self.uuid, payload, "POST") + + mock_get.assert_called_with(url = self.registry_full_url_granule, auth = ANY, data = ANY, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = (self.username, self.password), data = ANY, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, data = payload, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, data = ANY, verify = False, headers = 
ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, data = ANY, verify = ANY, headers = {'Content-Type': 'application/json'}) + + @mock_s3 + @mock.patch('requests.put', side_effect=mocked_requests_patch) + def test_publish(self, mock_get): + payload = json.dumps(self.payloadDict) + self.wp.publish_registry("granule", self.uuid, payload, "PUT") + + mock_get.assert_called_with(url = self.registry_full_url_granule, auth = ANY, data = ANY, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = (self.username, self.password), data = ANY, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, data = payload, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, data = ANY, verify = False, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, data = ANY, verify = ANY, headers = {'Content-Type': 'application/json'}) + + @mock_s3 + @mock.patch('requests.patch', side_effect=mocked_requests_patch) + def test_add_glacier_location(self, mock_get): + payload = json.dumps(self.addlocDict) + self.wp.publish_registry("granule", self.uuid, payload, "PATCH") + + mock_get.assert_called_with(url = self.registry_full_url_granule, auth = ANY, data = ANY, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = (self.username, self.password), data = ANY, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, data = payload, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, data = ANY, verify = False, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, data = ANY, verify = ANY, headers = {'Content-Type': 'application/json'}) + + @mock_s3 + @mock.patch('requests.delete', side_effect=mocked_requests_patch) + def test_delete_registry_granule(self, mock_get): + self.wp.delete_registry("granule", self.uuid) + + mock_get.assert_called_with(url = self.registry_full_url_granule, headers = ANY, auth = ANY, verify = ANY) + mock_get.assert_called_with(url = ANY, auth = (self.username, self.password), verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, verify = False, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, verify = ANY, headers = {'Content-Type': 'application/json'}) + + @mock_s3 + @mock.patch('requests.delete', side_effect=mocked_requests_patch) + def test_delete_registry_collection(self, mock_get): + self.wp.delete_registry("collection", self.uuid) + + mock_get.assert_called_with(url = self.registry_full_url_collection, headers = ANY, auth = ANY, verify = ANY) + mock_get.assert_called_with(url = ANY, auth = (self.username, self.password), verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, verify = False, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, verify = ANY, headers = {'Content-Type': 'application/json'}) + +if __name__ == '__main__': + unittest.main() \ No newline at end of file From 85a9096d5415bd606934c9d00c7a69b0722f764d Mon Sep 17 00:00:00 2001 From: Erin Date: Thu, 15 Apr 2021 17:17:05 -0600 Subject: [PATCH 03/49] 1500-Adjusted documentation indentation in WebPublisher --- onestop-python-client/onestop/WebPublisher.py | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/onestop-python-client/onestop/WebPublisher.py 
b/onestop-python-client/onestop/WebPublisher.py index d944f8f..75ee99f 100644 --- a/onestop-python-client/onestop/WebPublisher.py +++ b/onestop-python-client/onestop/WebPublisher.py @@ -7,29 +7,29 @@ class WebPublisher: Attributes ---------- - registry_base_url: str - URL for registry endpoint - registry_username: str - Registry username where credentials needed - registry_password: str - Registry password where credentials needed - onestop_base_url: str - URL for OneStop endpoint - logger.info: str - logging level + registry_base_url: str + URL for registry endpoint + registry_username: str + Registry username where credentials needed + registry_password: str + Registry password where credentials needed + onestop_base_url: str + URL for OneStop endpoint + logger.info: str + logging level Methods ------- - publish_registry(metadata_type, uuid, payload, method) - Publish an item to registry with either POST, PUT, OR PATCH methods - delete_registry(metadata_type, uuid) - Delete an item from registry - search_registry(metadata_type, uuid) - Search for an item in registry given its metadata type and uuid - search_onestop(metadata_type, payload) - Search for an item in OneStop given its metadata type and payload search criteria - get_granules_onestop(self, uuid) - Search for a granule in OneStop given its uuid + publish_registry(metadata_type, uuid, payload, method) + Publish an item to registry with either POST, PUT, OR PATCH methods + delete_registry(metadata_type, uuid) + Delete an item from registry + search_registry(metadata_type, uuid) + Search for an item in registry given its metadata type and uuid + search_onestop(metadata_type, payload) + Search for an item in OneStop given its metadata type and payload search criteria + get_granules_onestop(self, uuid) + Search for a granule in OneStop given its uuid """ conf = None From 85ada2290c305218bbe93cae1cccb3e7c622b7e2 Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 16 Apr 2021 15:52:18 -0600 Subject: [PATCH 04/49] 1500-Changed CsbExtractor class constructor(adjusted documentation) to take dictionary with extra parameters allowed as well as methods within this class not to reference config but the variable that was set. Adjusted effected tests. 
---
 .../onestop/extract/CsbExtractor.py           | 127 ++++++------
 .../tests/extractor/CsbExtractorTest.py       |  85 +++++++-----
 2 files changed, 98 insertions(+), 114 deletions(-)

diff --git a/onestop-python-client/onestop/extract/CsbExtractor.py b/onestop-python-client/onestop/extract/CsbExtractor.py
index e79cddc..b1006cb 100644
--- a/onestop-python-client/onestop/extract/CsbExtractor.py
+++ b/onestop-python-client/onestop/extract/CsbExtractor.py
@@ -2,61 +2,33 @@
 from datetime import datetime
 
 class CsbExtractor:
+
     """
     A class used to extract geospatial data from csv files in an s3 bucket
 
-    Attributes
-    ----------
-    su : S3 Utils object
-        an instance of the s3 utils class used to connect to the corresponding s3 bucket to get access to the csv file for extraction
-    boto_client: boto3 client
-        specific boto3 client type (s3, s3_resource, glacier, session) used to access aws resources
-    bucket: str
-        the name of the s3 bucket in which you want to access
-    key: str
-        the name of key path for the specific item you want to access in the bucket
-
-
     Methods
     -------
     is_csv(file_name)
-        checks to see if the given file is of type csv
+        Verifies a file name ends with '.csv'
 
     get_spatial_temporal_bounds(lon_column_name, lat_column_name, date_column_name)
-        extracts min/max longitude and latitude values as well as beginning and ending dates from specified csv file
+        Gets the spatial bounding box for the open file. Seeks back to the start of the file before and after reading.
 
    extract_coords(max_lon, max_lat, min_lon, min_lat)
-        extracts specific coordinates corresponding to min/max longitude and latitude values given from get_spatial_temporal_bounds(....) method
+        Given the max/min lon and lat, the function will parse the csv file to extract the coordinates within the given bounding box.
     """
-    def __init__(self, su, key):
-        """
-        :param su: S3 Utils object
-            an instance of the s3 utils class used to connect to the corresponding s3 bucket to get access to the csv file for extraction
-        :param key: str
-            the name of key path for the specific item you want to access in the bucket
-
-        Other Attributes
-        ________________
-        boto_client: boto3 client
-            specific boto3 client type (s3, s3_resource, glacier, session) used to access aws resources
-        bucket: str
-            the name of the s3 bucket in which you want to access
 
+    @staticmethod
+    def is_csv(file_name):
         """
-        self.su = su
-        boto_client = self.su.connect("session", None)
-        bucket = self.su.conf['s3_bucket']
-        self.key = key
-
-    def is_csv(self, file_name):
-        """
-        Checks to see if the given file is of type csv
+        Verifies a file name ends with '.csv'
 
         :param file_name: str
-            the name of the file in the s3 bucket i.e. file1.csv
+            File name with extension on the end.
 
-        :return: boolean
-            True if the file name contains .csv and False otherwise
+        :return: boolean
+            True if the file name ends with csv
+            False if it does not end with csv
         """
         csv_str = '.csv'
         if file_name.endswith(csv_str):
@@ -64,28 +36,22 @@ def is_csv(self, file_name):
 
         return False
 
-    # def smart_open_read(self, key):
-    #     boto_client = self.su.connect("session", None)
-    #     bucket = self.su.conf['s3_bucket']
-    #     self.su.read_csv_s3(boto_client, bucket, key)
-
-
-    def get_spatial_temporal_bounds(self, lon_column_name, lat_column_name, date_column_name):
+    @staticmethod
+    def get_spatial_temporal_bounds(sm_open_file, lon_column_name, lat_column_name, date_column_name):
         """
-        Extracts min/max longitude and latitude values as well as beginning and ending dates from specified csv file
+        Gets the spatial bounding box for the open file. Seeks back to the start of the file before and after reading.
 
+        :param sm_open_file: file-like object
+            A file-like object that is open, say from smart_open's sm_open.
         :param lon_column_name: str
-            name of longitude column in the csv file
+            Longitude column name
         :param lat_column_name: str
-            name of the latitude column in the csv file
+            Latitude column name
         :param date_column_name: str
-            name of the date column in the csv file
+            Date column name
 
         :return: dict
-            Key : Value
-            geospatial (str) -> List[float] containing min/max longitude and latitude values
-            temporal (str) -> List[str] containing beginning and end dates
-
+            Dict with 'geospatial' (min/max longitude and latitude) and 'temporal' (begin and end date) bounds of the file.
         """
         lon_min_val = None
         lon_max_val = None
@@ -99,9 +65,7 @@ def get_spatial_temporal_bounds(self, lon_column_name, lat_column_name, date_col
         # variable to be returned in string format
         begin_date_str = ''
 
-        boto_client = self.su.connect("session", None)
-        bucket = self.su.conf['s3_bucket']
-        sm_open_file = self.su.get_csv_s3(boto_client, bucket, self.key)
+        sm_open_file.seek(0)
         csv_reader = csv.DictReader(sm_open_file)
 
         for row in csv_reader:
@@ -151,43 +115,40 @@ def get_spatial_temporal_bounds(self, lon_column_name, lat_column_name, date_col
             "temporal": [begin_date_str, end_date_str]
         }
 
+        sm_open_file.seek(0)
         return geospatial_temporal_bounds
 
-
-    def extract_coords(self, max_lon, max_lat, min_lon, min_lat):
+    @staticmethod
+    def extract_coords(sm_open_file, max_lon, max_lat, min_lon, min_lat):
         """
-        Extracts specific coordinates corresponding to min/max longitude and latitude values given from get_spatial_temporal_bounds(....) method
-
-        :param max_lon: float
-            maximum longitude value
-        :param max_lat: float
-            maximum latitude value
-        :param min_lon: float
-            minimum longitude value
-        :param min_lat: float
-            minimum latitude value
-
-        :return: List[ List[Float] ]
-            Returns a list of lists. Each list contains floats (longitude and latitude ) value pairs corresponding to
-            one of the min/max latitude and longitude values that were extracted previously from get_spatial_temporal_bounds (...)
+        Given the max/min lon and lat, the function will parse the csv file to extract the coordinates within the given bounding box.
+
+        :param sm_open_file: file-like object
+            A file-like object that is open, say from smart_open's sm_open.
+        :param max_lon: float
+            Maximum longitude
+        :param max_lat: float
+            Maximum latitude
+        :param min_lon: float
+            Minimum longitude
+        :param min_lat: float
+            Minimum latitude
+
+        :return: list
+            List of the coordinates (no duplicates) from the file that lie on the given bounding box.
         """
-        # Keeps track of all coordinates that needs to be added to json payload
         coords = []
 
-        boto_client = self.su.connect("session", None)
-        bucket = self.su.conf['s3_bucket']
-        sm_open_file = self.su.get_csv_s3(boto_client, bucket, self.key)
+        sm_open_file.seek(0)
        csv_reader = csv.DictReader(sm_open_file)
-
         for row in csv_reader:
-            if float( row['LAT'] ) == min_lat or float( row['LAT'] ) == max_lat or float(
-                row['LON'] ) == min_lon or float( row['LON'] ) == max_lon:
+            if float( row['LAT'] ) == min_lat or float( row['LAT'] ) == max_lat or \
+                float( row['LON'] ) == min_lon or float( row['LON'] ) == max_lon:
                 coord = [float( row['LON'] ), float( row['LAT'] )]
-
-            # check to see if that coordinate has already been appended to the list that is keeping track of our coordinates
+                # only append the coordinate if it is not already in the list (no duplicates)
                 if coord not in coords:
                     coords.append( coord )
 
+        sm_open_file.seek(0)
         return coords
-
diff --git a/onestop-python-client/tests/extractor/CsbExtractorTest.py b/onestop-python-client/tests/extractor/CsbExtractorTest.py
index 7dbbc9e..72bdbcc 100644
--- a/onestop-python-client/tests/extractor/CsbExtractorTest.py
+++ b/onestop-python-client/tests/extractor/CsbExtractorTest.py
@@ -1,35 +1,53 @@
 import unittest
+import os
+
+from moto import mock_s3
 from onestop.extract.CsbExtractor import CsbExtractor
 from onestop.util.S3Utils import S3Utils
-from tests.utils import abspath_from_relative
-
 class CsbExtractorTest(unittest.TestCase):
-    # def setUp(self):
-    #     print("Set it up!")
-    #     file_name = '../data/file4.csv'
-    #     self.csb_extractor = CsbExtractor(file_name)
-
     def setUp(self):
         print("Set it up!")
-        key = "public/NESDIS/CSB/file4.csv"
-        self.su = S3Utils( abspath_from_relative( __file__, "../../config/aws-util-config-dev.yml" ),
-                           abspath_from_relative(__file__, "../../config/credentials.yml") )
-        self.csb_extractor = CsbExtractor(self.su, key)
+        self.root_proj_path = os.getcwd()
+        self.assertIsNotNone(self.root_proj_path)
+        self.key = "tests/data/file4.csv"
+        # Use open instead of our method, for simplicity and reliability; we are not testing our own code here.
+        self.file_obj = open(self.root_proj_path + '/' + self.key)
+
+        config_dict = {
+            "access_key": "test_access_key",
+            "secret_key": "test_secret_key",
+            "log_level": "DEBUG"
+        }
+
+        self.s3_utils = S3Utils(**config_dict)
+        self.bucket = "bucket"
+        self.region = "region"
 
     def tearDown(self):
         print("Tear it down!")
+        self.file_obj.close()
 
     def test_is_csv(self):
-        csv_str = '.csv'
-        self.assertTrue(self.csb_extractor.is_csv(self.csb_extractor.file_name))
+        self.assertTrue(CsbExtractor.is_csv("test/blah/file.csv"), "Failed to determine a csv file name was a csv file.")
 
+    def test_is_not_csv(self):
+        self.assertFalse(CsbExtractor.is_csv("test/blah/file.txt"), "Failed to determine a csv file name was not a csv file.")
 
-    def test_get_geospatial_temporal_bounds(self):
-        bounds_dict = self.csb_extractor.get_spatial_temporal_bounds('LON', 'LAT', 'TIME')
+    @mock_s3
+    def test_csb_SME_user_path(self):
+        # Setup bucket and file to read
+        s3 = self.s3_utils.connect('s3', self.region)
+        s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration={'LocationConstraint': self.region})
+        self.s3_utils.upload_s3(s3, self.root_proj_path + '/' + self.key, self.bucket, self.key, True)
+        self.assertTrue(self.s3_utils.read_bytes_s3(s3, self.bucket, self.key))
+
+        # This is how we would expect an external user to get the file.
+ sm_open_file = self.s3_utils.get_csv_s3(self.s3_utils.connect("session", None), self.bucket, self.key) + + bounds_dict = CsbExtractor.get_spatial_temporal_bounds(sm_open_file, 'LON', 'LAT', 'TIME') coords = bounds_dict["geospatial"] - print(str(coords)) self.assertEqual(coords[0], -96.847995) self.assertEqual(coords[1], 29.373065) self.assertEqual(coords[2], -92.747995) @@ -39,38 +57,43 @@ def test_get_geospatial_temporal_bounds(self): self.assertEqual(date_rng[0], '2018-04-10T14:00:06.000Z' ) self.assertEqual(date_rng[1], '2020-04-10T14:00:06.000Z' ) + def test_get_geospatial_temporal_bounds(self): + bounds_dict = CsbExtractor.get_spatial_temporal_bounds(self.file_obj, 'LON', 'LAT', 'TIME') + + coords = bounds_dict["geospatial"] + self.assertEqual(coords[0], -96.847995) + self.assertEqual(coords[1], 29.373065) + self.assertEqual(coords[2], -92.747995) + self.assertEqual(coords[3], 33.373065) + + date_rng = bounds_dict["temporal"] + self.assertEqual(date_rng[0], '2018-04-10T14:00:06.000Z' ) + self.assertEqual(date_rng[1], '2020-04-10T14:00:06.000Z' ) def test_get_min_lon(self): - bounds_dict = self.csb_extractor.get_spatial_temporal_bounds('LON', 'LAT', 'TIME') + bounds_dict = CsbExtractor.get_spatial_temporal_bounds(self.file_obj, 'LON', 'LAT', 'TIME') + coords = bounds_dict["geospatial"] min_lon = coords[0] self.assertEqual(min_lon, -96.847995) - def test_get_max_datetime(self): - bounds_dict = self.csb_extractor.get_spatial_temporal_bounds('LON', 'LAT', 'TIME') + + bounds_dict = CsbExtractor.get_spatial_temporal_bounds(self.file_obj, 'LON', 'LAT', 'TIME') + date_rng = bounds_dict["temporal"] end_date = date_rng[1] self.assertEqual(end_date, '2020-04-10T14:00:06.000Z') - def test_get_min_datetime(self): - bounds_dict = self.csb_extractor.get_spatial_temporal_bounds('LON', 'LAT', 'TIME') + bounds_dict = CsbExtractor.get_spatial_temporal_bounds(self.file_obj, 'LON', 'LAT', 'TIME') + date_rng = bounds_dict["temporal"] begin_date = date_rng[0] self.assertEqual(begin_date, '2018-04-10T14:00:06.000Z') - def test_extract_coords(self): - bounds_dict = self.csb_extractor.get_spatial_temporal_bounds('LON', 'LAT', 'TIME') - coords = bounds_dict["geospatial"] - - min_lon = coords[0] - min_lat = coords[1] - max_lon = coords[2] - max_lat = coords[3] - - coords = self.csb_extractor.extract_coords(max_lon, max_lat, min_lon, min_lat) + coords = CsbExtractor.extract_coords(self.file_obj, -92.747995, 33.373065, -96.847995, 29.373065) result = [[ -94.847995, 29.373065 From 15cfaa33a1e1a3fe3b0e8360de01f5483f5bec7d Mon Sep 17 00:00:00 2001 From: Erin Date: Mon, 19 Apr 2021 13:28:40 -0600 Subject: [PATCH 05/49] 1500-Added unit tests for WebPublisher. 
Made sure using autospec=True
---
 onestop-python-client/tests/test_WebPublisher_unit.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/onestop-python-client/tests/test_WebPublisher_unit.py b/onestop-python-client/tests/test_WebPublisher_unit.py
index 3e987fb..4a97f80 100644
--- a/onestop-python-client/tests/test_WebPublisher_unit.py
+++ b/onestop-python-client/tests/test_WebPublisher_unit.py
@@ -84,7 +84,7 @@ def json(self):
         return MockResponse({"key1":"value1"}, 200)

     @mock_s3
-    @mock.patch('requests.post', side_effect=mocked_requests_patch)
+    @mock.patch('requests.post', side_effect=mocked_requests_patch, autospec=True)
     def test_publish(self, mock_get):
         payload = json.dumps(self.payloadDict)
         self.wp.publish_registry("granule", self.uuid, payload, "POST")
@@ -96,7 +96,7 @@ def test_publish(self, mock_get):
         mock_get.assert_called_with(url = ANY, auth = ANY, data = ANY, verify = ANY, headers = {'Content-Type': 'application/json'})

     @mock_s3
-    @mock.patch('requests.put', side_effect=mocked_requests_patch)
+    @mock.patch('requests.put', side_effect=mocked_requests_patch, autospec=True)
     def test_publish(self, mock_get):
         payload = json.dumps(self.payloadDict)
         self.wp.publish_registry("granule", self.uuid, payload, "PUT")
@@ -108,7 +108,7 @@ def test_publish(self, mock_get):
         mock_get.assert_called_with(url = ANY, auth = ANY, data = ANY, verify = ANY, headers = {'Content-Type': 'application/json'})

     @mock_s3
-    @mock.patch('requests.patch', side_effect=mocked_requests_patch)
+    @mock.patch('requests.patch', side_effect=mocked_requests_patch, autospec=True)
     def test_add_glacier_location(self, mock_get):
         payload = json.dumps(self.addlocDict)
         self.wp.publish_registry("granule", self.uuid, payload, "PATCH")
@@ -120,7 +120,7 @@ def test_add_glacier_location(self, mock_get):
         mock_get.assert_called_with(url = ANY, auth = ANY, data = ANY, verify = ANY, headers = {'Content-Type': 'application/json'})

     @mock_s3
-    @mock.patch('requests.delete', side_effect=mocked_requests_patch)
+    @mock.patch('requests.delete', side_effect=mocked_requests_patch, autospec=True)
     def test_delete_registry_granule(self, mock_get):
         self.wp.delete_registry("granule", self.uuid)

@@ -131,7 +131,7 @@ def test_delete_registry_granule(self, mock_get):
         mock_get.assert_called_with(url = ANY, auth = ANY, verify = ANY, headers = {'Content-Type': 'application/json'})

     @mock_s3
-    @mock.patch('requests.delete', side_effect=mocked_requests_patch)
+    @mock.patch('requests.delete', side_effect=mocked_requests_patch, autospec=True)
     def test_delete_registry_collection(self, mock_get):
         self.wp.delete_registry("collection", self.uuid)

From 1e629ab31e343e52c3fc93f839b26dc566e710a0 Mon Sep 17 00:00:00 2001
From: Erin
Date: Wed, 28 Apr 2021 10:08:53 -0600
Subject: [PATCH 06/49] 1500-Changed KafkaConsumer class constructor (adjusted documentation) to take a dictionary, with extra parameters allowed, and changed methods within this class to reference the variables that were set rather than the config. Adjusted affected tests. Removed the get_logger method since it was unused and a different logger is used instead. Added checks for when security isn't enabled.
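Illustration only (not part of this patch): with this change the consumer is built from a plain dictionary rather than a YAML config path, and unrecognized keys are collected by **wildargs and logged rather than raising. A minimal sketch using the same keys as the unit tests (the "some_extra_key" entry is hypothetical):

    config = {
        "metadata_type": "GRANULE",
        "brokers": "onestop-dev-cp-kafka:9092",
        "group_id": "sme-test",
        "auto_offset_reset": "earliest",
        "schema_registry": "http://onestop-dev-cp-schema-registry:8081",
        "security": {"enabled": False},
        "collection_topic_consume": "psi-collection-input-unknown",
        "granule_topic_consume": "psi-granule-input-unknown",
        "log_level": "DEBUG",
        "some_extra_key": "unused",  # hypothetical extra entry, only logged by the constructor
    }
    consumer = KafkaConsumer(**config)
    metadata_consumer = consumer.connect()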
--- .../onestop/KafkaConsumer.py | 182 ++++++------ .../tests/KafkaConsumerTest.py | 264 ++++++++++++++++++ scripts/sme/smeFunc.py | 2 +- 3 files changed, 349 insertions(+), 99 deletions(-) create mode 100644 onestop-python-client/tests/KafkaConsumerTest.py diff --git a/onestop-python-client/onestop/KafkaConsumer.py b/onestop-python-client/onestop/KafkaConsumer.py index e45d6cc..a3d1e95 100644 --- a/onestop-python-client/onestop/KafkaConsumer.py +++ b/onestop-python-client/onestop/KafkaConsumer.py @@ -1,11 +1,9 @@ -import logging -import yaml - from confluent_kafka.schema_registry import SchemaRegistryClient from confluent_kafka.error import KafkaError from confluent_kafka import DeserializingConsumer from confluent_kafka.schema_registry.avro import AvroDeserializer from confluent_kafka.serialization import StringDeserializer +from onestop.util.ClientLogger import ClientLogger class KafkaConsumer: """ @@ -13,109 +11,97 @@ class KafkaConsumer: Attributes ---------- - conf: yaml file - kafka-publisher-config-dev.yml - logger: Logger object - utilizes python logger library and creates logging for our specific needs - logger.info: Logger object - logging statement that occurs when the class is instantiated - metadata_type: str - type of metadata (COLLECTION or GRANULE) - brokers: str - brokers (kubernetes service) - group_id: str - Client group id string. All clients sharing the same group.id belong to the same group - auto_offset_reset: str - Action to take when there is no initial offset in offset store or the desired offset is out of range (smallest, earliest, beginning, largest, latest, end, error) - schema_registry: str - schema registry (kubernetes service) - security: boolean - defines if security is in place - collection_topic: str - collection topic you want to consume - granule_topic: str - granule topic you want to consume + metadata_type: str + type of metadata (COLLECTION or GRANULE) + brokers: str + brokers (kubernetes service) + group_id: str + Client group id string. All clients sharing the same group.id belong to the same group + auto_offset_reset: str + Action to take when there is no initial offset in offset store or the desired offset is out of range (smallest, earliest, beginning, largest, latest, end, error) + schema_registry: str + schema registry (kubernetes service) + security_enabled: boolean + Whether to use security for the kafka schema registry client. + security_caLoc: str + Kafka schema registry certification authority (CA) file location. + security_keyLoc: str + Kafka schema registry client's private key file location. + security_certLoc: str + Kafka schema registry client's public key file location. 
+ collection_topic_consume: str + collection topic you want to consume + granule_topic_consume: str + granule topic you want to consume + logger: Logger object + utilizes python logger library and creates logging for our specific needs Methods ------- - get_logger(log_name, create_file) - creates logger file - - register_client() - registers to schema registry client based on configs + register_client() + registers to schema registry client based on configs - create_consumer(registry_client) - subscribes to topic defined in configs and creates a consumer to deserialize messages from topic + connect() + utilizes register_client() and create_consumer(registry_client) to connect to schema registry and allow for consumption of topics - connect() - utilizes register_client() and create_consumer(registry_client) to connect to schema registry and allow for consumption of topics + create_consumer(registry_client) + subscribes to topic defined in configs and creates a consumer to deserialize messages from topic - consume(metadata_consumer, handler) - asynchronously polls for messages in the connected topic, results vary depending on the handler function that is passed into it + consume(metadata_consumer, handler) + asynchronously polls for messages in the connected topic, results vary depending on the handler function that is passed into it """ - conf = None - - def __init__(self, conf_loc): - with open(conf_loc) as f: - self.conf = yaml.load(f, Loader=yaml.FullLoader) - - self.logger = self.get_logger(self.__class__.__name__, False) - self.logger.info("Initializing " + self.__class__.__name__) - self.metadata_type = self.conf['metadata_type'] - self.brokers = self.conf['brokers'] - self.group_id = self.conf['group_id'] - self.auto_offset_reset = self.conf['auto_offset_reset'] - self.schema_registry = self.conf['schema_registry'] - self.security = self.conf['security']['enabled'] - - self.collection_topic = self.conf['collection_topic_consume'] - self.granule_topic = self.conf['granule_topic_consume'] - if self.metadata_type not in ['COLLECTION', 'GRANULE']: - raise ValueError("metadata_type must be 'COLLECTION' or 'GRANULE'") - - def get_logger(self, log_name, create_file): + def __init__(self, metadata_type, brokers, group_id, auto_offset_reset, schema_registry, security, collection_topic_consume, granule_topic_consume, log_level = 'INFO', **wildargs): """ - Utilizes python logger library and creates logging - - :param log_name: str - name of log to be created - :param create_file: boolean - defines whether of not you want a logger file to be created - - :return: Logger object + Attributes + ---------- + metadata_type: str + type of metadata (COLLECTION or GRANULE) + brokers: str + brokers (kubernetes service) + group_id: str + Client group id string. All clients sharing the same group.id belong to the same group + auto_offset_reset: str + Action to take when there is no initial offset in offset store or the desired offset is out of range (smallest, earliest, beginning, largest, latest, end, error) + schema_registry: str + schema registry (kubernetes service) URL + security: dict + enabled boolean: Whether to use security for kafka schema registry client. + caLoc str: Kafka schema registry certification authority (CA) file location. + keyLoc str: Kafka schema registry client's private key file location. + certLoc str: Kafka schema registry client's public key file location. 
+ + collection_topic_consume: str + collection topic you want to consume + granule_topic_consume: str + granule topic you want to consume + log_level: str + What log level to use for this class """ - # create logger - log = logging.getLogger() - - # create formatter and add it to the handlers - formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + self.metadata_type = metadata_type + self.brokers = brokers + self.group_id = group_id + self.auto_offset_reset = auto_offset_reset + self.schema_registry = schema_registry + self.security_enabled = security['enabled'] - if self.conf['log_level'] == "DEBUG": - log.setLevel(level=logging.DEBUG) - else: - if self.conf['log_level'] == "INFO": - log.setLevel(level=logging.INFO) - else: - log.setLevel(level=logging.ERROR) + if self.security_enabled: + self.security_caLoc = security['caLoc'] + self.security_keyLoc = security['keyLoc'] + self.security_certLoc = security['certLoc'] - fh = None - if create_file: - # create file handler for logger. - fh = logging.FileHandler(log_name) - fh.setFormatter(formatter) + self.collection_topic_consume = collection_topic_consume + self.granule_topic_consume = granule_topic_consume - # create console handler for logger. - ch = logging.StreamHandler() - ch.setFormatter(formatter) + if self.metadata_type not in ['COLLECTION', 'GRANULE']: + raise ValueError("metadata_type must be 'COLLECTION' or 'GRANULE'") - # add handlers to logger. - if create_file: - log.addHandler(fh) + self.logger = ClientLogger.get_logger(self.__class__.__name__, log_level, False) + self.logger.info("Initializing " + self.__class__.__name__) - log.addHandler(ch) - return log + if wildargs: + self.logger.error("There were extra constructor arguments: " + str(wildargs)) def register_client(self): """ @@ -125,10 +111,10 @@ def register_client(self): """ reg_conf = {'url': self.schema_registry} - if self.security: - reg_conf['ssl.ca.location'] = self.conf['security']['caLoc'] - reg_conf['ssl.key.location'] = self.conf['security']['keyLoc'] - reg_conf['ssl.certificate.location'] = self.conf['security']['certLoc'] + if self.security_enabled: + reg_conf['ssl.ca.location'] = self.security_caLoc + reg_conf['ssl.key.location'] = self.security_keyLoc + reg_conf['ssl.certificate.location'] = self.security_certLoc registry_client = SchemaRegistryClient(reg_conf) return registry_client @@ -166,11 +152,11 @@ def create_consumer(self, registry_client): consumer_conf = {'bootstrap.servers': self.brokers} - if self.security: + if self.security_enabled: consumer_conf['security.protocol'] = 'SSL' - consumer_conf['ssl.ca.location'] = self.conf['security']['caLoc'] - consumer_conf['ssl.key.location'] = self.conf['security']['keyLoc'] - consumer_conf['ssl.certificate.location'] = self.conf['security']['certLoc'] + consumer_conf['ssl.ca.location'] = self.security_caLoc + consumer_conf['ssl.key.location'] = self.security_keyLoc + consumer_conf['ssl.certificate.location'] = self.security_certLoc meta_consumer_conf = consumer_conf meta_consumer_conf['key.deserializer'] = StringDeserializer('utf-8') diff --git a/onestop-python-client/tests/KafkaConsumerTest.py b/onestop-python-client/tests/KafkaConsumerTest.py new file mode 100644 index 0000000..e7c3f08 --- /dev/null +++ b/onestop-python-client/tests/KafkaConsumerTest.py @@ -0,0 +1,264 @@ +import unittest + +from unittest.mock import ANY, patch, MagicMock, call +from onestop.KafkaConsumer import KafkaConsumer +from confluent_kafka.schema_registry import SchemaRegistryClient +from 
confluent_kafka.serialization import StringDeserializer + +class KafkaConsumerTest(unittest.TestCase): + kp = None + conf_w_security = None + conf_wo_security = None + + @classmethod + def setUp(cls): + print("Set it up!") + cls.conf_w_security = { + "metadata_type" : "GRANULE", + "brokers" : "onestop-dev-cp-kafka:9092", + "group_id" : "sme-test", + "auto_offset_reset" : "earliest", + "schema_registry" : "http://onestop-dev-cp-schema-registry:8081", + "security" : { + "enabled" : True, + "caLoc" : "/etc/pki/tls/cert.pem", + "keyLoc" : "/etc/pki/tls/private/kafka-user.key", + "certLoc" : "/etc/pki/tls/certs/kafka-user.crt" + }, + "collection_topic_consume" : "psi-collection-input-unknown", + "granule_topic_consume" : "psi-granule-input-unknown", + "log_level" : "DEBUG" + } + cls.conf_wo_security = dict(cls.conf_w_security) + # Remove security credential section. + cls.conf_wo_security['security'] = { + "enabled":False + } + + @classmethod + def tearDown(self): + print("Tear it down!") + + def test_init_happy_nonconditional_params(self): + consumer = KafkaConsumer(**self.conf_w_security) + + self.assertEqual(consumer.metadata_type, self.conf_w_security['metadata_type']) + self.assertEqual(consumer.brokers, self.conf_w_security['brokers']) + self.assertEqual(consumer.group_id, self.conf_w_security['group_id']) + self.assertEqual(consumer.auto_offset_reset, self.conf_w_security['auto_offset_reset']) + self.assertEqual(consumer.schema_registry, self.conf_w_security['schema_registry']) + self.assertEqual(consumer.security_enabled, self.conf_w_security['security']['enabled']) + self.assertEqual(consumer.collection_topic_consume, self.conf_w_security['collection_topic_consume']) + self.assertEqual(consumer.granule_topic_consume, self.conf_w_security['granule_topic_consume']) + + def test_init_security_enabled(self): + consumer = KafkaConsumer(**self.conf_w_security) + + self.assertEqual(consumer.security_caLoc, self.conf_w_security['security']['caLoc']) + self.assertEqual(consumer.security_keyLoc, self.conf_w_security['security']['keyLoc']) + self.assertEqual(consumer.security_certLoc, self.conf_w_security['security']['certLoc']) + + def test_init_security_disabled(self): + consumer = KafkaConsumer(**self.conf_wo_security) + + self.assertRaises(AttributeError, getattr, consumer, "security_caLoc") + self.assertRaises(AttributeError, getattr, consumer, "security_keyLoc") + self.assertRaises(AttributeError, getattr, consumer, "security_certLoc") + + def test_init_metadata_type_valid(self): + consumer = KafkaConsumer(**self.conf_w_security) + + self.assertEqual(consumer.metadata_type, self.conf_w_security['metadata_type']) + + def test_init_metadata_type_invalid(self): + wrong_metadata_type_config = dict(self.conf_w_security) + wrong_metadata_type_config['metadata_type'] = "invalid_type" + + self.assertRaises(ValueError, KafkaConsumer, **wrong_metadata_type_config) + + @patch.object(SchemaRegistryClient, '__init__', autospec=True) + def test_register_client_w_security(self, mock_client): + schema_conf = { + 'url':self.conf_w_security['schema_registry'], + 'ssl.ca.location': self.conf_w_security['security']['caLoc'], + 'ssl.key.location': self.conf_w_security['security']['keyLoc'], + 'ssl.certificate.location': self.conf_w_security['security']['certLoc'] + } + mock_client.return_value = None + + consumer = KafkaConsumer(**self.conf_w_security) + consumer.register_client() + + mock_client.assert_called() + mock_client.assert_called_with(ANY, schema_conf) + + @patch.object(SchemaRegistryClient, 
'__init__', autospec=True) + def test_register_client_wo_security(self, mock_client): + schema_conf = { + 'url' : self.conf_wo_security['schema_registry'] + } + mock_client.return_value = None + + consumer = KafkaConsumer(**self.conf_wo_security) + consumer.register_client() + + mock_client.assert_called_with(ANY, schema_conf) + + @patch('onestop.KafkaConsumer.AvroDeserializer') + @patch('onestop.KafkaConsumer.DeserializingConsumer') + def test_create_consumer_calls_AvroDeserializer(self, mock_deserializing_consumer, mock_avro_deserializer): + conf_w_security_collection = dict(self.conf_w_security) + conf_w_security_collection['metadata_type'] = "COLLECTION" + + consumer = KafkaConsumer(**conf_w_security_collection) + reg_client = consumer.register_client() + reg_client.get_latest_version = MagicMock() + consumer.create_consumer(reg_client) + + # Verify AvroDeserializer called with expected registry client + mock_avro_deserializer.assert_called_with(ANY, reg_client) + + @patch('onestop.KafkaConsumer.AvroDeserializer') + @patch('onestop.KafkaConsumer.DeserializingConsumer') + def test_create_consumer_collection_w_security(self, mock_deserializing_consumer, mock_avro_deserializer): + conf_w_security_collection = dict(self.conf_w_security) + topic = conf_w_security_collection['collection_topic_consume'] + conf_w_security_collection['metadata_type'] = 'COLLECTION' + + consumer = KafkaConsumer(**conf_w_security_collection) + reg_client = MagicMock() + consumer.create_consumer(reg_client) + + # Verify metadata type was taken into consideration for getting topic information + reg_client.get_latest_version.assert_called_with(topic + '-value') + + # Verify security passed into DeserializingConsumer + mock_deserializing_consumer.assert_called_with({'bootstrap.servers': conf_w_security_collection['brokers'], + 'security.protocol': 'SSL', + 'ssl.ca.location': conf_w_security_collection['security']['caLoc'], + 'ssl.key.location': conf_w_security_collection['security']['keyLoc'], + 'ssl.certificate.location': conf_w_security_collection['security']['certLoc'], + 'key.deserializer': ANY, + 'value.deserializer': ANY, + 'group.id': conf_w_security_collection['group_id'], + 'auto.offset.reset': conf_w_security_collection['auto_offset_reset'] + }) + mock_deserializing_consumer.return_value.subscribe.assert_called_with([topic]) + + @patch('onestop.KafkaConsumer.AvroDeserializer') + @patch('onestop.KafkaConsumer.DeserializingConsumer') + def test_create_consumer_collection_wo_security(self, mock_deserializing_consumer, mock_avro_deserializer): + conf_wo_security_collection = dict(self.conf_wo_security) + topic = conf_wo_security_collection['collection_topic_consume'] + conf_wo_security_collection['metadata_type'] = 'COLLECTION' + + consumer = KafkaConsumer(**conf_wo_security_collection) + reg_client = MagicMock() + consumer.create_consumer(reg_client) + + # Verify metadata type was taken into consideration for getting topic information + reg_client.get_latest_version.assert_called_with(topic + '-value') + + # Verify no security passed into DeserializingConsumer + mock_deserializing_consumer.assert_called_with({'bootstrap.servers': conf_wo_security_collection['brokers'], + 'key.deserializer': ANY, + 'value.deserializer': ANY, + 'group.id': conf_wo_security_collection['group_id'], + 'auto.offset.reset': conf_wo_security_collection['auto_offset_reset'] + }) + mock_deserializing_consumer.return_value.subscribe.assert_called_with([topic]) + + @patch('onestop.KafkaConsumer.AvroDeserializer') + 
@patch('onestop.KafkaConsumer.DeserializingConsumer') + def test_create_consumer_granule_w_security(self, mock_deserializing_consumer, mock_avro_deserializer): + conf_w_security_granule = dict(self.conf_w_security) + topic = conf_w_security_granule['granule_topic_consume'] + conf_w_security_granule['metadata_type'] = 'GRANULE' + + consumer = KafkaConsumer(**conf_w_security_granule) + reg_client = MagicMock() + consumer.create_consumer(reg_client) + + # Verify metadata type was taken into consideration for getting topic information + reg_client.get_latest_version.assert_called_with(topic + '-value') + + # Verify security passed into DeserializingConsumer + mock_deserializing_consumer.assert_called_with({'bootstrap.servers': conf_w_security_granule['brokers'], + 'security.protocol': 'SSL', + 'ssl.ca.location': conf_w_security_granule['security']['caLoc'], + 'ssl.key.location': conf_w_security_granule['security']['keyLoc'], + 'ssl.certificate.location': conf_w_security_granule['security']['certLoc'], + 'key.deserializer': ANY, + 'value.deserializer': ANY, + 'group.id': conf_w_security_granule['group_id'], + 'auto.offset.reset': conf_w_security_granule['auto_offset_reset'] + }) + mock_deserializing_consumer.return_value.subscribe.assert_called_with([topic]) + + @patch('onestop.KafkaConsumer.AvroDeserializer') + @patch('onestop.KafkaConsumer.DeserializingConsumer') + def test_create_consumer_granule_wo_security(self, mock_deserializing_consumer, mock_avro_deserializer): + conf_wo_security_granule = dict(self.conf_wo_security) + topic = conf_wo_security_granule['granule_topic_consume'] + conf_wo_security_granule['metadata_type'] = 'GRANULE' + + # Verify security taken into consideration + meta_consumer_conf = {'bootstrap.servers': conf_wo_security_granule['brokers'], + 'key.deserializer': StringDeserializer('utf-8'), + 'value.deserializer': mock_avro_deserializer, + 'group.id': conf_wo_security_granule['group_id'], + 'auto.offset.reset': conf_wo_security_granule['auto_offset_reset'] + } + + consumer = KafkaConsumer(**conf_wo_security_granule) + reg_client = MagicMock() + consumer.create_consumer(reg_client) + + # Verify metadata type was taken into consideration for getting topic information + reg_client.get_latest_version.assert_called_with(topic + '-value') + + # Verify no security passed into DeserializingConsumer called with expected configuration + meta_consumer_conf['key.deserializer'] = ANY + meta_consumer_conf['value.deserializer'] = ANY + mock_deserializing_consumer.assert_called_with(meta_consumer_conf) + mock_deserializing_consumer.return_value.subscribe.assert_called_with([topic]) + + def test_connect(self): + mock_client = MagicMock() + + consumer = KafkaConsumer(**self.conf_w_security) + consumer.register_client = MagicMock(return_value=mock_client) + consumer.create_consumer = MagicMock(return_value=MagicMock(mock_client)) + consumer.connect() + + consumer.register_client.assert_called_once() + consumer.create_consumer.assert_called_with(mock_client) + + @patch('confluent_kafka.cimpl.Message') + @patch('onestop.KafkaConsumer.DeserializingConsumer') + def test_consume(self, mock_metadata_consumer, mock_message): + mock_message_key = 'key1' + mock_message_value = 'value1' + consumer = KafkaConsumer(**self.conf_w_security) + consumer.register_client = MagicMock(return_value=MagicMock()) + mock_message.key.return_value = mock_message_key + mock_message.value.return_value = mock_message_value + mock_metadata_consumer.poll.side_effect = [None, mock_message, Exception] + mock_handler = 
MagicMock() + + # Would have liked not having the try/catch but it wasn't ignoring the exception. Just need to not fail due to end of loop. + try: + self.assertRaises(Exception, consumer.consume(mock_metadata_consumer, mock_handler)) + except Exception as e: + print("Ignoring exception: {}".format(e)) + + # Verify kafka consumer poll called expected number of times + self.assertTrue(mock_metadata_consumer.poll.call_count == 3) + mock_metadata_consumer.poll.assert_has_calls([call(10), call(10), call(10)]) + + # Verify callback function was called once with expected message attributes + mock_handler.assert_called_once() + mock_handler.assert_called_with(mock_message_key, mock_message_value) + + if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/scripts/sme/smeFunc.py b/scripts/sme/smeFunc.py index 2e11d51..084e15b 100644 --- a/scripts/sme/smeFunc.py +++ b/scripts/sme/smeFunc.py @@ -27,7 +27,7 @@ def handler(key,value): if __name__ == '__main__': kafka_consumer = KafkaConsumer("scripts/config/kafka-publisher-config-dev.yml") - kafka_consumer.granule_topic = 'psi-granule-parsed' + kafka_consumer.granule_topic_consume = 'psi-granule-parsed' metadata_consumer = kafka_consumer.connect() kafka_consumer.consume(metadata_consumer, lambda k, v: handler(k, v)) """ From b63bfe868d48e3e574e3a5f0b75b1d6dff5897ef Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 28 Apr 2021 10:11:07 -0600 Subject: [PATCH 07/49] 1500-Adjusted KafkaConsumer create_consumer to not do duplicate code, using a topic variable instead. Removed changing name of the consumer_conf to meta_consumer_conf. Added additional logging. --- .../onestop/KafkaConsumer.py | 37 +++++++++++-------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/onestop-python-client/onestop/KafkaConsumer.py b/onestop-python-client/onestop/KafkaConsumer.py index a3d1e95..54744cb 100644 --- a/onestop-python-client/onestop/KafkaConsumer.py +++ b/onestop-python-client/onestop/KafkaConsumer.py @@ -108,7 +108,7 @@ def register_client(self): Registers to schema registry client based on configs :return: SchemaRegistryClient (confluent kafka library) - """ + """ reg_conf = {'url': self.schema_registry} if self.security_enabled: @@ -116,6 +116,7 @@ def register_client(self): reg_conf['ssl.key.location'] = self.security_keyLoc reg_conf['ssl.certificate.location'] = self.security_certLoc + self.logger.info("Creating SchemaRegistryClient with configuration:"+str(reg_conf)) registry_client = SchemaRegistryClient(reg_conf) return registry_client @@ -138,18 +139,21 @@ def create_consumer(self, registry_client): :return: DeserializingConsumer object """ - metadata_schema = None topic = None if self.metadata_type == "COLLECTION": - metadata_schema = registry_client.get_latest_version(self.collection_topic + '-value').schema.schema_str - topic = self.collection_topic + topic = self.collection_topic_consume if self.metadata_type == "GRANULE": - metadata_schema = registry_client.get_latest_version(self.granule_topic + '-value').schema.schema_str - topic = self.granule_topic + topic = self.granule_topic_consume - metadata_deserializer = AvroDeserializer(metadata_schema, registry_client) + self.logger.debug("topic: "+str(topic)) + + # This topic naming scheme is how OneStop creates the topics. 
+ latest_schema = registry_client.get_latest_version(topic + '-value') + metadata_schema = latest_schema.schema.schema_str + self.logger.debug("metadata_schema: "+metadata_schema) + metadata_deserializer = AvroDeserializer(metadata_schema, registry_client) consumer_conf = {'bootstrap.servers': self.brokers} if self.security_enabled: @@ -158,13 +162,14 @@ def create_consumer(self, registry_client): consumer_conf['ssl.key.location'] = self.security_keyLoc consumer_conf['ssl.certificate.location'] = self.security_certLoc - meta_consumer_conf = consumer_conf - meta_consumer_conf['key.deserializer'] = StringDeserializer('utf-8') - meta_consumer_conf['value.deserializer'] = metadata_deserializer - meta_consumer_conf['group.id'] = self.group_id - meta_consumer_conf['auto.offset.reset'] = self.auto_offset_reset + consumer_conf['key.deserializer'] = StringDeserializer('utf-8') + consumer_conf['value.deserializer'] = metadata_deserializer + consumer_conf['group.id'] = self.group_id + consumer_conf['auto.offset.reset'] = self.auto_offset_reset - metadata_consumer = DeserializingConsumer(meta_consumer_conf) + self.logger.debug("meta_consumer_conf: "+str(consumer_conf)) + metadata_consumer = DeserializingConsumer(consumer_conf) + self.logger.debug("topic: "+str(topic)) metadata_consumer.subscribe([topic]) return metadata_consumer @@ -183,15 +188,16 @@ def consume(self, metadata_consumer, handler): while True: try: msg = metadata_consumer.poll(10) + self.logger.debug("Message received: "+str(msg)) if msg is None: - print('No Messages') + self.logger.info('No Messages') continue + self.logger.debug("Message key="+str(msg.key())+" value="+str(msg.value())) key = msg.key() value = msg.value() - except KafkaError: raise try: @@ -199,4 +205,5 @@ def consume(self, metadata_consumer, handler): except Exception as e: self.logger.error("Message handler failed: {}".format(e)) break + self.logger.debug("Closing metadata_consumer") metadata_consumer.close() From 74e7f6f5c03c5943d6b1cdd98dc04bdcc68d9fc9 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 28 Apr 2021 10:19:42 -0600 Subject: [PATCH 08/49] 1500-Added __init__.py to tests directory so was discoverable/module. --- onestop-python-client/tests/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 onestop-python-client/tests/__init__.py diff --git a/onestop-python-client/tests/__init__.py b/onestop-python-client/tests/__init__.py new file mode 100644 index 0000000..e69de29 From a07e642ee13f34b6baa5d3679292a5403cdfed8c Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 28 Apr 2021 12:47:34 -0600 Subject: [PATCH 09/49] 1500-in KafkaConsumer renamed variables so tad more generic. Makes it clearer how similar code is to KafkaPublisher class. 
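Side note on the create_consumer() changes above (illustration only, not part of any patch): the schema registry subject is derived from the topic name by appending '-value', which is how OneStop registers its schemas. Roughly, assuming registry_client was obtained from register_client() and using the example granule topic from the configs above:

    latest_schema = registry_client.get_latest_version('psi-granule-input-unknown' + '-value')
    metadata_schema = latest_schema.schema.schema_str  # Avro schema string handed to AvroDeserializer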
--- .../onestop/KafkaConsumer.py | 46 +++++++++---------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/onestop-python-client/onestop/KafkaConsumer.py b/onestop-python-client/onestop/KafkaConsumer.py index 54744cb..18c489e 100644 --- a/onestop-python-client/onestop/KafkaConsumer.py +++ b/onestop-python-client/onestop/KafkaConsumer.py @@ -91,8 +91,8 @@ def __init__(self, metadata_type, brokers, group_id, auto_offset_reset, schema_r self.security_keyLoc = security['keyLoc'] self.security_certLoc = security['certLoc'] - self.collection_topic_consume = collection_topic_consume - self.granule_topic_consume = granule_topic_consume + self.collection_topic = collection_topic_consume + self.granule_topic = granule_topic_consume if self.metadata_type not in ['COLLECTION', 'GRANULE']: raise ValueError("metadata_type must be 'COLLECTION' or 'GRANULE'") @@ -109,15 +109,15 @@ def register_client(self): :return: SchemaRegistryClient (confluent kafka library) """ - reg_conf = {'url': self.schema_registry} + conf = {'url': self.schema_registry} if self.security_enabled: - reg_conf['ssl.ca.location'] = self.security_caLoc - reg_conf['ssl.key.location'] = self.security_keyLoc - reg_conf['ssl.certificate.location'] = self.security_certLoc + conf['ssl.ca.location'] = self.security_caLoc + conf['ssl.key.location'] = self.security_keyLoc + conf['ssl.certificate.location'] = self.security_certLoc - self.logger.info("Creating SchemaRegistryClient with configuration:"+str(reg_conf)) - registry_client = SchemaRegistryClient(reg_conf) + self.logger.info("Creating SchemaRegistryClient with configuration:"+str(conf)) + registry_client = SchemaRegistryClient(conf) return registry_client def connect(self): @@ -141,10 +141,10 @@ def create_consumer(self, registry_client): """ topic = None if self.metadata_type == "COLLECTION": - topic = self.collection_topic_consume + topic = self.collection_topic if self.metadata_type == "GRANULE": - topic = self.granule_topic_consume + topic = self.granule_topic self.logger.debug("topic: "+str(topic)) @@ -154,21 +154,21 @@ def create_consumer(self, registry_client): metadata_schema = latest_schema.schema.schema_str self.logger.debug("metadata_schema: "+metadata_schema) metadata_deserializer = AvroDeserializer(metadata_schema, registry_client) - consumer_conf = {'bootstrap.servers': self.brokers} + conf = {'bootstrap.servers': self.brokers} if self.security_enabled: - consumer_conf['security.protocol'] = 'SSL' - consumer_conf['ssl.ca.location'] = self.security_caLoc - consumer_conf['ssl.key.location'] = self.security_keyLoc - consumer_conf['ssl.certificate.location'] = self.security_certLoc - - consumer_conf['key.deserializer'] = StringDeserializer('utf-8') - consumer_conf['value.deserializer'] = metadata_deserializer - consumer_conf['group.id'] = self.group_id - consumer_conf['auto.offset.reset'] = self.auto_offset_reset - - self.logger.debug("meta_consumer_conf: "+str(consumer_conf)) - metadata_consumer = DeserializingConsumer(consumer_conf) + conf['security.protocol'] = 'SSL' + conf['ssl.ca.location'] = self.security_caLoc + conf['ssl.key.location'] = self.security_keyLoc + conf['ssl.certificate.location'] = self.security_certLoc + + conf['key.deserializer'] = StringDeserializer('utf-8') + conf['value.deserializer'] = metadata_deserializer + conf['group.id'] = self.group_id + conf['auto.offset.reset'] = self.auto_offset_reset + + self.logger.debug("conf: "+str(conf)) + metadata_consumer = DeserializingConsumer(conf) self.logger.debug("topic: "+str(topic)) 
metadata_consumer.subscribe([topic]) return metadata_consumer From da934c40324310b255dfee857ef356b21cfd7fe8 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 28 Apr 2021 12:59:07 -0600 Subject: [PATCH 10/49] 1500-Fixed KafkaConsumerTest (thought intellij refactor of var name would notice this usage). --- onestop-python-client/tests/KafkaConsumerTest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onestop-python-client/tests/KafkaConsumerTest.py b/onestop-python-client/tests/KafkaConsumerTest.py index e7c3f08..776e21a 100644 --- a/onestop-python-client/tests/KafkaConsumerTest.py +++ b/onestop-python-client/tests/KafkaConsumerTest.py @@ -48,8 +48,8 @@ def test_init_happy_nonconditional_params(self): self.assertEqual(consumer.auto_offset_reset, self.conf_w_security['auto_offset_reset']) self.assertEqual(consumer.schema_registry, self.conf_w_security['schema_registry']) self.assertEqual(consumer.security_enabled, self.conf_w_security['security']['enabled']) - self.assertEqual(consumer.collection_topic_consume, self.conf_w_security['collection_topic_consume']) - self.assertEqual(consumer.granule_topic_consume, self.conf_w_security['granule_topic_consume']) + self.assertEqual(consumer.collection_topic, self.conf_w_security['collection_topic_consume']) + self.assertEqual(consumer.granule_topic, self.conf_w_security['granule_topic_consume']) def test_init_security_enabled(self): consumer = KafkaConsumer(**self.conf_w_security) From 92e12c9dc1253ecd0acb3434936c0d25f2276b72 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 28 Apr 2021 12:59:42 -0600 Subject: [PATCH 11/49] 1500-KafkaConsumer consolidated config for deserializer. --- onestop-python-client/onestop/KafkaConsumer.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/onestop-python-client/onestop/KafkaConsumer.py b/onestop-python-client/onestop/KafkaConsumer.py index 18c489e..76078cc 100644 --- a/onestop-python-client/onestop/KafkaConsumer.py +++ b/onestop-python-client/onestop/KafkaConsumer.py @@ -154,7 +154,13 @@ def create_consumer(self, registry_client): metadata_schema = latest_schema.schema.schema_str self.logger.debug("metadata_schema: "+metadata_schema) metadata_deserializer = AvroDeserializer(metadata_schema, registry_client) - conf = {'bootstrap.servers': self.brokers} + conf = { + 'bootstrap.servers': self.brokers, + 'key.deserializer': StringDeserializer('utf-8'), + 'value.deserializer': metadata_deserializer, + 'group.id': self.group_id, + 'auto.offset.reset': self.auto_offset_reset + } if self.security_enabled: conf['security.protocol'] = 'SSL' @@ -162,11 +168,6 @@ def create_consumer(self, registry_client): conf['ssl.key.location'] = self.security_keyLoc conf['ssl.certificate.location'] = self.security_certLoc - conf['key.deserializer'] = StringDeserializer('utf-8') - conf['value.deserializer'] = metadata_deserializer - conf['group.id'] = self.group_id - conf['auto.offset.reset'] = self.auto_offset_reset - self.logger.debug("conf: "+str(conf)) metadata_consumer = DeserializingConsumer(conf) self.logger.debug("topic: "+str(topic)) From ba1740838c3ef9c6c488374228b37eb45155bf30 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 28 Apr 2021 17:50:02 -0600 Subject: [PATCH 12/49] 1500-Changed KafkaPublisher class constructors(adjusted documentation) to take dictionary with extra parameters allowed as well as methods within this class not to reference config but the variable that was set. Removed logging import and changed to ClientLogger. 
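Illustration only (not part of this patch): the publisher now mirrors the consumer and is constructed from a dictionary as well; the keys follow the new constructor signature and the values below are the ones used in the unit tests:

    config = {
        "metadata_type": "GRANULE",
        "brokers": "onestop-dev-cp-kafka:9092",
        "schema_registry": "http://onestop-dev-cp-schema-registry:8081",
        "security": {"enabled": False},
        "collection_topic_publish": "psi-collection-input-unknown",
        "granule_topic_publish": "psi-granule-input-unknown",
        "log_level": "DEBUG",
    }
    publisher = KafkaPublisher(**config)
    metadata_producer = publisher.connect()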
--- .../onestop/KafkaPublisher.py | 182 ++++++++---------- 1 file changed, 82 insertions(+), 100 deletions(-) diff --git a/onestop-python-client/onestop/KafkaPublisher.py b/onestop-python-client/onestop/KafkaPublisher.py index d357de8..125174b 100644 --- a/onestop-python-client/onestop/KafkaPublisher.py +++ b/onestop-python-client/onestop/KafkaPublisher.py @@ -1,13 +1,11 @@ -import logging -from uuid import UUID import json -import yaml +from uuid import UUID from confluent_kafka.schema_registry import SchemaRegistryClient from confluent_kafka.error import KafkaError from confluent_kafka import SerializingProducer from confluent_kafka.schema_registry.avro import AvroSerializer - +from onestop.util.ClientLogger import ClientLogger class KafkaPublisher: """ @@ -15,114 +13,98 @@ class KafkaPublisher: Attributes ---------- - conf: yaml file - config/kafka-publisher-config-dev.yml - logger: Logger object - utilizes python logger library and creates logging for our specific needs - logger.info: Logger object - logging statement that occurs when the class is instantiated - metadata_type: str - type of metadata (COLLECTION or GRANULE) - brokers: str - brokers (kubernetes service) - schema_registry: str - schema registry (kubernetes service) - security: boolean - defines if security is in place - collection_topic: str - collection topic you want to consume - granule_topic: str - granule topic you want to consume + metadata_type: str + type of metadata (COLLECTION or GRANULE) + brokers: str + brokers (kubernetes service) + schema_registry: str + schema registry (kubernetes service) + security_enabled: boolean + defines if security is in place + security_caLoc: str + Kafka schema registry certification authority (CA) file location. + security_keyLoc: str + Kafka schema registry client's private key file location. + security_certLoc: str + Kafka schema registry client's public key file location. 
+ collection_topic: str + collection topic you want to produce to + granule_topic: str + granule topic you want to produce to + logger: Logger object + utilizes python logger library and creates logging for our specific needs Methods ------- - get_logger(log_name, create_file) - creates logger file + register_client() + registers to schema registry client based on configs - register_client() - registers to schema registry client based on configs + create_producer(registry_client) + creates a SerializingProducer object to produce to kafka topic - create_producer(registry_client) - creates a SerializingProducer object to produce to kafka topic + connect() + utilizes register_client() and create_producer(registry_client) to connect to schema registry and allow for producing to kafka topics - connect() - utilizes register_client() and create_producer(registry_client) to connect to schema registry and allow for producing to kafka topics + publish_collection(collection_producer, collection_uuid, content_dict, method) + Publish collection to collection topic - publish_collection(collection_producer, collection_uuid, content_dict, method) - Publish collection to collection topic - - publish_granule(granule_producer, record_uuid, collection_uuid, content_dict) - Publish granule to granule topic + publish_granule(granule_producer, record_uuid, collection_uuid, content_dict) + Publish granule to granule topic """ - conf = None - - def __init__(self, conf_loc): - with open(conf_loc) as f: - self.conf = yaml.load(f, Loader=yaml.FullLoader) - - self.logger = self.get_logger(self.__class__.__name__, False) - self.logger.info("Initializing " + self.__class__.__name__) - self.metadata_type = self.conf['metadata_type'] - self.brokers = self.conf['brokers'] - self.schema_registry = self.conf['schema_registry'] - self.security = self.conf['security']['enabled'] - - self.collection_topic = self.conf['collection_topic_produce'] - self.granule_topic = self.conf['granule_topic_produce'] - - if self.metadata_type not in ['COLLECTION', 'GRANULE']: - raise ValueError("metadata_type must be 'COLLECTION' or 'GRANULE'") - - def get_logger(self, log_name, create_file): + def __init__(self, metadata_type, brokers, schema_registry, security, collection_topic_publish, granule_topic_publish, log_level='INFO', **wildargs): """ - Utilizes python logger library and creates logging - - :param log_name: str - name of log to be created - :param create_file: boolean - defines whether of not you want a logger file to be created - - :return: Logger object + Attributes + ---------- + metadata_type: str + type of metadata (COLLECTION or GRANULE) + brokers: str + brokers (kubernetes service) + group_id: str + Client group id string. All clients sharing the same group.id belong to the same group + auto_offset_reset: str + Action to take when there is no initial offset in offset store or the desired offset is out of range (smallest, earliest, beginning, largest, latest, end, error) + schema_registry: str + schema registry (kubernetes service) URL + security: dict + enabled boolean: Whether to use security for kafka schema registry client. + caLoc str: Kafka schema registry certification authority (CA) file location. + keyLoc str: Kafka schema registry client's private key file location. + certLoc str: Kafka schema registry client's public key file location. 
+ + collection_topic: str + collection topic you want to produce to + granule_topic: str + granule topic you want to produce to """ + self.metadata_type = metadata_type + self.brokers = brokers + self.schema_registry = schema_registry + self.security_enabled = security['enabled'] - # create logger - log = logging.getLogger() - - # create formatter and add it to the handlers - formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + if self.security_enabled: + self.security_caLoc = security['caLoc'] + self.security_keyLoc = security['keyLoc'] + self.security_certLoc = security['certLoc'] - if self.conf['log_level'] == "DEBUG": - log.setLevel(level=logging.DEBUG) - else: - if self.conf['log_level'] == "INFO": - log.setLevel(level=logging.INFO) - else: - log.setLevel(level=logging.ERROR) - - fh = None - if create_file: - # create file handler for logger. - fh = logging.FileHandler(log_name) - fh.setFormatter(formatter) + self.collection_topic = collection_topic_publish + self.granule_topic = granule_topic_publish - # create console handler for logger. - ch = logging.StreamHandler() - ch.setFormatter(formatter) + if self.metadata_type not in ['COLLECTION', 'GRANULE']: + raise ValueError("metadata_type must be 'COLLECTION' or 'GRANULE'") - # add handlers to logger. - if create_file: - log.addHandler(fh) + self.logger = ClientLogger.get_logger(self.__class__.__name__, log_level, False) + self.logger.info("Initializing " + self.__class__.__name__) - log.addHandler(ch) - return log + if wildargs: + self.logger.warning("There were extra constructor arguments: " + str(wildargs)) def connect(self): """ Utilizes register_client() and create_producer(registry_client) to connect to schema registry and allow for producing to kafka topics :return: SerializingProducer Object - based on config values + based on initial constructor values """ registry_client = self.register_client() metadata_producer = self.create_producer(registry_client) @@ -137,10 +119,10 @@ def register_client(self): reg_conf = {'url': self.schema_registry} - if self.security: - reg_conf['ssl.ca.location'] = self.conf['security']['caLoc'] - reg_conf['ssl.key.location'] = self.conf['security']['keyLoc'] - reg_conf['ssl.certificate.location'] = self.conf['security']['certLoc'] + if self.security_enabled: + reg_conf['ssl.ca.location'] = self.security_caLoc + reg_conf['ssl.key.location'] = self.security_keyLoc + reg_conf['ssl.certificate.location'] = self.security_certLoc registry_client = SchemaRegistryClient(reg_conf) return registry_client @@ -153,7 +135,7 @@ def create_producer(self, registry_client): get this from register_client() :return: SerializingProducer Object - based on config values + based on initial constructor values """ metadata_schema = None @@ -166,11 +148,11 @@ def create_producer(self, registry_client): metadata_serializer = AvroSerializer(metadata_schema, registry_client) producer_conf = {'bootstrap.servers': self.brokers} - if self.security: + if self.security_enabled: producer_conf['security.protocol'] = 'SSL' - producer_conf['ssl.ca.location'] = self.conf['security']['caLoc'] - producer_conf['ssl.key.location'] = self.conf['security']['keyLoc'] - producer_conf['ssl.certificate.location'] = self.conf['security']['certLoc'] + producer_conf['ssl.ca.location'] = self.security_caLoc + producer_conf['ssl.key.location'] = self.security_keyLoc + producer_conf['ssl.certificate.location'] = self.security_certLoc meta_producer_conf = producer_conf meta_producer_conf['value.serializer'] = 
metadata_serializer @@ -180,7 +162,7 @@ def create_producer(self, registry_client): def delivery_report(self, err, msg): """ - Called once for each message produced to indicate delivery result. Triggered by poll() or flush(). + Called once for each message produced to indicate delivery of message. Triggered by poll() or flush(). :param err: str err produced after publishing, if there is one From 392788aee0f8714b021d9a8fb96b89f2278a686b Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 28 Apr 2021 18:06:35 -0600 Subject: [PATCH 13/49] 1500-Changed KafkaConsumerTest(s) to have vars named exp where it makes sense. Added test for extra arguments via constructor. Fixed test for testing less parameters passed in, because more could have made it through. --- .../tests/KafkaConsumerTest.py | 40 ++++++++++++------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/onestop-python-client/tests/KafkaConsumerTest.py b/onestop-python-client/tests/KafkaConsumerTest.py index 776e21a..d1e6195 100644 --- a/onestop-python-client/tests/KafkaConsumerTest.py +++ b/onestop-python-client/tests/KafkaConsumerTest.py @@ -76,9 +76,14 @@ def test_init_metadata_type_invalid(self): self.assertRaises(ValueError, KafkaConsumer, **wrong_metadata_type_config) + def test_init_extra_params(self): + conf = dict(self.conf_wo_security) + conf['junk_key'] = 'junk_value' + KafkaConsumer(**conf) + @patch.object(SchemaRegistryClient, '__init__', autospec=True) def test_register_client_w_security(self, mock_client): - schema_conf = { + exp_security_conf = { 'url':self.conf_w_security['schema_registry'], 'ssl.ca.location': self.conf_w_security['security']['caLoc'], 'ssl.key.location': self.conf_w_security['security']['keyLoc'], @@ -90,19 +95,25 @@ def test_register_client_w_security(self, mock_client): consumer.register_client() mock_client.assert_called() - mock_client.assert_called_with(ANY, schema_conf) + mock_client.assert_called_with(ANY, exp_security_conf) @patch.object(SchemaRegistryClient, '__init__', autospec=True) def test_register_client_wo_security(self, mock_client): - schema_conf = { - 'url' : self.conf_wo_security['schema_registry'] + exp_security_conf = { + 'url':self.conf_w_security['schema_registry'], + 'ssl.ca.location': self.conf_w_security['security']['caLoc'], + 'ssl.key.location': self.conf_w_security['security']['keyLoc'], + 'ssl.certificate.location': self.conf_w_security['security']['certLoc'] } mock_client.return_value = None consumer = KafkaConsumer(**self.conf_wo_security) consumer.register_client() - - mock_client.assert_called_with(ANY, schema_conf) + try: + mock_client.assert_called_with(ANY, exp_security_conf) + except: + return + raise AssertionError('Expected register_client() to not have been called with security arguments.') @patch('onestop.KafkaConsumer.AvroDeserializer') @patch('onestop.KafkaConsumer.DeserializingConsumer') @@ -199,7 +210,7 @@ def test_create_consumer_granule_w_security(self, mock_deserializing_consumer, m @patch('onestop.KafkaConsumer.DeserializingConsumer') def test_create_consumer_granule_wo_security(self, mock_deserializing_consumer, mock_avro_deserializer): conf_wo_security_granule = dict(self.conf_wo_security) - topic = conf_wo_security_granule['granule_topic_consume'] + exp_topic = conf_wo_security_granule['granule_topic_consume'] conf_wo_security_granule['metadata_type'] = 'GRANULE' # Verify security taken into consideration @@ -215,13 +226,14 @@ def test_create_consumer_granule_wo_security(self, mock_deserializing_consumer, consumer.create_consumer(reg_client) 
# Verify metadata type was taken into consideration for getting topic information - reg_client.get_latest_version.assert_called_with(topic + '-value') + reg_client.get_latest_version.assert_called_with(exp_topic + '-value') # Verify no security passed into DeserializingConsumer called with expected configuration - meta_consumer_conf['key.deserializer'] = ANY - meta_consumer_conf['value.deserializer'] = ANY - mock_deserializing_consumer.assert_called_with(meta_consumer_conf) - mock_deserializing_consumer.return_value.subscribe.assert_called_with([topic]) + exp_arguments = dict(meta_consumer_conf) + exp_arguments['key.deserializer'] = ANY + exp_arguments['value.deserializer'] = ANY + mock_deserializing_consumer.assert_called_with(exp_arguments) + mock_deserializing_consumer.return_value.subscribe.assert_called_with([exp_topic]) def test_connect(self): mock_client = MagicMock() @@ -260,5 +272,5 @@ def test_consume(self, mock_metadata_consumer, mock_message): mock_handler.assert_called_once() mock_handler.assert_called_with(mock_message_key, mock_message_value) - if __name__ == '__main__': - unittest.main() \ No newline at end of file +if __name__ == '__main__': + unittest.main() \ No newline at end of file From 3426472c26738ae8866329fecfad58335f440bac Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 4 May 2021 09:56:56 -0600 Subject: [PATCH 14/49] 1500-KafkaConsumerTest improved the formatting of dicts, tested return values a little better where could, and changed a test of assertTrue to assertEqual so got to see what the actual value was when failed. --- .../tests/KafkaConsumerTest.py | 101 ++++++++++-------- 1 file changed, 56 insertions(+), 45 deletions(-) diff --git a/onestop-python-client/tests/KafkaConsumerTest.py b/onestop-python-client/tests/KafkaConsumerTest.py index d1e6195..1246789 100644 --- a/onestop-python-client/tests/KafkaConsumerTest.py +++ b/onestop-python-client/tests/KafkaConsumerTest.py @@ -3,7 +3,6 @@ from unittest.mock import ANY, patch, MagicMock, call from onestop.KafkaConsumer import KafkaConsumer from confluent_kafka.schema_registry import SchemaRegistryClient -from confluent_kafka.serialization import StringDeserializer class KafkaConsumerTest(unittest.TestCase): kp = None @@ -124,11 +123,13 @@ def test_create_consumer_calls_AvroDeserializer(self, mock_deserializing_consume consumer = KafkaConsumer(**conf_w_security_collection) reg_client = consumer.register_client() reg_client.get_latest_version = MagicMock() - consumer.create_consumer(reg_client) + deser_consumer = consumer.create_consumer(reg_client) # Verify AvroDeserializer called with expected registry client mock_avro_deserializer.assert_called_with(ANY, reg_client) + self.assertIsNotNone(deser_consumer) + @patch('onestop.KafkaConsumer.AvroDeserializer') @patch('onestop.KafkaConsumer.DeserializingConsumer') def test_create_consumer_collection_w_security(self, mock_deserializing_consumer, mock_avro_deserializer): @@ -138,24 +139,28 @@ def test_create_consumer_collection_w_security(self, mock_deserializing_consumer consumer = KafkaConsumer(**conf_w_security_collection) reg_client = MagicMock() - consumer.create_consumer(reg_client) + deser_consumer = consumer.create_consumer(reg_client) # Verify metadata type was taken into consideration for getting topic information reg_client.get_latest_version.assert_called_with(topic + '-value') # Verify security passed into DeserializingConsumer - mock_deserializing_consumer.assert_called_with({'bootstrap.servers': conf_w_security_collection['brokers'], - 
'security.protocol': 'SSL', - 'ssl.ca.location': conf_w_security_collection['security']['caLoc'], - 'ssl.key.location': conf_w_security_collection['security']['keyLoc'], - 'ssl.certificate.location': conf_w_security_collection['security']['certLoc'], - 'key.deserializer': ANY, - 'value.deserializer': ANY, - 'group.id': conf_w_security_collection['group_id'], - 'auto.offset.reset': conf_w_security_collection['auto_offset_reset'] - }) + mock_deserializing_consumer.assert_called_with( + { + 'bootstrap.servers': conf_w_security_collection['brokers'], + 'security.protocol': 'SSL', + 'ssl.ca.location': conf_w_security_collection['security']['caLoc'], + 'ssl.key.location': conf_w_security_collection['security']['keyLoc'], + 'ssl.certificate.location': conf_w_security_collection['security']['certLoc'], + 'key.deserializer': ANY, + 'value.deserializer': ANY, + 'group.id': conf_w_security_collection['group_id'], + 'auto.offset.reset': conf_w_security_collection['auto_offset_reset'] + }) mock_deserializing_consumer.return_value.subscribe.assert_called_with([topic]) + self.assertIsNotNone(deser_consumer) + @patch('onestop.KafkaConsumer.AvroDeserializer') @patch('onestop.KafkaConsumer.DeserializingConsumer') def test_create_consumer_collection_wo_security(self, mock_deserializing_consumer, mock_avro_deserializer): @@ -165,20 +170,24 @@ def test_create_consumer_collection_wo_security(self, mock_deserializing_consume consumer = KafkaConsumer(**conf_wo_security_collection) reg_client = MagicMock() - consumer.create_consumer(reg_client) + deser_consumer = consumer.create_consumer(reg_client) # Verify metadata type was taken into consideration for getting topic information reg_client.get_latest_version.assert_called_with(topic + '-value') # Verify no security passed into DeserializingConsumer - mock_deserializing_consumer.assert_called_with({'bootstrap.servers': conf_wo_security_collection['brokers'], - 'key.deserializer': ANY, - 'value.deserializer': ANY, - 'group.id': conf_wo_security_collection['group_id'], - 'auto.offset.reset': conf_wo_security_collection['auto_offset_reset'] - }) + mock_deserializing_consumer.assert_called_with( + { + 'bootstrap.servers': conf_wo_security_collection['brokers'], + 'key.deserializer': ANY, + 'value.deserializer': ANY, + 'group.id': conf_wo_security_collection['group_id'], + 'auto.offset.reset': conf_wo_security_collection['auto_offset_reset'] + }) mock_deserializing_consumer.return_value.subscribe.assert_called_with([topic]) + self.assertIsNotNone(deser_consumer) + @patch('onestop.KafkaConsumer.AvroDeserializer') @patch('onestop.KafkaConsumer.DeserializingConsumer') def test_create_consumer_granule_w_security(self, mock_deserializing_consumer, mock_avro_deserializer): @@ -188,24 +197,28 @@ def test_create_consumer_granule_w_security(self, mock_deserializing_consumer, m consumer = KafkaConsumer(**conf_w_security_granule) reg_client = MagicMock() - consumer.create_consumer(reg_client) + deser_consumer = consumer.create_consumer(reg_client) # Verify metadata type was taken into consideration for getting topic information reg_client.get_latest_version.assert_called_with(topic + '-value') # Verify security passed into DeserializingConsumer - mock_deserializing_consumer.assert_called_with({'bootstrap.servers': conf_w_security_granule['brokers'], - 'security.protocol': 'SSL', - 'ssl.ca.location': conf_w_security_granule['security']['caLoc'], - 'ssl.key.location': conf_w_security_granule['security']['keyLoc'], - 'ssl.certificate.location': 
conf_w_security_granule['security']['certLoc'], - 'key.deserializer': ANY, - 'value.deserializer': ANY, - 'group.id': conf_w_security_granule['group_id'], - 'auto.offset.reset': conf_w_security_granule['auto_offset_reset'] - }) + mock_deserializing_consumer.assert_called_with( + { + 'bootstrap.servers': conf_w_security_granule['brokers'], + 'security.protocol': 'SSL', + 'ssl.ca.location': conf_w_security_granule['security']['caLoc'], + 'ssl.key.location': conf_w_security_granule['security']['keyLoc'], + 'ssl.certificate.location': conf_w_security_granule['security']['certLoc'], + 'key.deserializer': ANY, + 'value.deserializer': ANY, + 'group.id': conf_w_security_granule['group_id'], + 'auto.offset.reset': conf_w_security_granule['auto_offset_reset'] + }) mock_deserializing_consumer.return_value.subscribe.assert_called_with([topic]) + self.assertIsNotNone(deser_consumer) + @patch('onestop.KafkaConsumer.AvroDeserializer') @patch('onestop.KafkaConsumer.DeserializingConsumer') def test_create_consumer_granule_wo_security(self, mock_deserializing_consumer, mock_avro_deserializer): @@ -213,28 +226,26 @@ def test_create_consumer_granule_wo_security(self, mock_deserializing_consumer, exp_topic = conf_wo_security_granule['granule_topic_consume'] conf_wo_security_granule['metadata_type'] = 'GRANULE' - # Verify security taken into consideration - meta_consumer_conf = {'bootstrap.servers': conf_wo_security_granule['brokers'], - 'key.deserializer': StringDeserializer('utf-8'), - 'value.deserializer': mock_avro_deserializer, - 'group.id': conf_wo_security_granule['group_id'], - 'auto.offset.reset': conf_wo_security_granule['auto_offset_reset'] - } - consumer = KafkaConsumer(**conf_wo_security_granule) reg_client = MagicMock() - consumer.create_consumer(reg_client) + deser_consumer = consumer.create_consumer(reg_client) # Verify metadata type was taken into consideration for getting topic information reg_client.get_latest_version.assert_called_with(exp_topic + '-value') # Verify no security passed into DeserializingConsumer called with expected configuration - exp_arguments = dict(meta_consumer_conf) - exp_arguments['key.deserializer'] = ANY - exp_arguments['value.deserializer'] = ANY - mock_deserializing_consumer.assert_called_with(exp_arguments) + mock_deserializing_consumer.assert_called_with( + { + 'bootstrap.servers': conf_wo_security_granule['brokers'], + 'key.deserializer': ANY, + 'value.deserializer': ANY, + 'group.id': conf_wo_security_granule['group_id'], + 'auto.offset.reset': conf_wo_security_granule['auto_offset_reset'] + }) mock_deserializing_consumer.return_value.subscribe.assert_called_with([exp_topic]) + self.assertIsNotNone(deser_consumer) + def test_connect(self): mock_client = MagicMock() @@ -265,7 +276,7 @@ def test_consume(self, mock_metadata_consumer, mock_message): print("Ignoring exception: {}".format(e)) # Verify kafka consumer poll called expected number of times - self.assertTrue(mock_metadata_consumer.poll.call_count == 3) + self.assertEqual(mock_metadata_consumer.poll.call_count, 3) mock_metadata_consumer.poll.assert_has_calls([call(10), call(10), call(10)]) # Verify callback function was called once with expected message attributes From 47c9dce8de9aa898e6997aecddf2e9c93687ffd8 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 4 May 2021 10:32:47 -0600 Subject: [PATCH 15/49] 1500-Changed/added to KafkaPublisherTest(s). 
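The publisher tests below reuse the mocking pattern from the KafkaConsumer tests above (sketch only, assuming the same fixtures and imports): the confluent-kafka classes are patched out so no broker or schema registry is needed, e.g.

    @patch.object(SchemaRegistryClient, '__init__', autospec=True)
    def test_register_client_wo_security(self, mock_client):
        mock_client.return_value = None  # a patched __init__ must return None
        publisher = KafkaPublisher(**self.conf_wo_security)
        publisher.register_client()
        mock_client.assert_called_with(ANY, {'url': self.conf_wo_security['schema_registry']})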
--- .../tests/KafkaPublisherTest.py | 326 +++++++++++++++++- 1 file changed, 318 insertions(+), 8 deletions(-) diff --git a/onestop-python-client/tests/KafkaPublisherTest.py b/onestop-python-client/tests/KafkaPublisherTest.py index 7d992ae..643d4f5 100644 --- a/onestop-python-client/tests/KafkaPublisherTest.py +++ b/onestop-python-client/tests/KafkaPublisherTest.py @@ -1,25 +1,335 @@ import unittest - import json from onestop.KafkaPublisher import KafkaPublisher +from unittest.mock import ANY, patch, MagicMock +from confluent_kafka.schema_registry import SchemaRegistryClient class KafkaPublisherTest(unittest.TestCase): kp = None + conf_w_security = None + conf_wo_security = None - def setUp(self): + @classmethod + def setUp(cls): print("Set it up!") - self.kp = KafkaPublisher("../config/kafka-publisher-config-dev.yml") + cls.conf_w_security = { + "metadata_type" : "GRANULE", + "brokers" : "onestop-dev-cp-kafka:9092", + "schema_registry" : "http://onestop-dev-cp-schema-registry:8081", + "security" : { + "enabled" : True, + "caLoc" : "/etc/pki/tls/cert.pem", + "keyLoc" : "/etc/pki/tls/private/kafka-user.key", + "certLoc" : "/etc/pki/tls/certs/kafka-user.crt" + }, + "collection_topic_publish" : "psi-collection-input-unknown", + "granule_topic_publish" : "psi-granule-input-unknown", + "log_level" : "DEBUG" + } + cls.conf_wo_security = dict(cls.conf_w_security) + # Remove security credential section. + cls.conf_wo_security['security'] = { + "enabled":False + } + @classmethod def tearDown(self): print("Tear it down!") - def test_parse_config(self): - self.assertFalse(self.kp.conf['brokers']==None) + def test_init_happy_nonconditional_params(self): + publisher = KafkaPublisher(**self.conf_w_security) + + self.assertEqual(publisher.metadata_type, self.conf_w_security['metadata_type']) + self.assertEqual(publisher.brokers, self.conf_w_security['brokers']) + self.assertEqual(publisher.schema_registry, self.conf_w_security['schema_registry']) + self.assertEqual(publisher.security_enabled, self.conf_w_security['security']['enabled']) + self.assertEqual(publisher.collection_topic, self.conf_w_security['collection_topic_publish']) + self.assertEqual(publisher.granule_topic, self.conf_w_security['granule_topic_publish']) + + def test_init_security_enabled(self): + publisher = KafkaPublisher(**self.conf_w_security) + + self.assertEqual(publisher.security_caLoc, self.conf_w_security['security']['caLoc']) + self.assertEqual(publisher.security_keyLoc, self.conf_w_security['security']['keyLoc']) + self.assertEqual(publisher.security_certLoc, self.conf_w_security['security']['certLoc']) + + def test_init_security_disabled(self): + publisher = KafkaPublisher(**self.conf_wo_security) + + self.assertRaises(AttributeError, getattr, publisher, "security_caLoc") + self.assertRaises(AttributeError, getattr, publisher, "security_keyLoc") + self.assertRaises(AttributeError, getattr, publisher, "security_certLoc") + + def test_init_metadata_type_valid(self): + publisher = KafkaPublisher(**self.conf_w_security) + + self.assertEqual(publisher.metadata_type, self.conf_w_security['metadata_type']) + + def test_init_metadata_type_invalid(self): + wrong_metadata_type_config = dict(self.conf_w_security) + wrong_metadata_type_config['metadata_type'] = "invalid_type" + + self.assertRaises(ValueError, KafkaPublisher, **wrong_metadata_type_config) + + def test_init_extra_params(self): + conf = dict(self.conf_wo_security) + conf['junk_key'] = 'junk_value' + KafkaPublisher(**conf) + + @patch.object(SchemaRegistryClient, '__init__', 
autospec=True) + def test_register_client_w_security(self, mock_client): + exp_security_conf = { + 'url':self.conf_w_security['schema_registry'], + 'ssl.ca.location': self.conf_w_security['security']['caLoc'], + 'ssl.key.location': self.conf_w_security['security']['keyLoc'], + 'ssl.certificate.location': self.conf_w_security['security']['certLoc'] + } + mock_client.return_value = None + + publisher = KafkaPublisher(**self.conf_w_security) + publisher.register_client() + + mock_client.assert_called() + mock_client.assert_called_with(ANY, exp_security_conf) + + @patch.object(SchemaRegistryClient, '__init__', autospec=True) + def test_register_client_wo_security(self, mock_client): + exp_security_conf = { + 'url':self.conf_w_security['schema_registry'], + 'ssl.ca.location': self.conf_w_security['security']['caLoc'], + 'ssl.key.location': self.conf_w_security['security']['keyLoc'], + 'ssl.certificate.location': self.conf_w_security['security']['certLoc'] + } + mock_client.return_value = None + + publisher = KafkaPublisher(**self.conf_wo_security) + publisher.register_client() + try: + mock_client.assert_called_with(ANY, exp_security_conf) + except: + return + raise AssertionError('Expected register_client() to not have been called with security arguments.') + + @patch('onestop.KafkaPublisher.AvroSerializer') + @patch('onestop.KafkaPublisher.SerializingProducer') + def test_create_producer_calls_AvroSerializer(self, mock_serializing_publisher, mock_avro_serializer): + conf_w_security_collection = dict(self.conf_w_security) + conf_w_security_collection['metadata_type'] = "COLLECTION" + + publisher = KafkaPublisher(**conf_w_security_collection) + reg_client = publisher.register_client() + reg_client.get_latest_version = MagicMock() + publisher.create_producer(reg_client) + + # Verify AvroSerializer called with expected registry client + mock_avro_serializer.assert_called_with(ANY, reg_client) + + @patch('onestop.KafkaPublisher.AvroSerializer') + @patch('onestop.KafkaPublisher.SerializingProducer') + def test_create_producer_collection_w_security(self, mock_serializing_producer, mock_avro_serializer): + conf_w_security_collection = dict(self.conf_w_security) + topic = conf_w_security_collection['collection_topic_publish'] + conf_w_security_collection['metadata_type'] = 'COLLECTION' + + publisher = KafkaPublisher(**conf_w_security_collection) + reg_client = MagicMock() + prod = publisher.create_producer(reg_client) + + # Verify metadata type was taken into consideration for getting topic information + reg_client.get_latest_version.assert_called_with(topic + '-value') + + # Verify security passed into SerializingProducer + mock_serializing_producer.assert_called_with( + { + 'bootstrap.servers': conf_w_security_collection['brokers'], + 'security.protocol': 'SSL', + 'ssl.ca.location': conf_w_security_collection['security']['caLoc'], + 'ssl.key.location': conf_w_security_collection['security']['keyLoc'], + 'ssl.certificate.location': conf_w_security_collection['security']['certLoc'], + 'value.serializer': ANY, + }) + + self.assertIsNotNone(prod) + + @patch('onestop.KafkaPublisher.AvroSerializer') + @patch('onestop.KafkaPublisher.SerializingProducer') + def test_create_producer_collection_wo_security(self, mock_serializing_producer, mock_avro_serializer): + conf_wo_security_collection = dict(self.conf_wo_security) + topic = conf_wo_security_collection['collection_topic_publish'] + conf_wo_security_collection['metadata_type'] = 'COLLECTION' + + publisher = KafkaPublisher(**conf_wo_security_collection) + 
reg_client = MagicMock() + prod = publisher.create_producer(reg_client) + + # Verify metadata type was taken into consideration for getting topic information + reg_client.get_latest_version.assert_called_with(topic + '-value') + + # Verify no security passed into SerializingProducer + mock_serializing_producer.assert_called_with( + { + 'bootstrap.servers': conf_wo_security_collection['brokers'], + 'value.serializer': ANY, + }) + + self.assertIsNotNone(prod) + + @patch('onestop.KafkaPublisher.AvroSerializer') + @patch('onestop.KafkaPublisher.SerializingProducer') + def test_create_producer_granule_w_security(self, mock_serializing_producer, mock_avro_serializer): + conf_w_security_granule = dict(self.conf_w_security) + topic = conf_w_security_granule['granule_topic_publish'] + conf_w_security_granule['metadata_type'] = 'GRANULE' + + publisher = KafkaPublisher(**conf_w_security_granule) + reg_client = MagicMock() + prod = publisher.create_producer(reg_client) + + # Verify metadata type was taken into consideration for getting topic information + reg_client.get_latest_version.assert_called_with(topic + '-value') + + # Verify security passed into SerializingProducer + mock_serializing_producer.assert_called_with( + { + 'bootstrap.servers': conf_w_security_granule['brokers'], + 'security.protocol': 'SSL', + 'ssl.ca.location': conf_w_security_granule['security']['caLoc'], + 'ssl.key.location': conf_w_security_granule['security']['keyLoc'], + 'ssl.certificate.location': conf_w_security_granule['security']['certLoc'], + 'value.serializer': ANY, + }) + + self.assertIsNotNone(prod) + + @patch('onestop.KafkaPublisher.AvroSerializer') + @patch('onestop.KafkaPublisher.SerializingProducer') + def test_create_producer_granule_wo_security(self, mock_serializing_producer, mock_avro_serializer): + conf_wo_security_granule = dict(self.conf_wo_security) + exp_topic = conf_wo_security_granule['granule_topic_publish'] + conf_wo_security_granule['metadata_type'] = 'GRANULE' + + publisher = KafkaPublisher(**conf_wo_security_granule) + reg_client = MagicMock() + prod = publisher.create_producer(reg_client) + + # Verify metadata type was taken into consideration for getting topic information + reg_client.get_latest_version.assert_called_with(exp_topic + '-value') + + # Verify no security passed into SerializingProducer called with expected configuration + mock_serializing_producer.assert_called_with( + { + 'bootstrap.servers': conf_wo_security_granule['brokers'], + 'value.serializer': ANY, + }) + + self.assertIsNotNone(prod) + + def test_connect(self): + mock_client = MagicMock() + + publisher = KafkaPublisher(**self.conf_w_security) + publisher.register_client = MagicMock(return_value=mock_client) + publisher.create_producer = MagicMock(return_value=MagicMock(mock_client)) + publisher.connect() + + publisher.register_client.assert_called_once() + publisher.create_producer.assert_called_with(mock_client) + + def test_get_collection_key_from_uuid(self): + expKey = '12345678-1234-5678-1234-567812345678' + for uuid in [ + '{12345678-1234-5678-1234-567812345678}', + '12345678123456781234567812345678', + 'urn:uuid:12345678-1234-5678-1234-567812345678', + b'\x12\x34\x56\x78'*4, +# b'\x78\x56\x34\x12\x34\x12\x78\x56' + b'\x12\x34\x56\x78\x12\x34\x56\x78', +# {0x12345678, 0x1234, 0x5678, 0x12, 0x34, 0x567812345678}, +# 0x12345678123456781234567812345678, + ]: + with self.subTest(uuid=uuid): + print ("Testing uuid "+str(uuid)) + key = KafkaPublisher.get_collection_key_from_uuid(uuid) + print("Acquired uuid="+str(key)) + 
self.assertEqual(key, expKey) + + @patch('onestop.KafkaPublisher.SerializingProducer') + def test_publish_collection(self, mock_collection_producer): + uuid = '{12345678-1234-5678-1234-567812345678}' + content_dict = { + 'title': 'this is a test', + 'location': 'somewhere in space' + } + method = 'PUT' + publisher = KafkaPublisher(**self.conf_w_security) + publisher.register_client = MagicMock(return_value=MagicMock()) + mock_collection_producer.produce = MagicMock() + mock_collection_producer.poll.side_effect = [1] + + publisher.publish_collection(mock_collection_producer, uuid, content_dict, method) + + # Verify kafka produce called once + mock_collection_producer.produce.assert_called_with( + topic=self.conf_w_security['collection_topic_publish'], + value={ + 'type': 'collection', + 'content': json.dumps(content_dict), + 'contentType': 'application/json', + 'method': method, + 'source': 'unknown', + }, + key=publisher.get_collection_key_from_uuid(uuid), + on_delivery=publisher.delivery_report + ) + + # Verify kafka produce poll called once + mock_collection_producer.poll.assert_called_once() + + + @patch('onestop.KafkaPublisher.SerializingProducer') + def test_publish_granule(self, mock_collection_producer): + uuid = '{12345678-1234-5678-1234-567812345678}' + content_dict = { + 'title': 'this is a test', + 'location': 'somewhere in space', + 'relationships': [{"type": "COLLECTION", + "id": '{12345678-1234-5678-1234-567812345678}'}], + 'errors': [], + 'analysis': 'No analysis', + 'fileLocations': 'archived', + 'fileInformation': 'no information', + 'discovery': 'AWS' + } + publisher = KafkaPublisher(**self.conf_w_security) + publisher.register_client = MagicMock(return_value=MagicMock()) + mock_collection_producer.produce = MagicMock() + mock_collection_producer.poll.side_effect = [1] + + publisher.publish_granule(mock_collection_producer, uuid, content_dict) + + # Verify kafka produce called once + mock_collection_producer.produce.assert_called_with( + topic=self.conf_w_security['granule_topic_publish'], + value={ + 'type': 'granule', + 'content': json.dumps(content_dict), + #'contentType': 'application/json', + 'method': 'PUT', + 'source': 'unknown', + 'operation': None, + 'relationships': content_dict['relationships'], + 'errors': content_dict['errors'], + 'analysis': content_dict['analysis'], + 'fileLocations': {'fileLocation': content_dict['fileLocations']}, + 'fileInformation': content_dict['fileInformation'], + 'discovery': content_dict['discovery'] + }, + key=publisher.get_collection_key_from_uuid(uuid), + on_delivery=publisher.delivery_report + ) - def test_publish_collection(self): - print("Publish collection") - # Integration test TBD + # Verify kafka produce poll called once + mock_collection_producer.poll.assert_called_once() if __name__ == '__main__': unittest.main() \ No newline at end of file From 921490ac1b3d0cc113e1c8a62ef8ab3f00cd1aa8 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 4 May 2021 10:34:45 -0600 Subject: [PATCH 16/49] 1500-In KafkaPublisher cleaned up documentation, added method to consolidate generating the key from UUID, and added a little logging. Consolidated topic generation code too. 
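
The new get_collection_key_from_uuid leans on the standard-library uuid.UUID constructor, which already normalizes the common string spellings to one canonical form; a quick standalone demonstration of that behaviour (values match the ones exercised in the test above):

    from uuid import UUID

    canonical = '12345678-1234-5678-1234-567812345678'

    # Hex input: braces, hyphens and a urn:uuid: prefix are all accepted and ignored.
    assert str(UUID(hex='{12345678-1234-5678-1234-567812345678}')) == canonical
    assert str(UUID(hex='urn:uuid:12345678-1234-5678-1234-567812345678')) == canonical
    assert str(UUID(hex='12345678123456781234567812345678')) == canonical

    # Bytes input: 16 big-endian bytes map to the same canonical string.
    assert str(UUID(bytes=b'\x12\x34\x56\x78' * 4)) == canonical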
--- .../onestop/KafkaPublisher.py | 77 +++++++++++-------- 1 file changed, 46 insertions(+), 31 deletions(-) diff --git a/onestop-python-client/onestop/KafkaPublisher.py b/onestop-python-client/onestop/KafkaPublisher.py index 125174b..047783c 100644 --- a/onestop-python-client/onestop/KafkaPublisher.py +++ b/onestop-python-client/onestop/KafkaPublisher.py @@ -48,7 +48,7 @@ class KafkaPublisher: publish_collection(collection_producer, collection_uuid, content_dict, method) Publish collection to collection topic - publish_granule(granule_producer, record_uuid, collection_uuid, content_dict) + publish_granule(granule_producer, collection_uuid, content_dict) Publish granule to granule topic """ @@ -137,27 +137,27 @@ def create_producer(self, registry_client): :return: SerializingProducer Object based on initial constructor values """ - metadata_schema = None + topic = None if self.metadata_type == "COLLECTION": - metadata_schema = registry_client.get_latest_version(self.collection_topic + '-value').schema.schema_str + topic = self.collection_topic if self.metadata_type == "GRANULE": - metadata_schema = registry_client.get_latest_version(self.granule_topic + '-value').schema.schema_str + topic = self.granule_topic + metadata_schema = registry_client.get_latest_version(topic + '-value').schema.schema_str metadata_serializer = AvroSerializer(metadata_schema, registry_client) - producer_conf = {'bootstrap.servers': self.brokers} + conf = {'bootstrap.servers': self.brokers} if self.security_enabled: - producer_conf['security.protocol'] = 'SSL' - producer_conf['ssl.ca.location'] = self.security_caLoc - producer_conf['ssl.key.location'] = self.security_keyLoc - producer_conf['ssl.certificate.location'] = self.security_certLoc + conf['security.protocol'] = 'SSL' + conf['ssl.ca.location'] = self.security_caLoc + conf['ssl.key.location'] = self.security_keyLoc + conf['ssl.certificate.location'] = self.security_certLoc - meta_producer_conf = producer_conf - meta_producer_conf['value.serializer'] = metadata_serializer + conf['value.serializer'] = metadata_serializer - metadata_producer = SerializingProducer(meta_producer_conf) + metadata_producer = SerializingProducer(conf) return metadata_producer def delivery_report(self, err, msg): @@ -174,14 +174,27 @@ def delivery_report(self, err, msg): else: self.logger.error('Message delivered to {} [{}]'.format(msg.topic(), msg.partition())) + @staticmethod + def get_collection_key_from_uuid(collection_uuid): + """ + Create a key to use in a kafka message from the given string representation of the collection UUID. + :param collection_uuid: str + collection string to turn into a key. 
+ :return: + """ + if type(collection_uuid) == bytes: + return str(UUID(bytes=collection_uuid)) + else: + return str(UUID(hex=collection_uuid)) + def publish_collection(self, collection_producer, collection_uuid, content_dict, method): """ - Publish collection to collection topic + Publish a collection to the collection topic :param collection_producer: SerializingProducer use connect() :param collection_uuid: str - collection uuid that you want colelction to have + collection uuid that you want the collection to have :param content_dict: dict dictionary containing information you want to publish :param method: str @@ -190,11 +203,9 @@ def publish_collection(self, collection_producer, collection_uuid, content_dict, :return: str returns msg if publish is successful, kafka error if it wasn't successful """ - self.logger.info('Publish collection') - if type(collection_uuid) == bytes: - key = str(UUID(bytes=collection_uuid)) - else: - key = str(UUID(hex=collection_uuid)) + self.logger.info('Publishing collection') + + key = self.get_collection_key_from_uuid(collection_uuid) value_dict = { 'type': 'collection', @@ -204,20 +215,22 @@ def publish_collection(self, collection_producer, collection_uuid, content_dict, 'source': 'unknown', } try: - collection_producer.produce(topic=self.collection_topic, value=value_dict, key=key, - on_delivery=self.delivery_report) + self.logger.debug('Publishing collection with topic='+self.collection_topic+' key='+key+' value='+str(value_dict)) + collection_producer.produce( + topic=self.collection_topic, + value=value_dict, + key=key, + on_delivery=self.delivery_report) except KafkaError: raise collection_producer.poll() - def publish_granule(self, granule_producer, record_uuid, collection_uuid, content_dict): + def publish_granule(self, granule_producer, collection_uuid, content_dict): """ - Publishes granule to granule topic + Publish a granule to the granule topic :param granule_producer: SerializingProducer use connect() - :param record_uuid: str - record uuid associated with the granule :param collection_uuid: str collection uuid associated with the granule :param content_dict: dict @@ -228,10 +241,8 @@ def publish_granule(self, granule_producer, record_uuid, collection_uuid, conten """ self.logger.info('Publish granule') - if type(record_uuid) == bytes: - key = str(UUID(bytes=collection_uuid)) - else: - key = str(UUID(hex=collection_uuid)) + key = self.get_collection_key_from_uuid(collection_uuid) + """ if type(collection_uuid) == bytes: content_dict['relationships'] = [{"type": "COLLECTION", "id": collection_uuid.hex()}] @@ -264,8 +275,12 @@ def publish_granule(self, granule_producer, record_uuid, collection_uuid, conten } try: - granule_producer.produce(topic=self.granule_topic, value=value_dict, key=key, - on_delivery=self.delivery_report) + self.logger.debug('Publishing granule with topic='+self.granule_topic+' key='+key+' value='+str(value_dict)) + granule_producer.produce( + topic=self.granule_topic, + value=value_dict, + key=key, + on_delivery=self.delivery_report) except KafkaError: raise granule_producer.poll() From a337c39f88e953d1a99fc9bfc3d15677e63864bd Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 4 May 2021 10:59:27 -0600 Subject: [PATCH 17/49] 1500-Adjusted csb config variable name from file_identifier_prefix to file_id_prefix as it is in half the other places including S3MessageAdapter constructor. 
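
Since the client classes are now constructed by splatting the YAML config straight into their keyword arguments, the key in the config file has to match the parameter name exactly; a small, hypothetical sanity check along those lines (the file path is one of the configs touched in this patch):

    import yaml

    with open('scripts/config/csb-data-stream-config.yml') as f:
        conf = yaml.load(f, Loader=yaml.FullLoader)

    # S3MessageAdapter(**conf) expects file_id_prefix; the old file_identifier_prefix
    # spelling would land in **wildargs and leave the required argument missing.
    assert 'file_id_prefix' in conf, 'config still uses the old file_identifier_prefix key'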
--- .../config/csb-data-stream-config-template.yml | 2 +- scripts/config/csb-data-stream-config.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/onestop-python-client/config/csb-data-stream-config-template.yml b/onestop-python-client/config/csb-data-stream-config-template.yml index 887c9be..56bad99 100644 --- a/onestop-python-client/config/csb-data-stream-config-template.yml +++ b/onestop-python-client/config/csb-data-stream-config-template.yml @@ -9,7 +9,7 @@ registry_base_url: http://localhost/onestop/api/registry onestop_base_url: http://localhost/onestop/api/search/search access_bucket: https://archive-testing-demo.s3-us-east-2.amazonaws.com #access_bucket: https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com -file_identifier_prefix: "gov.noaa.ncei.csb:" +file_id_prefix: "gov.noaa.ncei.csb:" prefixMap: NESDIS/CSB: 'fdb56230-87f4-49f2-ab83-104cfd073177' diff --git a/scripts/config/csb-data-stream-config.yml b/scripts/config/csb-data-stream-config.yml index 1556ab9..24a7cf6 100644 --- a/scripts/config/csb-data-stream-config.yml +++ b/scripts/config/csb-data-stream-config.yml @@ -6,7 +6,7 @@ collection_id: fdb56230-87f4-49f2-ab83-104cfd073177 psi_registry_url: https://internal-a683c98a66fb011eaa4230e0d5e5657f-369075387.us-east-1.elb.amazonaws.com access_bucket: https://archive-testing-demo.s3-us-east-2.amazonaws.com #access_bucket: https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com -file_identifier_prefix: "gov.noaa.ncei.csb:" +file_id_prefix: "gov.noaa.ncei.csb:" prefixMap: NESDIS/CSB: 'fdb56230-87f4-49f2-ab83-104cfd073177' From 5aab6d67df06f6faed7e43343b2dcfa2cf271a38 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 4 May 2021 11:28:56 -0600 Subject: [PATCH 18/49] 1500-Fixed lack of carriage returnin S3Utils for legibility. --- onestop-python-client/onestop/util/S3Utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/onestop-python-client/onestop/util/S3Utils.py b/onestop-python-client/onestop/util/S3Utils.py index 60fb876..e2f2e32 100644 --- a/onestop-python-client/onestop/util/S3Utils.py +++ b/onestop-python-client/onestop/util/S3Utils.py @@ -109,7 +109,8 @@ def connect(self, client_type, region): if client_type == "glacier": boto = boto3.client( "glacier", - region_name=region,aws_access_key_id=self.access_key, + region_name=region, + aws_access_key_id=self.access_key, aws_secret_access_key=self.secret_key) if client_type == "session": From 5975e1d24d47ccb80fa758b71b18019b1e57f7c6 Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 7 May 2021 11:21:15 -0600 Subject: [PATCH 19/49] 1500-Changed SqsConsumer class constructor to take dictionary with extra parameters allowed. Refactored out of SqsConsumer the connecting part and put into S3Utils, this left only log_level as class var. Put creating a Queue object into receive_messages (can refactor out if ever need again, but single line didn't seem to warrent its own method). Added debug logging. 
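
The constructor change follows the same pattern as the other classes in this series: one shared config dict is splatted into every constructor, and keys a given class does not care about fall into **wildargs rather than raising a TypeError. A toy sketch of that pattern (ExampleClient is illustrative only, not the real class):

    class ExampleClient:
        def __init__(self, log_level='INFO', **wildargs):
            self.log_level = log_level
            if wildargs:
                # The real classes log this; printing keeps the example dependency-free.
                print('Extra constructor arguments ignored: %s' % wildargs)

    conf = {'log_level': 'DEBUG', 'sqs_max_polls': 2, 's3_region': 'us-east-2'}
    client = ExampleClient(**conf)      # unknown keys are tolerated instead of raising TypeError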
--- .../onestop/util/SqsConsumer.py | 95 +++++++------------ 1 file changed, 33 insertions(+), 62 deletions(-) diff --git a/onestop-python-client/onestop/util/SqsConsumer.py b/onestop-python-client/onestop/util/SqsConsumer.py index f782cc5..e7ceed4 100644 --- a/onestop-python-client/onestop/util/SqsConsumer.py +++ b/onestop-python-client/onestop/util/SqsConsumer.py @@ -1,10 +1,7 @@ -import logging -from datetime import datetime, timezone -import yaml -import boto3 import json -from onestop.util.ClientLogger import ClientLogger +from datetime import datetime, timezone +from onestop.util.ClientLogger import ClientLogger class SqsConsumer: """ @@ -12,101 +9,75 @@ class SqsConsumer: Attributes ---------- - conf: yaml file - aws-util-config-dev.yml - cred: yaml file - credentials.yml - logger: ClientLogger object - utilizes python logger library and creates logging for our specific needs - logger.info: ClientLogger object - logging statement that occurs when the class is instantiated + logger: ClientLogger object + utilizes python logger library and creates logging for our specific needs Methods ------- - connect() - connects a boto sqs instance based on configurations in conf and cred yml files - - receive_messages(queue, sqs_max_polls, cb) - polls for messages in the queue + receive_messages(sqs_client, sqs_queue_name, sqs_max_polls, cb) + polls for messages in the queue """ - conf = None - def __init__(self, conf_loc, cred_loc): + def __init__(self, log_level = 'INFO', **wildargs): """ - - :param conf_loc: yaml file - aws-util-config-dev.yml - :param cred_loc: yaml file - credentials.yml - - Other Attributes - ---------------- - logger: ClientLogger object - utilizes python logger library and creates logging for our specific needs - logger.info: ClientLogger object - logging statement that occurs when the class is instantiated - + Attributes + ---------- + log_level: str + The log level to use for this class (Defaults to 'INFO') """ - with open(conf_loc) as f: - self.conf = yaml.load(f, Loader=yaml.FullLoader) - - with open(cred_loc) as f: - self.cred = yaml.load(f, Loader=yaml.FullLoader) - - self.logger = ClientLogger.get_logger(self.__class__.__name__, self.conf['log_level'], False) + self.logger = ClientLogger.get_logger(self.__class__.__name__, log_level, False) self.logger.info("Initializing " + self.__class__.__name__) - def connect(self): - """ - Connects a boto sqs instance based on configurations in conf and cred yml files + if wildargs: + self.logger.error("There were extra constructor arguments: " + str(wildargs)) - :return: boto sqs - returns instance of boto sqs resource - """ - boto_session = boto3.Session(aws_access_key_id=self.cred['sandbox']['access_key'], - aws_secret_access_key=self.cred['sandbox']['secret_key']) - # Get the queue. This returns an SQS.Queue instance - sqs_session = boto_session.resource('sqs', region_name=self.conf['s3_region']) - sqs_queue = sqs_session.Queue(self.conf['sqs_url']) - self.logger.info("Connecting to " + self.conf['sqs_url']) - return sqs_queue - - def receive_messages(self, queue, sqs_max_polls, cb): + def receive_messages(self, sqs_client, sqs_queue_name, sqs_max_polls, cb): """ - Polls for messages in the queue + Polls for messages from an sqs queue - :param queue: boto sqs resource - instance of boto sqs resource given from connect() + :param sqs_client: boto SQS.Client + instance of boto sqs Client + :param sqs_queue_name: str + name of the queue to connect to. 
:param sqs_max_polls: int number of polls :param cb: function call back function - :return: Dependent on the call back function + :return: If the Message has a Records key then the call back function gets called on the Message. """ self.logger.info("Receive messages") + self.logger.info("Polling %d time(s) for SQS messages" % sqs_max_polls) + + sqs_queue = sqs_client.Queue(sqs_queue_name) i = 1 while i <= sqs_max_polls: self.logger.info("Polling attempt: " + str(i)) i = i + 1 - sqs_messages = queue.receive_messages(MaxNumberOfMessages=10, WaitTimeSeconds=10) + sqs_messages = sqs_queue.receive_messages( + MaxNumberOfMessages=10, + WaitTimeSeconds=10 + ) self.logger.info("Received %d messages." % len(sqs_messages)) + self.logger.debug("Messages: %s" % sqs_messages) for sqs_message in sqs_messages: try: # Log start time dt_start = datetime.now(tz=timezone.utc) - self.logger.info("Started processing message") + self.logger.info("Starting processing message") + self.logger.debug("Message: %s" % sqs_message) + self.logger.debug("Message body: %s" % sqs_message.body) message_body = json.loads(sqs_message.body) + self.logger.debug("Message body message: %s" % message_body['Message']) message_content = json.loads(message_body['Message']) if 'Records' in message_content: recs = message_content['Records'] - self.logger.info("Received message") self.logger.debug('Records: ' + str(recs)) else: self.logger.info("s3 event without records content received.") From 204a2bd0c4d7101476db87322bf621e1fdd34a07 Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 7 May 2021 14:41:42 -0600 Subject: [PATCH 20/49] 1500-Decided to put "connect" back into SqsConsumer. Adjusted input parameters for receive_message so a user could create their own queue or use our connect to do so and pass either in. --- .../onestop/util/SqsConsumer.py | 22 +++++++++++++------ 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/onestop-python-client/onestop/util/SqsConsumer.py b/onestop-python-client/onestop/util/SqsConsumer.py index e7ceed4..bd7f98f 100644 --- a/onestop-python-client/onestop/util/SqsConsumer.py +++ b/onestop-python-client/onestop/util/SqsConsumer.py @@ -31,14 +31,24 @@ def __init__(self, log_level = 'INFO', **wildargs): if wildargs: self.logger.error("There were extra constructor arguments: " + str(wildargs)) - def receive_messages(self, sqs_client, sqs_queue_name, sqs_max_polls, cb): + def connect(self, sqs_resource, sqs_queue_name): + """ + Gets a boto SQS.Queue resource. + :param sqs_resource: boto SQS.Resource + SQS resource to create the queue from. + :param sqs_queue_name: str + SQS queue name to create and return a boto SQS.Queue object to. + :return: SQS.Queue + An SQS.Queue resource to use for Queue operations. + """ + return sqs_resource.create_queue(QueueName=sqs_queue_name) + + def receive_messages(self, sqs_queue, sqs_max_polls, cb): """ Polls for messages from an sqs queue - :param sqs_client: boto SQS.Client - instance of boto sqs Client - :param sqs_queue_name: str - name of the queue to connect to. + :param sqs_queue: boto SQS.Queue object + boto SQS Queue object. Can be generated by the method in this class. 
:param sqs_max_polls: int number of polls :param cb: function @@ -50,8 +60,6 @@ def receive_messages(self, sqs_client, sqs_queue_name, sqs_max_polls, cb): self.logger.info("Receive messages") self.logger.info("Polling %d time(s) for SQS messages" % sqs_max_polls) - sqs_queue = sqs_client.Queue(sqs_queue_name) - i = 1 while i <= sqs_max_polls: self.logger.info("Polling attempt: " + str(i)) From 47b3e5bd187e082c45af07aab585126afdf1828e Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 7 May 2021 17:26:56 -0600 Subject: [PATCH 21/49] 1500-Fixed some bugs in SqsConsumer. --- .../onestop/util/SqsConsumer.py | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/onestop-python-client/onestop/util/SqsConsumer.py b/onestop-python-client/onestop/util/SqsConsumer.py index bd7f98f..4f503d8 100644 --- a/onestop-python-client/onestop/util/SqsConsumer.py +++ b/onestop-python-client/onestop/util/SqsConsumer.py @@ -60,11 +60,17 @@ def receive_messages(self, sqs_queue, sqs_max_polls, cb): self.logger.info("Receive messages") self.logger.info("Polling %d time(s) for SQS messages" % sqs_max_polls) - i = 1 - while i <= sqs_max_polls: + if sqs_max_polls < 1: + raise ValueError('Max polling value should be greater than 0.') + + for i in range(1, sqs_max_polls+1): self.logger.info("Polling attempt: " + str(i)) - i = i + 1 + # boto3 SQS.Queue appears to have a subset of SQS.Client methods plus a few management queue ones. + # The ones they do share seem to have different return types. + # The message method names are different and return types different: + # Client.send_message and Queue.send_message and Queue.send_messages + # Client.receive_message and Queue.receive_messages sqs_messages = sqs_queue.receive_messages( MaxNumberOfMessages=10, WaitTimeSeconds=10 @@ -86,9 +92,10 @@ def receive_messages(self, sqs_queue, sqs_max_polls, cb): if 'Records' in message_content: recs = message_content['Records'] - self.logger.debug('Records: ' + str(recs)) + self.logger.debug('Message "Records": %s' % recs) + cb(recs) else: - self.logger.info("s3 event without records content received.") + self.logger.info("s3 event message without 'Records' content received.") sqs_message.delete() @@ -98,9 +105,8 @@ def receive_messages(self, sqs_queue, sqs_max_polls, cb): processing_time = dt_end - dt_start self.logger.info("Completed processing message (s):" + str(processing_time.microseconds * 1000)) - cb(recs) except: self.logger.exception( "An exception was thrown while processing a message, but this program will continue. The " - "message will not be deleted from the SQS queue. The message was: %s" % sqs_message.body) + "message will not be deleted from the SQS queue. The message was: %s" % sqs_message) From b2143aefb499ac3faff7d10b6b9f74b49ff2ae72 Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 7 May 2021 17:27:15 -0600 Subject: [PATCH 22/49] 1500-Due to changing SqsConsumer class constructor to take dictionary with extra parameters allowed so adjusted the tests and added more with more verification. 
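
These tests rely on moto's mock_sqs so that no real AWS service is touched; a condensed, self-contained version of the send/receive round trip they build on (queue name and region are arbitrary test values, and moto stubs the AWS calls and credentials while the mock is active):

    import boto3
    from moto import mock_sqs

    @mock_sqs
    def sqs_round_trip():
        sqs_resource = boto3.resource('sqs', region_name='us-east-2')
        queue = sqs_resource.create_queue(QueueName='test_queue')

        sqs_client = boto3.client('sqs', region_name='us-east-2')
        sqs_client.send_message(QueueUrl=queue.url, MessageBody='{"Records": []}')

        messages = queue.receive_messages(MaxNumberOfMessages=10, WaitTimeSeconds=0)
        assert len(messages) == 1
        assert messages[0].body == '{"Records": []}'

    sqs_round_trip()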
--- .../tests/util/SqsConsumerTest.py | 175 ++++++++++++++++-- 1 file changed, 159 insertions(+), 16 deletions(-) diff --git a/onestop-python-client/tests/util/SqsConsumerTest.py b/onestop-python-client/tests/util/SqsConsumerTest.py index 4d6be77..87f9005 100644 --- a/onestop-python-client/tests/util/SqsConsumerTest.py +++ b/onestop-python-client/tests/util/SqsConsumerTest.py @@ -1,34 +1,177 @@ import unittest -import boto3 +import json + from moto import mock_sqs -from tests.utils import abspath_from_relative +from unittest.mock import ANY, patch, MagicMock, call +from onestop.util.S3Utils import S3Utils from onestop.util.SqsConsumer import SqsConsumer class SqsConsumerTest(unittest.TestCase): - sc = None + config_dict = { + 'access_key': 'test_access_key', + 'secret_key': 'test_secret_key', + 's3_region': 'us-east-2', + 's3_bucket': 'archive-testing-demo', + 'sqs_url': 'https://sqs.us-east-2.amazonaws.com/798276211865/cloud-archive-client-sqs', + 'type': 'COLLECTION', + 'file_id_prefix': 'gov.noaa.ncei.csb:', + 'collection_id': 'fdb56230-87f4-49f2-ab83-104cfd073177', + 'registry_base_url': 'http://localhost/onestop/api/registry', + 'registry_username': 'admin', + 'registry_password': 'whoknows', + 'onestop_base_url': 'http://localhost/onestop/api/search/search', + 'log_level': 'DEBUG' + } + + records = [{"eventVersion":"2.1"}] + message = json.dumps( + {"Type": "Notification", + "MessageId": "9d0691d2-ae9c-58f9-a9f4-c8dcf05d87be", + "TopicArn": "arn:aws:sns:us-east-1:798276211865:archive-testing-demo-backup-use-1", + "Subject": "Amazon S3 Notification", + "Message": json.dumps({"Records": records}), + "Timestamp": "2021-05-06T21:15:45.427Z", + "SignatureVersion": "1", + "Signature": "Ui5s4uVgcMr5fjGmePCMgmi14Dx9oS8hIpjXXiQo+xZPgsHkUayz7dEeGmMGGt45l8blmZTZEbxJG+HVGfIUmQGRqoimwiLm+mIAaNIN/BV76FVFcQUIkORX8gYN0a4RS3HU8/ElrKFK8Iz0zpxJdjwxa3xPCDwu+dTotiLTJxSouvg8MmkkDnq758a8vZ9WK2PaOlZiZ3m8Mv2ZvLrozZ/DAAz48HSad6Mymhit82RpGCUxy4SDwXVlP/nLB01AS11Gp2HowJR8NXyStrZYzzQEc+PebITaExyikgTMiVhRHkmb7JrtZPpgZu2daQsSooqpwyIzb6pvgwu9W54jkw==", + "SigningCertURL": "https://sns.us-east-1.amazonaws.com/SimpleNotificationService-010a507c1833636cd94bdb98bd93083a.pem", + "UnsubscribeURL": "https://sns.us-east-1.amazonaws.com/?Action=Unsubscribe&SubscriptionArn=arn:aws:sns:us-east-1:798276211865:archive-testing-demo-backup-use-1:e7a9a9f5-792e-48a6-9ec8-40f7f5a8f600" + }) + message_wo_records = json.dumps( + {"Type": "Notification", + "MessageId": "9d0691d2-ae9c-58f9-a9f4-c8dcf05d87be", + "TopicArn": "arn:aws:sns:us-east-1:798276211865:archive-testing-demo-backup-use-1", + "Subject": "Amazon S3 Notification", + "Message": "{}", + "Timestamp": "2021-05-06T21:15:45.427Z", + "SignatureVersion": "1", + "Signature": "Ui5s4uVgcMr5fjGmePCMgmi14Dx9oS8hIpjXXiQo+xZPgsHkUayz7dEeGmMGGt45l8blmZTZEbxJG+HVGfIUmQGRqoimwiLm+mIAaNIN/BV76FVFcQUIkORX8gYN0a4RS3HU8/ElrKFK8Iz0zpxJdjwxa3xPCDwu+dTotiLTJxSouvg8MmkkDnq758a8vZ9WK2PaOlZiZ3m8Mv2ZvLrozZ/DAAz48HSad6Mymhit82RpGCUxy4SDwXVlP/nLB01AS11Gp2HowJR8NXyStrZYzzQEc+PebITaExyikgTMiVhRHkmb7JrtZPpgZu2daQsSooqpwyIzb6pvgwu9W54jkw==", + "SigningCertURL": "https://sns.us-east-1.amazonaws.com/SimpleNotificationService-010a507c1833636cd94bdb98bd93083a.pem", + "UnsubscribeURL": "https://sns.us-east-1.amazonaws.com/?Action=Unsubscribe&SubscriptionArn=arn:aws:sns:us-east-1:798276211865:archive-testing-demo-backup-use-1:e7a9a9f5-792e-48a6-9ec8-40f7f5a8f600" + }) + + @mock_sqs def setUp(self): print("Set it up!") - self.sc = SqsConsumer(abspath_from_relative(__file__, 
"../../config/aws-util-config-dev.yml"), - abspath_from_relative(__file__, "../../config/credentials-template.yml")) + + self.s3_utils = S3Utils(**self.config_dict) + self.sqs_consumer = SqsConsumer(**self.config_dict) def tearDown(self): print("Tear it down!") - def test_parse_config(self): - self.assertFalse(self.sc.conf['sqs_url']==None) + @mock_sqs + def test_connect(self): + queue_name = 'test' + sqs_resource = self.s3_utils.connect_to_resource('sqs', self.config_dict['s3_region']) + expQueue = sqs_resource.create_queue(QueueName=queue_name) + queue = self.sqs_consumer.connect(sqs_resource, queue_name) + + self.assertEqual(expQueue.url, queue.url) + + # Kind of pointless since we catch every exception this doesn't fail when it should.... + @mock_sqs + def test_receive_messages_no_records(self): + mock_cb = MagicMock() + + # Create the mock queue beforehand and set SqsConsumer's 'sqs_url' to the mock's URL + queue_name = 'test_queue' + sqs_resource = self.s3_utils.connect_to_resource('sqs', self.config_dict['s3_region']) + sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url + + # Send a test message lacking Records field + sqs_client = self.s3_utils.connect('sqs', self.config_dict['s3_region']) + sqs_client.send_message( + QueueUrl=sqs_queue_url, + MessageBody= self.message_wo_records + ) + queue = sqs_resource.Queue(queue_name) + + self.sqs_consumer.receive_messages(queue, 1, mock_cb) + + # Verify callback function was called once with expected message attributes + mock_cb.assert_not_called() + + @mock_sqs + def test_receive_messages_fails_invalid_sqs_max_polls(self): + with self.assertRaises(ValueError): + self.sqs_consumer.receive_messages(MagicMock(), 0, MagicMock()) + + @mock_sqs + def test_receive_messages_polls_msgs_expected_times(self): + mock_cb = MagicMock() + queue = MagicMock() + + sqs_max_polls = 2 + self.sqs_consumer.receive_messages(queue, sqs_max_polls, mock_cb) + + # Verify polling called expected times + self.assertEqual(queue.receive_messages.call_count, sqs_max_polls) + + @mock_sqs + def test_receive_messages_callback_occurs(self): + mock_cb = MagicMock() + + # Create the mock queue beforehand and set SqsConsumer's 'sqs_url' to the mock's URL + queue_name = 'test_queue' + sqs_resource = self.s3_utils.connect_to_resource('sqs', self.config_dict['s3_region']) + sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url + + # Send a test message + sqs_client = self.s3_utils.connect('sqs', self.config_dict['s3_region']) + sqs_client.send_message( + QueueUrl=sqs_queue_url, + MessageBody= self.message + ) + queue = sqs_resource.Queue(queue_name) + + self.sqs_consumer.receive_messages(queue, 1, mock_cb) + + # Verify callback function was called once with expected message attributes + mock_cb.assert_called_with(self.records) + + @mock_sqs + def test_happy_path(self): + mock_cb = MagicMock() + + # Create the mock queue beforehand and set SqsConsumer's 'sqs_url' to the mock's URL + queue_name = 'test_queue' + sqs_resource = self.s3_utils.connect_to_resource('sqs', self.config_dict['s3_region']) + queue = self.sqs_consumer.connect(sqs_resource, queue_name) #sqs_resource.create_queue(QueueName=queue_name) + + # Send a test message + sqs_client = self.s3_utils.connect('sqs', self.config_dict['s3_region']) + sqs_client.send_message( + QueueUrl=queue.url, + MessageBody= self.message + ) + + self.sqs_consumer.receive_messages(queue, 1, mock_cb) + + # Verify callback function was called once with expected message attributes + 
mock_cb.assert_called_with(self.records) + # An example using external send/receive methods @mock_sqs - def test_poll_messages(self): - # Create the mock queue beforehand and set its mock URL as the 'sqs_url' config value for SqsConsumer - boto_session = boto3.Session(aws_access_key_id=self.sc.cred['sandbox']['access_key'], - aws_secret_access_key=self.sc.cred['sandbox']['secret_key']) - sqs_session = boto_session.resource('sqs', region_name=self.sc.conf['s3_region']) - res = sqs_session.create_queue(QueueName="test_queue") - self.sc.conf['sqs_url'] = res.url - queue = self.sc.connect() - self.sc.receive_messages(queue, self.sc.conf['sqs_max_polls'], lambda *args, **kwargs: None) + def test_write_message_valid(self): + "Test the write_message method with a valid message" + sqs_client = self.s3_utils.connect('sqs', self.config_dict['s3_region']) + sqs = self.s3_utils.connect_to_resource('sqs', self.config_dict['s3_region']) + queue = sqs.create_queue(QueueName='test-skype-sender') + self.sqs_consumer.sqs_url = queue.url + skype_message = 'Testing with a valid message' + channel = 'test' + expected_message = str({'msg':f'{skype_message}', 'channel':channel}) + message = str({'msg':f'{skype_message}', 'channel':channel}) + queue.send_message(MessageBody=(message)) + sqs_messages = queue.receive_messages() + print('Message: %s'%sqs_messages) + print('Message0: %s'%sqs_messages[0]) + assert sqs_messages[0].body == expected_message, 'Message in skype-sender does not match expected' + print(f'The message in skype-sender SQS matches what we sent') + assert len(sqs_messages) == 1, 'Expected exactly one message in SQS' + print(f'\nExactly one message in skype-sender SQS') if __name__ == '__main__': unittest.main() \ No newline at end of file From 11f8845ef8f111c7d3a2632cace3c208751e13b2 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 11 May 2021 08:00:06 -0600 Subject: [PATCH 23/49] 1500-fixed bug in tests/utils of message missing a carriage return. Just looks. --- onestop-python-client/tests/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/onestop-python-client/tests/utils.py b/onestop-python-client/tests/utils.py index 2f1e6d5..9cb7913 100644 --- a/onestop-python-client/tests/utils.py +++ b/onestop-python-client/tests/utils.py @@ -15,7 +15,8 @@ def create_delete_message(region, bucket, key): "Message": '''{ "Records": [{ "eventVersion": "2.1", "eventSource": "aws:s3", "awsRegion": "''' + region + '''", - "eventTime": "2020-12-14T20:56:08.725Z", "eventName": "ObjectRemoved:Delete", + "eventTime": "2020-12-14T20:56:08.725Z", + "eventName": "ObjectRemoved:Delete", "userIdentity": {"principalId": "AX8TWPQYA8JEM"}, "requestParameters": {"sourceIPAddress": "65.113.158.185"}, "responseElements": {"x-amz-request-id": "D8059E6A1D53597A", From 9048e5326d48cd85f059e487068cd07f464fb35e Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 11 May 2021 08:01:30 -0600 Subject: [PATCH 24/49] 1500-Added logging to SqsHandlers and log_level method parameter. Adjusted SqsConsumer callback parameters to pass along log_level. 
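
One consequence of passing the log level through: every callback handed to receive_messages now needs to accept a second argument. A minimal conforming handler might look like this (my_handler is illustrative, not part of the patch):

    from onestop.util.ClientLogger import ClientLogger

    def my_handler(records, log_level='INFO'):
        logger = ClientLogger.get_logger('my_handler', log_level, False)
        logger.info('Handling %d record(s)' % len(records))
        for record in records:
            logger.debug('Record: %s' % record)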
--- .../onestop/util/SqsConsumer.py | 3 ++- .../onestop/util/SqsHandlers.py | 21 +++++++++++++++++-- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/onestop-python-client/onestop/util/SqsConsumer.py b/onestop-python-client/onestop/util/SqsConsumer.py index 4f503d8..4d97c34 100644 --- a/onestop-python-client/onestop/util/SqsConsumer.py +++ b/onestop-python-client/onestop/util/SqsConsumer.py @@ -25,6 +25,7 @@ def __init__(self, log_level = 'INFO', **wildargs): log_level: str The log level to use for this class (Defaults to 'INFO') """ + self.log_level = log_level self.logger = ClientLogger.get_logger(self.__class__.__name__, log_level, False) self.logger.info("Initializing " + self.__class__.__name__) @@ -93,7 +94,7 @@ def receive_messages(self, sqs_queue, sqs_max_polls, cb): if 'Records' in message_content: recs = message_content['Records'] self.logger.debug('Message "Records": %s' % recs) - cb(recs) + cb(recs, self.log_level) else: self.logger.info("s3 event message without 'Records' content received.") diff --git a/onestop-python-client/onestop/util/SqsHandlers.py b/onestop-python-client/onestop/util/SqsHandlers.py index 57be8da..ce0f010 100644 --- a/onestop-python-client/onestop/util/SqsHandlers.py +++ b/onestop-python-client/onestop/util/SqsHandlers.py @@ -1,3 +1,5 @@ +from onestop.util.ClientLogger import ClientLogger + def create_delete_handler(web_publisher): """ Creates a delete function handler to be used with SqsConsumer.receive_messages. @@ -7,21 +9,36 @@ def create_delete_handler(web_publisher): :param: web_publisher: WebPublisher object """ - def delete(records): - if records is None: + def delete(records, log_level='INFO'): + + logger = ClientLogger.get_logger('SqsHandlers', log_level, False) + logger.info("In create_delete_handler.delete() handler") + logger.debug("Records: %s"%records) + + if not records or records is None: + logger.info("Ending handler, records empty, records=%s"%records) return + record = records[0] if record['eventName'] != 'ObjectRemoved:Delete': + logger.info("Ending handler, eventName=%s"%record['eventName']) return + bucket = record['s3']['bucket']['name'] s3_key = record['s3']['object']['key'] s3_url = "s3://" + bucket + "/" + s3_key payload = '{"queries":[{"type": "fieldQuery", "field": "links.linkUrl", "value": "' + s3_url + '"}] }' search_response = web_publisher.search_onestop('granule', payload) + logger.debug('OneStop search response=%s'%search_response) response_json = search_response.json() + logger.debug('OneStop search response json=%s'%response_json) + logger.debug('OneStop search response data=%s'%response_json['data']) if len(response_json['data']) != 0: granule_uuid = response_json['data'][0]['id'] response = web_publisher.delete_registry('granule', granule_uuid) + print('delete_registry response: %s'%response) return response + logger.warning("OneStop search response has no 'data' field. Response=%s"%response_json) + return delete From 5e0d3ba87eed10f2078c591ae8f2b3bc575de13a Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 11 May 2021 08:02:45 -0600 Subject: [PATCH 25/49] 1500-Added tests to SqsHandlersTest and removed config usage. 
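
For orientation, the production wiring these tests emulate is roughly: build a WebPublisher, wrap it with create_delete_handler, and hand the resulting callback to the SQS consumer. A rough sketch under that assumption (URLs, credentials and the queue object are placeholders taken from the test config):

    from onestop.WebPublisher import WebPublisher
    from onestop.util.SqsConsumer import SqsConsumer
    from onestop.util.SqsHandlers import create_delete_handler

    web_publisher = WebPublisher(
        registry_base_url='http://localhost/onestop/api/registry',       # placeholder
        registry_username='admin',
        registry_password='whoknows',
        onestop_base_url='http://localhost/onestop/api/search/search',   # placeholder
        log_level='DEBUG')

    delete_handler = create_delete_handler(web_publisher)

    consumer = SqsConsumer(log_level='DEBUG')
    # sqs_queue would be an SQS.Queue, e.g. consumer.connect(sqs_resource, 'queue-name')
    # consumer.receive_messages(sqs_queue, 2, delete_handler)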
--- .../tests/SqsHandlersTest.py | 274 +++++++++++++----- 1 file changed, 194 insertions(+), 80 deletions(-) diff --git a/onestop-python-client/tests/SqsHandlersTest.py b/onestop-python-client/tests/SqsHandlersTest.py index bbe4210..3897169 100644 --- a/onestop-python-client/tests/SqsHandlersTest.py +++ b/onestop-python-client/tests/SqsHandlersTest.py @@ -1,8 +1,8 @@ import json import unittest -import boto3 -import yaml -from moto import mock_s3 + +from unittest import mock +from unittest.mock import patch from moto import mock_sqs from tests.utils import abspath_from_relative, create_delete_message from onestop.WebPublisher import WebPublisher @@ -13,95 +13,209 @@ class SqsHandlerTest(unittest.TestCase): - wp = None - su = None - s3ma = None - sqs = None - wp_config = abspath_from_relative(__file__, "../config/web-publisher-config-local.yml") - aws_config = abspath_from_relative(__file__, "../config/aws-util-config-dev.yml") - cred_config = abspath_from_relative(__file__, "../config/credentials-template.yml") - csb_config = abspath_from_relative(__file__, "../config/csb-data-stream-config.yml") - - collection_uuid = '5b58de08-afef-49fb-99a1-9c5d5c003bde' - payloadDict = { - "fileInformation": { - "name": "OR_ABI-L1b-RadF-M6C13_G16_s20192981730367_e20192981740087_c20192981740157.nc", - "size": 30551050, - "checksums": [{ - "algorithm": "SHA1", - "value": "bf4c5b58f8d5f9445f7b277f988e5861184f775a" - }], - "format": "NetCDF" - }, - "relationships": [{ - "type": "COLLECTION", - "id": collection_uuid - }], - "fileLocations": { - "s3://noaa-goes16/ABI-L1b-RadF/2019/298/17/OR_ABI-L1b-RadF-M6C13_G16_s20192981730367_e20192981740087_c20192981740157.nc": { - "uri": "s3://noaa-goes16/ABI-L1b-RadF/2019/298/17/OR_ABI-L1b-RadF-M6C13_G16_s20192981730367_e20192981740087_c20192981740157.nc", - "type": "ACCESS", - "deleted": "false", - "restricted": "false", - "asynchronous": "false", - "locality": "us-east-2", - "lastModified": 1572025823000, - "serviceType": "Amazon:AWS:S3", - "optionalAttributes": {} - } - } - } def setUp(self): print("Set it up!") - with open(abspath_from_relative(__file__, "../config/csb-data-stream-config-template.yml")) as f: - self.stream_conf = yaml.load(f, Loader=yaml.FullLoader) - with open(abspath_from_relative(__file__, "../config/aws-util-config-dev.yml")) as f: - self.cloud_conf = yaml.load(f, Loader=yaml.FullLoader) - with open(abspath_from_relative(__file__, "../config/credentials-template.yml")) as f: - self.cred = yaml.load(f, Loader=yaml.FullLoader) - - self.wp = WebPublisher(self.wp_config, self.cred_config) - self.su = S3Utils(self.cred['sandbox']['access_key'], - self.cred['sandbox']['secret_key'], - "DEBUG") - self.s3ma = S3MessageAdapter(self.stream_conf['access_bucket'], - self.stream_conf['type'], - self.stream_conf['file_identifier_prefix'], - self.stream_conf['collection_id']) + self.config_dict = { + 'access_key': 'test_access_key', + 'secret_key': 'test_secret_key', + 'access_bucket': 'https://archive-testing-demo.s3-us-east-2.amazonaws.com', + 'type': 'COLLECTION', + 'file_id_prefix': 'gov.noaa.ncei.csb:', + 'collection_id': 'fdb56230-87f4-49f2-ab83-104cfd073177', + 'registry_base_url': 'http://localhost/onestop/api/registry', + 'registry_username': 'admin', + 'registry_password': 'whoknows', + 'onestop_base_url': 'http://localhost/onestop/api/search/search', + 'log_level': 'DEBUG' + } + + self.wp = WebPublisher(**self.config_dict) + self.s3_utils = S3Utils(**self.config_dict) + self.s3ma = S3MessageAdapter(**self.config_dict) + self.sqs_consumer = 
SqsConsumer(**self.config_dict) + + self.sqs_max_polls = 3 + self.region = 'us-east-2' + self.bucket = 'archive-testing-demo' + self.key = 'ABI-L1b-RadF/2019/298/15/OR_ABI-L1b-RadF-M6C15_G16_s20192981500369_e20192981510082_c20192981510166.nc' def tearDown(self): print("Tear it down!") - @mock_s3 + def mocked_search_response_data(*args, **kwargs): + class MockResponse: + def __init__(self, json_data, status_code): + self.json_data = json_data + self.status_code = status_code + + def json(self): + return self.json_data + + print ("args: "+str(args)+" kwargs: "+str(kwargs)) + onestop_search_response = { + "data":[ + { + "attributes":{ + "serviceLinks":[ + + ], + "citeAsStatements":[ + + ], + "links":[ + { + "linkFunction":"download", + "linkUrl":"s3://archive-testing-demo-backup/public/NESDIS/CSB/csv/2019/12/01/20191201_08d5538c6f8dbefd7d82929623a34385_pointData.csv", + "linkName":"Amazon S3", + "linkProtocol":"Amazon:AWS:S3" + }, + { + "linkFunction":"download", + "linkUrl":"https://archive-testing-demo.s3-us-east-2.amazonaws.com/public/NESDIS/CSB/csv/2019/12/01/20191201_08d5538c6f8dbefd7d82929623a34385_pointData.csv", + "linkName":"Amazon S3", + "linkProtocol":"HTTPS" + } + ], + "internalParentIdentifier":"fdb56230-87f4-49f2-ab83-104cfd073177", + "filesize":63751, + "title":"20191201_08d5538c6f8dbefd7d82929623a34385_pointData.csv" + }, + "id":"77b11a1e-1b75-46e1-b7d6-99b5022ed113", + "type":"granule" + } + ], + "meta":{ + "took":1, + "total":6, + "exactCount":True + } + } + return MockResponse(onestop_search_response, 200) + + + def mocked_search_response_data_empty(*args, **kwargs): + class MockResponse: + def __init__(self, json_data, status_code): + self.json_data = json_data + self.status_code = status_code + + def json(self): + return self.json_data + + print ("args: "+str(args)+" kwargs: "+str(kwargs)) + onestop_search_response = { + "data":[], + "meta":{ + "took":1, + "total":6, + "exactCount":True + } + } + return MockResponse(onestop_search_response, 200) + @mock_sqs - def init_s3(self): - bucket = self.cloud_conf['s3_bucket'] - key = self.cloud_conf['s3_key'] - boto_client = self.su.connect("s3", None) - boto_client.create_bucket(Bucket=bucket) - boto_client.put_object(Bucket=bucket, Key=key, Body="foobar") - - sqs_client = boto3.client('sqs', region_name=self.cloud_conf['s3_region']) - sqs_queue = sqs_client.create_queue(QueueName=self.cloud_conf['sqs_name']) - self.sqs = SqsConsumer(self.aws_config, self.cred_config) - message = create_delete_message(self.cloud_conf['s3_region'], bucket, key) - sqs_client.send_message(QueueUrl=sqs_queue['QueueUrl'], MessageBody=json.dumps(message)) - sqs_queue['QueueUrl'] - - @mock_s3 + @mock.patch('requests.get', side_effect=mocked_search_response_data, autospec=True) + @patch('onestop.WebPublisher') + def test_delete_handler_happy(self, mock_wp, mock_response): + queue_name = 'test_queue' + sqs_resource = self.s3_utils.connect_to_resource('sqs', self.region) + sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url + sqs_queue = sqs_resource.Queue(queue_name) + + # Send a test message + sqs_client = self.s3_utils.connect('sqs', self.region) + message = create_delete_message(self.region, self.bucket, self.key) + sqs_client.send_message( + QueueUrl=sqs_queue_url, + MessageBody=json.dumps(message) + ) + + mock_wp.search_onestop.side_effect = mock_response + cb = create_delete_handler(mock_wp) + + self.sqs_consumer.receive_messages(sqs_queue, 1, cb) + + # Verify search and delete called once. 
+ mock_wp.search_onestop.assert_called_once() + mock_wp.delete_registry.assert_called_once() + @mock_sqs - def delete_handler_wrapper(self, recs): - handler = create_delete_handler(self.wp) - result = handler(recs) - self.assertTrue(result) + @mock.patch('requests.get', side_effect=mocked_search_response_data_empty, autospec=True) + @patch('onestop.WebPublisher') + def test_delete_handler_data_empty_ends_cb(self, mock_wp, mock_response): + queue_name = 'test_queue' + sqs_resource = self.s3_utils.connect_to_resource('sqs', self.region) + sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url + sqs_queue = sqs_resource.Queue(queue_name) + + # Send a test message + sqs_client = self.s3_utils.connect('sqs', self.region) + message = create_delete_message(self.region, self.bucket, self.key) + sqs_client.send_message( + QueueUrl=sqs_queue_url, + MessageBody=json.dumps(message) + ) + + mock_wp.search_onestop.side_effect = mock_response + cb = create_delete_handler(mock_wp) + + self.sqs_consumer.receive_messages(sqs_queue, 1, cb) + + # Verify search and delete called once. + mock_wp.search_onestop.assert_called_once() + mock_wp.delete_registry.assert_not_called() @mock_sqs - def test_delete_handler(self): - mock_queue_url = self.init_s3() - sqs_queue = boto3.resource('sqs', region_name=self.stream_conf['s3_region']).Queue(mock_queue_url) - self.sqs.receive_messages(sqs_queue, self.stream_conf['sqs_max_polls'], self.delete_handler_wrapper) + @mock.patch('requests.get', side_effect=mocked_search_response_data, autospec=True) + @patch('onestop.WebPublisher') + def test_delete_handler_no_records_ends_cb(self, mock_wp, mock_response): + queue_name = 'test_queue' + sqs_resource = self.s3_utils.connect_to_resource('sqs', self.region) + sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url + sqs_queue = sqs_resource.Queue(queue_name) + + # Send a test message + sqs_client = self.s3_utils.connect('sqs', self.region) + sqs_client.send_message( + QueueUrl=sqs_queue_url, + MessageBody=json.dumps({"Message":'''{"Records":[]}'''}) + ) + + mock_wp.search_onestop.side_effect = mock_response + cb = create_delete_handler(mock_wp) + + self.sqs_consumer.receive_messages(sqs_queue, 1, cb) + + # Verify search and delete called once. + mock_wp.search_onestop.assert_not_called() + mock_wp.delete_registry.assert_not_called() + + @mock_sqs + @mock.patch('requests.get', side_effect=mocked_search_response_data, autospec=True) + @patch('onestop.WebPublisher') + def test_delete_handler_eventName_not_delete_ends_cb(self, mock_wp, mock_response): + queue_name = 'test_queue' + sqs_resource = self.s3_utils.connect_to_resource('sqs', self.region) + sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url + sqs_queue = sqs_resource.Queue(queue_name) + + # Send a test message + sqs_client = self.s3_utils.connect('sqs', self.region) + sqs_client.send_message( + QueueUrl=sqs_queue_url, + MessageBody=json.dumps({"Message":'''{"Records":[{"eventName":"Unknown"}]}'''}) + ) + + mock_wp.search_onestop.side_effect = mock_response + cb = create_delete_handler(mock_wp) + + self.sqs_consumer.receive_messages(sqs_queue, 1, cb) + + # Verify search and delete called once. 
+ mock_wp.search_onestop.assert_not_called() + mock_wp.delete_registry.assert_not_called() if __name__ == '__main__': unittest.main() \ No newline at end of file From 918c378b053da7918f679cf1806a5f68440f190c Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 11 May 2021 08:17:33 -0600 Subject: [PATCH 26/49] 1500-Fixed SqsConsumerTest due to parameters into CB changing. skipped example test. --- onestop-python-client/tests/util/SqsConsumerTest.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/onestop-python-client/tests/util/SqsConsumerTest.py b/onestop-python-client/tests/util/SqsConsumerTest.py index 87f9005..7b5785f 100644 --- a/onestop-python-client/tests/util/SqsConsumerTest.py +++ b/onestop-python-client/tests/util/SqsConsumerTest.py @@ -2,7 +2,7 @@ import json from moto import mock_sqs -from unittest.mock import ANY, patch, MagicMock, call +from unittest.mock import MagicMock, ANY from onestop.util.S3Utils import S3Utils from onestop.util.SqsConsumer import SqsConsumer @@ -128,7 +128,7 @@ def test_receive_messages_callback_occurs(self): self.sqs_consumer.receive_messages(queue, 1, mock_cb) # Verify callback function was called once with expected message attributes - mock_cb.assert_called_with(self.records) + mock_cb.assert_called_with(self.records, ANY) @mock_sqs def test_happy_path(self): @@ -149,9 +149,10 @@ def test_happy_path(self): self.sqs_consumer.receive_messages(queue, 1, mock_cb) # Verify callback function was called once with expected message attributes - mock_cb.assert_called_with(self.records) + mock_cb.assert_called_with(self.records, ANY) # An example using external send/receive methods + @unittest.skip @mock_sqs def test_write_message_valid(self): "Test the write_message method with a valid message" From 3f39966f8fe4f42c3e385ad79e04ba014fdc1b17 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 11 May 2021 08:22:33 -0600 Subject: [PATCH 27/49] 1500-Removed unused conf variable. --- onestop-python-client/onestop/util/S3Utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/onestop-python-client/onestop/util/S3Utils.py b/onestop-python-client/onestop/util/S3Utils.py index e2f2e32..eebafe9 100644 --- a/onestop-python-client/onestop/util/S3Utils.py +++ b/onestop-python-client/onestop/util/S3Utils.py @@ -68,7 +68,6 @@ class S3Utils: retrieve_inventory_results(vault_name, boto_client, job_id) Retrieve the results of an Amazon Glacier inventory-retrieval job """ - conf = None def __init__(self, access_key, secret_key, log_level = 'INFO', **wildargs): self.access_key = access_key From 4cffc3884e7d41e074494ca6bd122f0ce4cde9eb Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 11 May 2021 09:21:19 -0600 Subject: [PATCH 28/49] 1500-Removed unused var conf from classes. 
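
For context, the dropped conf = None declarations were class-level attributes that nothing read once the keyword-argument constructors stopped loading YAML, so removing them leaves instance state untouched. A tiny illustration of the distinction (Example is hypothetical):

    class Example:
        conf = None                     # class attribute: shared, and unused after __init__

        def __init__(self, log_level='INFO'):
            self.log_level = log_level  # instance attribute: the state actually used

    e = Example()
    assert 'conf' not in vars(e)        # the instance never gained its own conf
    assert Example.conf is None         # only the class carries the leftover name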
--- onestop-python-client/onestop/WebPublisher.py | 1 - 1 file changed, 1 deletion(-) diff --git a/onestop-python-client/onestop/WebPublisher.py b/onestop-python-client/onestop/WebPublisher.py index 75ee99f..d04eacc 100644 --- a/onestop-python-client/onestop/WebPublisher.py +++ b/onestop-python-client/onestop/WebPublisher.py @@ -31,7 +31,6 @@ class WebPublisher: get_granules_onestop(self, uuid) Search for a granule in OneStop given its uuid """ - conf = None def __init__(self, registry_base_url, registry_username, registry_password, onestop_base_url, log_level="INFO", **kwargs): self.registry_base_url = registry_base_url From 8280a374901c62c68eea722ebd6a7c087a57ce1f Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 11 May 2021 09:21:58 -0600 Subject: [PATCH 29/49] 1500-Changed mock tests to not load configs but use mock data. --- .../tests/util/S3MessageAdapterTest.py | 32 +++++------- .../tests/util/S3UtilsTest.py | 52 +++++++++---------- 2 files changed, 38 insertions(+), 46 deletions(-) diff --git a/onestop-python-client/tests/util/S3MessageAdapterTest.py b/onestop-python-client/tests/util/S3MessageAdapterTest.py index a960737..671695a 100644 --- a/onestop-python-client/tests/util/S3MessageAdapterTest.py +++ b/onestop-python-client/tests/util/S3MessageAdapterTest.py @@ -1,8 +1,6 @@ import unittest -import yaml from moto import mock_s3 -from tests.utils import abspath_from_relative from onestop.util.S3Utils import S3Utils from onestop.util.S3MessageAdapter import S3MessageAdapter @@ -54,30 +52,24 @@ class S3MessageAdapterTest(unittest.TestCase): def setUp(self): print("Set it up!") - with open(abspath_from_relative(__file__, "../../config/csb-data-stream-config-template.yml")) as f: - self.stream_conf = yaml.load(f, Loader=yaml.FullLoader) - with open(abspath_from_relative(__file__, "../../config/aws-util-config-dev.yml")) as f: - self.cloud_conf = yaml.load(f, Loader=yaml.FullLoader) - with open(abspath_from_relative(__file__, "../../config/credentials-template.yml")) as f: - self.cred = yaml.load(f, Loader=yaml.FullLoader) + config_dict = { + 'access_key': 'test_access_key', + 'secret_key': 'test_secret_key', + 'access_bucket': 'https://archive-testing-demo.s3-us-east-2.amazonaws.com', + 'type': 'COLLECTION', + 'file_id_prefix': 'gov.noaa.ncei.csb:', + 'collection_id': 'fdb56230-87f4-49f2-ab83-104cfd073177', + 'log_level': 'DEBUG' + } - self.s3_utils = S3Utils(self.cred['sandbox']['access_key'], - self.cred['sandbox']['secret_key'], - "DEBUG") - self.s3ma = S3MessageAdapter(self.stream_conf['access_bucket'], - self.stream_conf['type'], - self.stream_conf['file_identifier_prefix'], - self.stream_conf['collection_id']) + self.s3_utils = S3Utils(**config_dict) + self.s3ma = S3MessageAdapter(**config_dict) - self.region = self.cloud_conf['s3_region'] - self.bucket = self.cloud_conf['s3_bucket'] + self.region = 'us-east-2' def tearDown(self): print("Tear it down!") - def test_parse_config(self): - self.assertFalse(self.stream_conf['collection_id'] == None) - @mock_s3 def test_transform(self): s3 = self.s3_utils.connect('s3', self.region) diff --git a/onestop-python-client/tests/util/S3UtilsTest.py b/onestop-python-client/tests/util/S3UtilsTest.py index acb0af4..47c8ade 100644 --- a/onestop-python-client/tests/util/S3UtilsTest.py +++ b/onestop-python-client/tests/util/S3UtilsTest.py @@ -1,7 +1,6 @@ import csv import unittest import uuid -import yaml from moto import mock_s3 from moto import mock_glacier @@ -13,20 +12,21 @@ class S3UtilsTest(unittest.TestCase): def setUp(self): print("Set it 
up!") - with open(abspath_from_relative(__file__, "../../config/csb-data-stream-config-template.yml")) as f: - self.stream_conf = yaml.load(f, Loader=yaml.FullLoader) - with open(abspath_from_relative(__file__, "../../config/aws-util-config-dev.yml")) as f: - self.cloud_conf = yaml.load(f, Loader=yaml.FullLoader) - with open(abspath_from_relative(__file__, "../../config/credentials-template.yml")) as f: - self.cred = yaml.load(f, Loader=yaml.FullLoader) + config_dict = { + 'access_key': 'test_access_key', + 'secret_key': 'test_secret_key', + 'access_bucket': 'https://archive-testing-demo.s3-us-east-2.amazonaws.com', + 'type': 'COLLECTION', + 'file_id_prefix': 'gov.noaa.ncei.csb:', + 'collection_id': 'fdb56230-87f4-49f2-ab83-104cfd073177', + 'log_level': 'DEBUG' + } - self.s3_utils = S3Utils(self.cred['sandbox']['access_key'], - self.cred['sandbox']['secret_key'], - "DEBUG") + self.s3_utils = S3Utils(**config_dict) - self.region = self.cloud_conf['s3_region'] - self.region2 = self.region - self.bucket = self.cloud_conf['s3_bucket'] + self.region = 'us-east-2' + self.region2 = 'eu-north-1' + self.bucket = 'archive-testing-demo' @mock_s3 def test_get_uuid_metadata(self): @@ -54,7 +54,7 @@ def test_add_uuid_metadata(self): @mock_s3 def test_add_file_s3(self): - boto_client = self.s3_utils.connect("s3", None) + boto_client = self.s3_utils.connect('client', 's3', None) local_file = abspath_from_relative(__file__, "../data/file4.csv") s3_key = "csv/file4.csv" location = {'LocationConstraint': self.region} @@ -65,8 +65,8 @@ def test_add_file_s3(self): @mock_s3 def test_get_csv_s3(self): - boto_session = self.s3_utils.connect("session", None) - s3 = self.s3_utils.connect('s3', self.cloud_conf['s3_region']) + boto_session = self.s3_utils.connect('session', None, None) + s3 = self.s3_utils.connect('client', 's3', self.region) location = {'LocationConstraint': self.region} s3_key = "csv/file1.csv" s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) @@ -81,7 +81,7 @@ def test_get_csv_s3(self): @mock_s3 def test_read_bytes_s3(self): - boto_client = self.s3_utils.connect("s3", None) + boto_client = self.s3_utils.connect('client', 's3', None) s3_key = "csv/file1.csv" boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration={'LocationConstraint': self.region}) boto_client.put_object(Bucket=self.bucket, Key=s3_key, Body="body") @@ -90,7 +90,7 @@ def test_read_bytes_s3(self): @mock_s3 def test_add_files(self): - boto_client = self.s3_utils.connect("s3", None) + boto_client = self.s3_utils.connect('client', 's3', None) local_files = ["file1_s3.csv", "file2.csv", "file3.csv"] location = {'LocationConstraint': self.region} boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) @@ -108,7 +108,7 @@ def test_s3_cross_region(self): key = "csv/file1.csv" # makes connection to low level s3 client - s3 = self.s3_utils.connect('s3', self.region) + s3 = self.s3_utils.connect('client', 's3', self.region) location = {'LocationConstraint': self.region} s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) s3.put_object(Bucket=self.bucket, Key=key, Body="body") @@ -117,8 +117,8 @@ def test_s3_cross_region(self): file_data = self.s3_utils.read_bytes_s3(s3, self.bucket, key) # Redirecting upload to vault in second region - glacier = self.s3_utils.connect("glacier", self.region2) - vault_name = self.cloud_conf['vault_name'] + glacier = self.s3_utils.connect('client', 'glacier', self.region2) + vault_name = 'archive-vault-new' 
glacier.create_vault(vaultName=vault_name) print('vault name: ' + str(vault_name)) print('region name: ' + str(self.region2)) @@ -140,7 +140,7 @@ def test_s3_to_glacier(self): key = "csv/file1_s3.csv" # Create boto3 low level api connection - s3 = self.s3_utils.connect('s3', self.region) + s3 = self.s3_utils.connect('client', 's3', self.region) location = {'LocationConstraint': self.region} s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) s3.put_object(Bucket=self.bucket, Key=key, Body="body") @@ -172,13 +172,13 @@ def test_s3_restore(self): @mock_glacier def test_retrieve_inventory(self): """ - Initiates job for archive retrieval. Takes 3-5 hours to complete + Initiates job for archive retrieval. Takes 3-5 hours to complete if not mocked. """ # Using glacier api initiates job and returns archive results # Connect to your glacier vault for retrieval - glacier = self.s3_utils.connect("glacier", self.region2) - vault_name = self.cloud_conf['vault_name'] + glacier = self.s3_utils.connect('client', 'glacier', self.region2) + vault_name = 'archive-vault-new' glacier.create_vault(vaultName=vault_name) @@ -193,7 +193,7 @@ def test_retrieve_inventory_results(self, jobid): """ # Connect to your glacier vault for retrieval - glacier = self.su.connect("glacier", self.su.conf['region']) + glacier = self.su.connect('client', 'glacier', self.su.conf['region']) vault_name = self.su.conf['vault_name'] # Retrieve the job results From c16302eaf845fbab4cfe1862163e97cab563182e Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 11 May 2021 09:25:02 -0600 Subject: [PATCH 30/49] 1500-refactored S3Utils connect to take in a type parameter instead of us assuming if they say "glacier" they mean a client of service type "glacier". It is a little clearer to the user and in the code, and it allows boto to catch a wrong service name and give a nice list of choices. Added an else statement for cases where the user specifies an unexpected type; tests for that will be added. --- onestop-python-client/onestop/util/S3Utils.py | 61 +++++++++---------- .../tests/SqsHandlersTest.py | 16 ++--- .../tests/extractor/CsbExtractorTest.py | 4 +- .../tests/util/S3MessageAdapterTest.py | 2 +- .../tests/util/S3UtilsTest.py | 6 +- .../tests/util/SqsConsumerTest.py | 18 +++--- 6 files changed, 51 insertions(+), 56 deletions(-) diff --git a/onestop-python-client/onestop/util/S3Utils.py b/onestop-python-client/onestop/util/S3Utils.py index eebafe9..f1bb8e2 100644 --- a/onestop-python-client/onestop/util/S3Utils.py +++ b/onestop-python-client/onestop/util/S3Utils.py @@ -30,7 +30,7 @@ class S3Utils: Methods ------- connect(client_type, region) - connects to a boto3 client + connects to a boto3 service objectkey_exists(bucket, s3_key) checks to see if a s3 key path exists in a particular bucket @@ -78,46 +78,41 @@ def __init__(self, access_key, secret_key, log_level = 'INFO', **wildargs): if wildargs: self.logger.error("There were extra constructor arguments: " + str(wildargs)) - def connect(self, client_type, region): + def connect(self, type, service_name, region): """ - Connects to a boto3 client + Connects to a boto3 object of the specified type using the credentials provided in the constructor. - :param client_type: str - boto client type in which you want to access + :param type: str + boto object type to return, see return type. 
+ :param service_name: str + (Optional for session type) boto service name in which you want to access :param region: str - name of aws region you want to access + (Optional for session type) name of aws region you want to access - :return: boto3 client - dependent on the client_type parameter + :return: boto3 connection object + A boto3 connection object; Client, Session, or Resource. """ - - if client_type == "s3": - boto = boto3.client( - "s3", + type = type.lower() + if type == 'session': + return boto3.Session( aws_access_key_id=self.access_key, aws_secret_access_key=self.secret_key, - region_name=region) - - if client_type == "s3_resource": - boto = boto3.resource( - "s3", - region_name=region, + ) + elif type == 'client': + return boto3.client( + service_name, aws_access_key_id=self.access_key, - aws_secret_access_key=self.secret_key) - - if client_type == "glacier": - boto = boto3.client( - "glacier", + aws_secret_access_key=self.secret_key, + region_name=region) + elif type == 'resource': + return boto3.resource( + service_name, region_name=region, aws_access_key_id=self.access_key, - aws_secret_access_key=self.secret_key) - - if client_type == "session": - boto = boto3.Session( - aws_access_key_id=self.access_key, - aws_secret_access_key=self.secret_key, + aws_secret_access_key=self.secret_key ) - return boto + else: + raise Exception('Unknown boto3 type of %s'%type) def objectkey_exists(self, bucket, s3_key): """ @@ -235,11 +230,11 @@ def upload_s3(self, boto_client, local_file, bucket, s3_key, overwrite): self.logger.error("File to upload was not found. Path: "+local_file) return False - def get_csv_s3(self, boto_client, bucket, key): + def get_csv_s3(self, boto_session, bucket, key): """ gets a csv file from s3 bucket using smart open library - :param boto_client: session + :param boto_session: session utilizes boto session type :param bucket: str name of bucket @@ -249,7 +244,7 @@ def get_csv_s3(self, boto_client, bucket, key): :return: smart open file """ url = "s3://" + bucket + "/" + key - sm_open_file = sm_open(url, 'r', transport_params={'session': boto_client}) + sm_open_file = sm_open(url, 'r', transport_params={'session': boto_session}) return sm_open_file def read_bytes_s3(self, boto_client, bucket, key): diff --git a/onestop-python-client/tests/SqsHandlersTest.py b/onestop-python-client/tests/SqsHandlersTest.py index 3897169..4dd2c9e 100644 --- a/onestop-python-client/tests/SqsHandlersTest.py +++ b/onestop-python-client/tests/SqsHandlersTest.py @@ -120,12 +120,12 @@ def json(self): @patch('onestop.WebPublisher') def test_delete_handler_happy(self, mock_wp, mock_response): queue_name = 'test_queue' - sqs_resource = self.s3_utils.connect_to_resource('sqs', self.region) + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.region) sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url sqs_queue = sqs_resource.Queue(queue_name) # Send a test message - sqs_client = self.s3_utils.connect('sqs', self.region) + sqs_client = self.s3_utils.connect('client', 'sqs' , self.region) message = create_delete_message(self.region, self.bucket, self.key) sqs_client.send_message( QueueUrl=sqs_queue_url, @@ -146,12 +146,12 @@ def test_delete_handler_happy(self, mock_wp, mock_response): @patch('onestop.WebPublisher') def test_delete_handler_data_empty_ends_cb(self, mock_wp, mock_response): queue_name = 'test_queue' - sqs_resource = self.s3_utils.connect_to_resource('sqs', self.region) + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.region) 
sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url sqs_queue = sqs_resource.Queue(queue_name) # Send a test message - sqs_client = self.s3_utils.connect('sqs', self.region) + sqs_client = self.s3_utils.connect('client', 'sqs' , self.region) message = create_delete_message(self.region, self.bucket, self.key) sqs_client.send_message( QueueUrl=sqs_queue_url, @@ -172,12 +172,12 @@ def test_delete_handler_data_empty_ends_cb(self, mock_wp, mock_response): @patch('onestop.WebPublisher') def test_delete_handler_no_records_ends_cb(self, mock_wp, mock_response): queue_name = 'test_queue' - sqs_resource = self.s3_utils.connect_to_resource('sqs', self.region) + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.region) sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url sqs_queue = sqs_resource.Queue(queue_name) # Send a test message - sqs_client = self.s3_utils.connect('sqs', self.region) + sqs_client = self.s3_utils.connect('client', 'sqs' , self.region) sqs_client.send_message( QueueUrl=sqs_queue_url, MessageBody=json.dumps({"Message":'''{"Records":[]}'''}) @@ -197,12 +197,12 @@ def test_delete_handler_no_records_ends_cb(self, mock_wp, mock_response): @patch('onestop.WebPublisher') def test_delete_handler_eventName_not_delete_ends_cb(self, mock_wp, mock_response): queue_name = 'test_queue' - sqs_resource = self.s3_utils.connect_to_resource('sqs', self.region) + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.region) sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url sqs_queue = sqs_resource.Queue(queue_name) # Send a test message - sqs_client = self.s3_utils.connect('sqs', self.region) + sqs_client = self.s3_utils.connect('client', 'sqs' , self.region) sqs_client.send_message( QueueUrl=sqs_queue_url, MessageBody=json.dumps({"Message":'''{"Records":[{"eventName":"Unknown"}]}'''}) diff --git a/onestop-python-client/tests/extractor/CsbExtractorTest.py b/onestop-python-client/tests/extractor/CsbExtractorTest.py index 72bdbcc..2c3ff72 100644 --- a/onestop-python-client/tests/extractor/CsbExtractorTest.py +++ b/onestop-python-client/tests/extractor/CsbExtractorTest.py @@ -38,13 +38,13 @@ def test_is_not_csv(self): @mock_s3 def test_csb_SME_user_path(self): # Setup bucket and file to read - s3 = self.s3_utils.connect('s3', self.region) + s3 = self.s3_utils.connect('client', 's3', self.region) s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration={'LocationConstraint': self.region}) self.s3_utils.upload_s3(s3, self.root_proj_path + '/' + self.key, self.bucket, self.key, True) self.assertTrue(self.s3_utils.read_bytes_s3(s3, self.bucket, self.key)) # This is how we would expect an external user to get the file. 
- sm_open_file = self.s3_utils.get_csv_s3(self.s3_utils.connect("session", None), self.bucket, self.key) + sm_open_file = self.s3_utils.get_csv_s3(self.s3_utils.connect('session', None, None), self.bucket, self.key) bounds_dict = CsbExtractor.get_spatial_temporal_bounds(sm_open_file, 'LON', 'LAT', 'TIME') coords = bounds_dict["geospatial"] diff --git a/onestop-python-client/tests/util/S3MessageAdapterTest.py b/onestop-python-client/tests/util/S3MessageAdapterTest.py index 671695a..925be2e 100644 --- a/onestop-python-client/tests/util/S3MessageAdapterTest.py +++ b/onestop-python-client/tests/util/S3MessageAdapterTest.py @@ -72,7 +72,7 @@ def tearDown(self): @mock_s3 def test_transform(self): - s3 = self.s3_utils.connect('s3', self.region) + s3 = self.s3_utils.connect('client', 's3', self.region) location = {'LocationConstraint': self.region} bucket = 'nesdis-ncei-csb-dev' key = 'csv/file1.csv' diff --git a/onestop-python-client/tests/util/S3UtilsTest.py b/onestop-python-client/tests/util/S3UtilsTest.py index 47c8ade..83be8f2 100644 --- a/onestop-python-client/tests/util/S3UtilsTest.py +++ b/onestop-python-client/tests/util/S3UtilsTest.py @@ -30,7 +30,7 @@ def setUp(self): @mock_s3 def test_get_uuid_metadata(self): - boto_client = self.s3_utils.connect("s3_resource", None) + boto_client = self.s3_utils.connect('resource', 's3', None) s3_key = "csv/file1.csv" location = {'LocationConstraint': self.region} @@ -42,7 +42,7 @@ def test_get_uuid_metadata(self): @mock_s3 def test_add_uuid_metadata(self): - boto_client = self.s3_utils.connect("s3_resource", self.region) + boto_client = self.s3_utils.connect('resource', 's3', self.region) s3_key = "csv/file1.csv" @@ -162,7 +162,7 @@ def test_s3_restore(self): days = 3 # use high level api - s3 = self.s3_utils.connect('s3_resource', self.region2) + s3 = self.s3_utils.connect('resource', 's3' , self.region2) location = {'LocationConstraint': self.region2} s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) s3.Object(self.bucket, key).put(Bucket=self.bucket, Key=key, Body="body") diff --git a/onestop-python-client/tests/util/SqsConsumerTest.py b/onestop-python-client/tests/util/SqsConsumerTest.py index 7b5785f..ef50b20 100644 --- a/onestop-python-client/tests/util/SqsConsumerTest.py +++ b/onestop-python-client/tests/util/SqsConsumerTest.py @@ -63,7 +63,7 @@ def tearDown(self): @mock_sqs def test_connect(self): queue_name = 'test' - sqs_resource = self.s3_utils.connect_to_resource('sqs', self.config_dict['s3_region']) + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.config_dict['s3_region']) expQueue = sqs_resource.create_queue(QueueName=queue_name) queue = self.sqs_consumer.connect(sqs_resource, queue_name) @@ -76,11 +76,11 @@ def test_receive_messages_no_records(self): # Create the mock queue beforehand and set SqsConsumer's 'sqs_url' to the mock's URL queue_name = 'test_queue' - sqs_resource = self.s3_utils.connect_to_resource('sqs', self.config_dict['s3_region']) + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.config_dict['s3_region']) sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url # Send a test message lacking Records field - sqs_client = self.s3_utils.connect('sqs', self.config_dict['s3_region']) + sqs_client = self.s3_utils.connect('client', 'sqs' , self.config_dict['s3_region']) sqs_client.send_message( QueueUrl=sqs_queue_url, MessageBody= self.message_wo_records @@ -114,11 +114,11 @@ def test_receive_messages_callback_occurs(self): # Create the mock queue beforehand and set 
SqsConsumer's 'sqs_url' to the mock's URL queue_name = 'test_queue' - sqs_resource = self.s3_utils.connect_to_resource('sqs', self.config_dict['s3_region']) + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.config_dict['s3_region']) sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url # Send a test message - sqs_client = self.s3_utils.connect('sqs', self.config_dict['s3_region']) + sqs_client = self.s3_utils.connect('client', 'sqs' , self.config_dict['s3_region']) sqs_client.send_message( QueueUrl=sqs_queue_url, MessageBody= self.message @@ -136,11 +136,11 @@ def test_happy_path(self): # Create the mock queue beforehand and set SqsConsumer's 'sqs_url' to the mock's URL queue_name = 'test_queue' - sqs_resource = self.s3_utils.connect_to_resource('sqs', self.config_dict['s3_region']) + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.config_dict['s3_region']) queue = self.sqs_consumer.connect(sqs_resource, queue_name) #sqs_resource.create_queue(QueueName=queue_name) # Send a test message - sqs_client = self.s3_utils.connect('sqs', self.config_dict['s3_region']) + sqs_client = self.s3_utils.connect('client', 'sqs' , self.config_dict['s3_region']) sqs_client.send_message( QueueUrl=queue.url, MessageBody= self.message @@ -156,8 +156,8 @@ def test_happy_path(self): @mock_sqs def test_write_message_valid(self): "Test the write_message method with a valid message" - sqs_client = self.s3_utils.connect('sqs', self.config_dict['s3_region']) - sqs = self.s3_utils.connect_to_resource('sqs', self.config_dict['s3_region']) + sqs_client = self.s3_utils.connect('client', 'sqs' , self.config_dict['s3_region']) + sqs = self.s3_utils.connect('resource', 'sqs', self.config_dict['s3_region']) queue = sqs.create_queue(QueueName='test-skype-sender') self.sqs_consumer.sqs_url = queue.url skype_message = 'Testing with a valid message' From f8c5bd0fb922e63030bb63b34dead24dc23d65c1 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 11 May 2021 10:39:14 -0600 Subject: [PATCH 31/49] 1500-Changed moto dependency to moto[all] because of some issues with a moto version issue. 
https://github.com/spulec/moto/issues/3297 --- onestop-python-client/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onestop-python-client/requirements.txt b/onestop-python-client/requirements.txt index 735dad7..9783885 100644 --- a/onestop-python-client/requirements.txt +++ b/onestop-python-client/requirements.txt @@ -8,5 +8,5 @@ argparse~=1.4.0 boto3~=1.15.11 requests~=2.24.0 botocore~=1.18.11 -moto==1.3.16.dev122 +moto[all]==2.0.5 undictify From ecfec1e61e411b67300c7050b7eb701c67a8b454 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 11 May 2021 13:47:37 -0600 Subject: [PATCH 32/49] 1500-added tests for different connect types for S3Utils --- .../tests/util/S3UtilsTest.py | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/onestop-python-client/tests/util/S3UtilsTest.py b/onestop-python-client/tests/util/S3UtilsTest.py index 83be8f2..c002003 100644 --- a/onestop-python-client/tests/util/S3UtilsTest.py +++ b/onestop-python-client/tests/util/S3UtilsTest.py @@ -2,7 +2,7 @@ import unittest import uuid -from moto import mock_s3 +from moto import mock_s3, mock_sqs from moto import mock_glacier from tests.utils import abspath_from_relative from onestop.util.S3Utils import S3Utils @@ -28,6 +28,28 @@ def setUp(self): self.region2 = 'eu-north-1' self.bucket = 'archive-testing-demo' + @mock_sqs + def test_connect_session(self): + session = self.s3_utils.connect('Session', None, None) + + # No exception is called for unique method call + session.client('sqs') + session.resource('s3') + + @mock_sqs + def test_connect_client(self): + client = self.s3_utils.connect('Client', 'sqs', self.region) + + # No exception is called for unique method call + client.list_queues() + + @mock_sqs + def test_connect_resource(self): + resource = self.s3_utils.connect('Resource', 'sqs', self.region) + + # No exception is called for unique method call + resource.Queue(url='test') + @mock_s3 def test_get_uuid_metadata(self): boto_client = self.s3_utils.connect('resource', 's3', None) From e4c7fb46c6455e62bf9d8371ad35c7148c68904a Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 11 May 2021 13:55:23 -0600 Subject: [PATCH 33/49] 1500-Changed class constructors checking extra arguments and logging of an error to warning. 
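The switch from error to warning reflects how these constructors are called: one merged config dict gets unpacked into several classes, so keys meant for a different class routinely show up as extras. A minimal sketch of the shared pattern (illustrative class name, standard logging module only — not the actual onestop code):

    import logging

    class ExampleClient:
        def __init__(self, access_key, secret_key, log_level='INFO', **wildargs):
            self.access_key = access_key
            self.secret_key = secret_key
            self.logger = logging.getLogger(self.__class__.__name__)
            self.logger.setLevel(log_level)
            if wildargs:
                # Extra keys are expected when a shared config dict is unpacked
                # into several constructors, hence a warning rather than an error.
                self.logger.warning("There were extra constructor arguments: %s", wildargs)

    # Keys such as 'collection_id' belong to other classes and only trigger the warning.
    conf = {'access_key': 'k', 'secret_key': 's', 'collection_id': 'abc'}
    client = ExampleClient(**conf)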
--- onestop-python-client/onestop/KafkaConsumer.py | 2 +- onestop-python-client/onestop/WebPublisher.py | 2 +- onestop-python-client/onestop/util/S3MessageAdapter.py | 2 +- onestop-python-client/onestop/util/S3Utils.py | 2 +- onestop-python-client/onestop/util/SqsConsumer.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/onestop-python-client/onestop/KafkaConsumer.py b/onestop-python-client/onestop/KafkaConsumer.py index 76078cc..747b0e4 100644 --- a/onestop-python-client/onestop/KafkaConsumer.py +++ b/onestop-python-client/onestop/KafkaConsumer.py @@ -101,7 +101,7 @@ def __init__(self, metadata_type, brokers, group_id, auto_offset_reset, schema_r self.logger.info("Initializing " + self.__class__.__name__) if wildargs: - self.logger.error("There were extra constructor arguments: " + str(wildargs)) + self.logger.warning("There were extra constructor arguments: " + str(wildargs)) def register_client(self): """ diff --git a/onestop-python-client/onestop/WebPublisher.py b/onestop-python-client/onestop/WebPublisher.py index d04eacc..7b1c6bd 100644 --- a/onestop-python-client/onestop/WebPublisher.py +++ b/onestop-python-client/onestop/WebPublisher.py @@ -42,7 +42,7 @@ def __init__(self, registry_base_url, registry_username, registry_password, ones self.logger.info("Initializing " + self.__class__.__name__) if kwargs: - self.logger.info("There were extra constructor arguments: " + str(kwargs)) + self.logger.warning("There were extra constructor arguments: " + str(kwargs)) def publish_registry(self, metadata_type, uuid, payload, method): """ diff --git a/onestop-python-client/onestop/util/S3MessageAdapter.py b/onestop-python-client/onestop/util/S3MessageAdapter.py index 1dda78c..6bd832d 100644 --- a/onestop-python-client/onestop/util/S3MessageAdapter.py +++ b/onestop-python-client/onestop/util/S3MessageAdapter.py @@ -59,7 +59,7 @@ def __init__(self, access_bucket, type, file_id_prefix, collection_id, log_leve self.logger.info("Initializing " + self.__class__.__name__) if wildargs: - self.logger.error("There were extra constructor arguments: " + str(wildargs)) + self.logger.warning("There were extra constructor arguments: " + str(wildargs)) def transform(self, recs): """ diff --git a/onestop-python-client/onestop/util/S3Utils.py b/onestop-python-client/onestop/util/S3Utils.py index f1bb8e2..0f86e2b 100644 --- a/onestop-python-client/onestop/util/S3Utils.py +++ b/onestop-python-client/onestop/util/S3Utils.py @@ -76,7 +76,7 @@ def __init__(self, access_key, secret_key, log_level = 'INFO', **wildargs): self.logger.info("Initializing " + self.__class__.__name__) if wildargs: - self.logger.error("There were extra constructor arguments: " + str(wildargs)) + self.logger.warning("There were extra constructor arguments: " + str(wildargs)) def connect(self, type, service_name, region): """ diff --git a/onestop-python-client/onestop/util/SqsConsumer.py b/onestop-python-client/onestop/util/SqsConsumer.py index 4d97c34..39356da 100644 --- a/onestop-python-client/onestop/util/SqsConsumer.py +++ b/onestop-python-client/onestop/util/SqsConsumer.py @@ -30,7 +30,7 @@ def __init__(self, log_level = 'INFO', **wildargs): self.logger.info("Initializing " + self.__class__.__name__) if wildargs: - self.logger.error("There were extra constructor arguments: " + str(wildargs)) + self.logger.warning("There were extra constructor arguments: " + str(wildargs)) def connect(self, sqs_resource, sqs_queue_name): """ From f5370eacd32992af4fc3b8a59ef15b1975a0cc77 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 11 
May 2021 15:23:38 -0600 Subject: [PATCH 34/49] 1500-Moved unit tests to tests/unit and integration tests to tests/integration. Change circle ci config to run all onestop-python-client tests. --- .circleci/config.yml | 10 ++++++++-- onestop-python-client/{tests => test}/__init__.py | 0 onestop-python-client/{tests => test}/data/file1.csv | 0 .../{tests => test}/data/file1_s3.csv | 0 onestop-python-client/{tests => test}/data/file2.csv | 0 onestop-python-client/{tests => test}/data/file3.csv | 0 onestop-python-client/{tests => test}/data/file4.csv | 0 onestop-python-client/test/integration/__init__.py | 0 .../{tests => test/integration}/test_WebPublisher.py | 4 ++-- onestop-python-client/test/unit/__init__.py | 0 onestop-python-client/test/unit/extractor/__init__.py | 0 .../unit/extractor/test_CsbExtractor.py} | 9 +++++---- .../unit/test_KafkaConsumer.py} | 2 +- .../unit/test_KafkaPublisher.py} | 2 +- .../unit/test_SqsHandlers.py} | 6 ++---- .../unit/test_WebPublisher.py} | 2 +- onestop-python-client/test/unit/util/__init__.py | 0 .../unit/util/test_S3MessageAdapter.py} | 0 .../S3UtilsTest.py => test/unit/util/test_S3Utils.py} | 6 +++--- .../unit/util/test_SqsConsumer.py} | 0 onestop-python-client/{tests => test}/utils.py | 0 onestop-python-client/tests/util/IntegrationTest.py | 1 - 22 files changed, 23 insertions(+), 19 deletions(-) rename onestop-python-client/{tests => test}/__init__.py (100%) rename onestop-python-client/{tests => test}/data/file1.csv (100%) rename onestop-python-client/{tests => test}/data/file1_s3.csv (100%) rename onestop-python-client/{tests => test}/data/file2.csv (100%) rename onestop-python-client/{tests => test}/data/file3.csv (100%) rename onestop-python-client/{tests => test}/data/file4.csv (100%) create mode 100644 onestop-python-client/test/integration/__init__.py rename onestop-python-client/{tests => test/integration}/test_WebPublisher.py (98%) create mode 100644 onestop-python-client/test/unit/__init__.py create mode 100644 onestop-python-client/test/unit/extractor/__init__.py rename onestop-python-client/{tests/extractor/CsbExtractorTest.py => test/unit/extractor/test_CsbExtractor.py} (92%) rename onestop-python-client/{tests/KafkaConsumerTest.py => test/unit/test_KafkaConsumer.py} (99%) rename onestop-python-client/{tests/KafkaPublisherTest.py => test/unit/test_KafkaPublisher.py} (99%) rename onestop-python-client/{tests/SqsHandlersTest.py => test/unit/test_SqsHandlers.py} (98%) rename onestop-python-client/{tests/test_WebPublisher_unit.py => test/unit/test_WebPublisher.py} (99%) create mode 100644 onestop-python-client/test/unit/util/__init__.py rename onestop-python-client/{tests/util/S3MessageAdapterTest.py => test/unit/util/test_S3MessageAdapter.py} (100%) rename onestop-python-client/{tests/util/S3UtilsTest.py => test/unit/util/test_S3Utils.py} (97%) rename onestop-python-client/{tests/util/SqsConsumerTest.py => test/unit/util/test_SqsConsumer.py} (100%) rename onestop-python-client/{tests => test}/utils.py (100%) delete mode 100644 onestop-python-client/tests/util/IntegrationTest.py diff --git a/.circleci/config.yml b/.circleci/config.yml index 99f7692..dbaddb4 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -107,10 +107,16 @@ jobs: app-dir: ./onestop-python-client pkg-manager: pip - run: - name: "Run util tests" + name: "Run unit tests" command: > cd onestop-python-client/; - python -m unittest tests/util/*.py + python -m unittest discover -s test/unit + + - run: + name: "Run integration tests" + command: > + cd 
onestop-python-client/; + python -m unittest discover -s test/integration orbs: slack: circleci/slack@3.4.2 diff --git a/onestop-python-client/tests/__init__.py b/onestop-python-client/test/__init__.py similarity index 100% rename from onestop-python-client/tests/__init__.py rename to onestop-python-client/test/__init__.py diff --git a/onestop-python-client/tests/data/file1.csv b/onestop-python-client/test/data/file1.csv similarity index 100% rename from onestop-python-client/tests/data/file1.csv rename to onestop-python-client/test/data/file1.csv diff --git a/onestop-python-client/tests/data/file1_s3.csv b/onestop-python-client/test/data/file1_s3.csv similarity index 100% rename from onestop-python-client/tests/data/file1_s3.csv rename to onestop-python-client/test/data/file1_s3.csv diff --git a/onestop-python-client/tests/data/file2.csv b/onestop-python-client/test/data/file2.csv similarity index 100% rename from onestop-python-client/tests/data/file2.csv rename to onestop-python-client/test/data/file2.csv diff --git a/onestop-python-client/tests/data/file3.csv b/onestop-python-client/test/data/file3.csv similarity index 100% rename from onestop-python-client/tests/data/file3.csv rename to onestop-python-client/test/data/file3.csv diff --git a/onestop-python-client/tests/data/file4.csv b/onestop-python-client/test/data/file4.csv similarity index 100% rename from onestop-python-client/tests/data/file4.csv rename to onestop-python-client/test/data/file4.csv diff --git a/onestop-python-client/test/integration/__init__.py b/onestop-python-client/test/integration/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/onestop-python-client/tests/test_WebPublisher.py b/onestop-python-client/test/integration/test_WebPublisher.py similarity index 98% rename from onestop-python-client/tests/test_WebPublisher.py rename to onestop-python-client/test/integration/test_WebPublisher.py index c81a7de..9263938 100644 --- a/onestop-python-client/tests/test_WebPublisher.py +++ b/onestop-python-client/test/integration/test_WebPublisher.py @@ -56,8 +56,8 @@ class WebPublisherTest(unittest.TestCase): def setUpClass(cls): print("Set it up!") - cred_loc = "../config/credentials.yml" - conf_loc = "../config/csb-data-stream-config-template.yml" + cred_loc = "config/credentials.yml" + conf_loc = "config/csb-data-stream-config-template.yml" with open(cred_loc) as f: creds = yaml.load(f, Loader=yaml.FullLoader) diff --git a/onestop-python-client/test/unit/__init__.py b/onestop-python-client/test/unit/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/onestop-python-client/test/unit/extractor/__init__.py b/onestop-python-client/test/unit/extractor/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/onestop-python-client/tests/extractor/CsbExtractorTest.py b/onestop-python-client/test/unit/extractor/test_CsbExtractor.py similarity index 92% rename from onestop-python-client/tests/extractor/CsbExtractorTest.py rename to onestop-python-client/test/unit/extractor/test_CsbExtractor.py index 2c3ff72..415bb26 100644 --- a/onestop-python-client/tests/extractor/CsbExtractorTest.py +++ b/onestop-python-client/test/unit/extractor/test_CsbExtractor.py @@ -11,9 +11,10 @@ def setUp(self): print("Set it up!") self.root_proj_path = os.getcwd() self.assertIsNotNone(self.root_proj_path) - self.key = "tests/data/file4.csv" - # Use open instead of our methodfor simplicity and reliability, plus not testing our code here. 
- self.file_obj = open(self.root_proj_path + '/' + self.key) + self.data_file_path = os.getcwd() + '/test/data/file4.csv' + self.key = "file4.csv" + # Use open instead of our method because we aren't testing our code here. + self.file_obj = open(self.data_file_path) config_dict = { "access_key": "test_access_key", @@ -40,7 +41,7 @@ def test_csb_SME_user_path(self): # Setup bucket and file to read s3 = self.s3_utils.connect('client', 's3', self.region) s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration={'LocationConstraint': self.region}) - self.s3_utils.upload_s3(s3, self.root_proj_path + '/' + self.key, self.bucket, self.key, True) + self.s3_utils.upload_s3(s3, self.data_file_path, self.bucket, self.key, True) self.assertTrue(self.s3_utils.read_bytes_s3(s3, self.bucket, self.key)) # This is how we would expect an external user to get the file. diff --git a/onestop-python-client/tests/KafkaConsumerTest.py b/onestop-python-client/test/unit/test_KafkaConsumer.py similarity index 99% rename from onestop-python-client/tests/KafkaConsumerTest.py rename to onestop-python-client/test/unit/test_KafkaConsumer.py index 1246789..b119e9a 100644 --- a/onestop-python-client/tests/KafkaConsumerTest.py +++ b/onestop-python-client/test/unit/test_KafkaConsumer.py @@ -4,7 +4,7 @@ from onestop.KafkaConsumer import KafkaConsumer from confluent_kafka.schema_registry import SchemaRegistryClient -class KafkaConsumerTest(unittest.TestCase): +class test_KafkaConsumer(unittest.TestCase): kp = None conf_w_security = None conf_wo_security = None diff --git a/onestop-python-client/tests/KafkaPublisherTest.py b/onestop-python-client/test/unit/test_KafkaPublisher.py similarity index 99% rename from onestop-python-client/tests/KafkaPublisherTest.py rename to onestop-python-client/test/unit/test_KafkaPublisher.py index 643d4f5..1c9497b 100644 --- a/onestop-python-client/tests/KafkaPublisherTest.py +++ b/onestop-python-client/test/unit/test_KafkaPublisher.py @@ -5,7 +5,7 @@ from unittest.mock import ANY, patch, MagicMock from confluent_kafka.schema_registry import SchemaRegistryClient -class KafkaPublisherTest(unittest.TestCase): +class test_KafkaPublisher(unittest.TestCase): kp = None conf_w_security = None conf_wo_security = None diff --git a/onestop-python-client/tests/SqsHandlersTest.py b/onestop-python-client/test/unit/test_SqsHandlers.py similarity index 98% rename from onestop-python-client/tests/SqsHandlersTest.py rename to onestop-python-client/test/unit/test_SqsHandlers.py index 4dd2c9e..b881fc9 100644 --- a/onestop-python-client/tests/SqsHandlersTest.py +++ b/onestop-python-client/test/unit/test_SqsHandlers.py @@ -4,15 +4,14 @@ from unittest import mock from unittest.mock import patch from moto import mock_sqs -from tests.utils import abspath_from_relative, create_delete_message +from test.utils import abspath_from_relative, create_delete_message from onestop.WebPublisher import WebPublisher from onestop.util.S3Utils import S3Utils from onestop.util.S3MessageAdapter import S3MessageAdapter from onestop.util.SqsConsumer import SqsConsumer from onestop.util.SqsHandlers import create_delete_handler - -class SqsHandlerTest(unittest.TestCase): +class test_SqsHandler(unittest.TestCase): def setUp(self): print("Set it up!") @@ -94,7 +93,6 @@ def json(self): } return MockResponse(onestop_search_response, 200) - def mocked_search_response_data_empty(*args, **kwargs): class MockResponse: def __init__(self, json_data, status_code): diff --git a/onestop-python-client/tests/test_WebPublisher_unit.py 
b/onestop-python-client/test/unit/test_WebPublisher.py similarity index 99% rename from onestop-python-client/tests/test_WebPublisher_unit.py rename to onestop-python-client/test/unit/test_WebPublisher.py index 4a97f80..af0802f 100644 --- a/onestop-python-client/tests/test_WebPublisher_unit.py +++ b/onestop-python-client/test/unit/test_WebPublisher.py @@ -6,7 +6,7 @@ from moto import mock_s3 from onestop.WebPublisher import WebPublisher -class WebPublisherTest(unittest.TestCase): +class test_WebPublisher(unittest.TestCase): username="admin" password="a_password" uuid = "9f0a5ff2-fcc0-5bcb-a225-024b669c9bba" diff --git a/onestop-python-client/test/unit/util/__init__.py b/onestop-python-client/test/unit/util/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/onestop-python-client/tests/util/S3MessageAdapterTest.py b/onestop-python-client/test/unit/util/test_S3MessageAdapter.py similarity index 100% rename from onestop-python-client/tests/util/S3MessageAdapterTest.py rename to onestop-python-client/test/unit/util/test_S3MessageAdapter.py diff --git a/onestop-python-client/tests/util/S3UtilsTest.py b/onestop-python-client/test/unit/util/test_S3Utils.py similarity index 97% rename from onestop-python-client/tests/util/S3UtilsTest.py rename to onestop-python-client/test/unit/util/test_S3Utils.py index c002003..70f3385 100644 --- a/onestop-python-client/tests/util/S3UtilsTest.py +++ b/onestop-python-client/test/unit/util/test_S3Utils.py @@ -4,7 +4,7 @@ from moto import mock_s3, mock_sqs from moto import mock_glacier -from tests.utils import abspath_from_relative +from test.utils import abspath_from_relative from onestop.util.S3Utils import S3Utils class S3UtilsTest(unittest.TestCase): @@ -77,7 +77,7 @@ def test_add_uuid_metadata(self): @mock_s3 def test_add_file_s3(self): boto_client = self.s3_utils.connect('client', 's3', None) - local_file = abspath_from_relative(__file__, "../data/file4.csv") + local_file = abspath_from_relative(__file__, "../../data/file4.csv") s3_key = "csv/file4.csv" location = {'LocationConstraint': self.region} boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) @@ -119,7 +119,7 @@ def test_add_files(self): overwrite = True for file in local_files: - local_file = abspath_from_relative(__file__, "../data/" + file) + local_file = abspath_from_relative(__file__, "../../data/" + file) s3_file = "csv/" + file self.assertTrue(self.s3_utils.upload_s3(boto_client, local_file, self.bucket, s3_file, overwrite)) diff --git a/onestop-python-client/tests/util/SqsConsumerTest.py b/onestop-python-client/test/unit/util/test_SqsConsumer.py similarity index 100% rename from onestop-python-client/tests/util/SqsConsumerTest.py rename to onestop-python-client/test/unit/util/test_SqsConsumer.py diff --git a/onestop-python-client/tests/utils.py b/onestop-python-client/test/utils.py similarity index 100% rename from onestop-python-client/tests/utils.py rename to onestop-python-client/test/utils.py diff --git a/onestop-python-client/tests/util/IntegrationTest.py b/onestop-python-client/tests/util/IntegrationTest.py deleted file mode 100644 index 381e4d7..0000000 --- a/onestop-python-client/tests/util/IntegrationTest.py +++ /dev/null @@ -1 +0,0 @@ -#TBD \ No newline at end of file From c93bab294aca7990a6927335b5e69550a1a20cb6 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 12 May 2021 11:56:23 -0600 Subject: [PATCH 35/49] 1500-Fixed one of the test_S3Utils tests that was commented out. Removed some blank lines from S3Utils. 
--- onestop-python-client/onestop/util/S3Utils.py | 1 - .../test/unit/util/test_S3Utils.py | 40 ++++++++++++++----- 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/onestop-python-client/onestop/util/S3Utils.py b/onestop-python-client/onestop/util/S3Utils.py index 0f86e2b..d5de564 100644 --- a/onestop-python-client/onestop/util/S3Utils.py +++ b/onestop-python-client/onestop/util/S3Utils.py @@ -390,7 +390,6 @@ def s3_restore(self, boto_client, bucket_name, key, days): # returns status of object retrieval return obj.restore - def retrieve_inventory(self, boto_client, vault_name): """ Initiate an Amazon Glacier inventory-retrieval job diff --git a/onestop-python-client/test/unit/util/test_S3Utils.py b/onestop-python-client/test/unit/util/test_S3Utils.py index 70f3385..6b3321f 100644 --- a/onestop-python-client/test/unit/util/test_S3Utils.py +++ b/onestop-python-client/test/unit/util/test_S3Utils.py @@ -1,11 +1,16 @@ import csv import unittest import uuid +import json +from unittest import mock from moto import mock_s3, mock_sqs from moto import mock_glacier from test.utils import abspath_from_relative from onestop.util.S3Utils import S3Utils +from boto.glacier.layer1 import Layer1 +from botocore.response import StreamingBody +from io import StringIO class S3UtilsTest(unittest.TestCase): @@ -203,26 +208,39 @@ def test_retrieve_inventory(self): vault_name = 'archive-vault-new' glacier.create_vault(vaultName=vault_name) - response = self.s3_utils.retrieve_inventory(glacier, vault_name) - self.assertTrue(response['jobId']!= None) + print('jobid %s'%response['jobId']) + self.assertTrue(response['jobId'] != None) - ''' - Excluding for now because it's an asynchronous test - def test_retrieve_inventory_results(self, jobid): + @mock_glacier + @mock_s3 + def test_retrieve_inventory_results(self): """ Once the job has been completed, use the job id to retrieve archive results """ # Connect to your glacier vault for retrieval - glacier = self.su.connect('client', 'glacier', self.su.conf['region']) - vault_name = self.su.conf['vault_name'] + glacier = mock.Mock(spec=Layer1)#self.s3_utils.connect('client', 'glacier', self.region) + vault_name = 'archive-vault-new' + glacier.create_vault(vaultName=vault_name) + + body_json = {'Body': [{'test':'value'}]} + body_encoded = json.dumps(body_json)#.encode("utf-16") - # Retrieve the job results - inventory = self.su.retrieve_inventory_results(vault_name, glacier, jobid) + body = StreamingBody( + StringIO(str(body_encoded)), + len(str(body_encoded)) + ) + + mocked_response = { + 'body': body + } + glacier.get_job_output.return_value = mocked_response + with mock.patch('boto.glacier.job.tree_hash_from_str') as t: + t.return_value = 'tree_hash' + inventory = self.s3_utils.retrieve_inventory_results(vault_name, glacier, 'ASDF78') - self.assertTrue(inventory != None) - ''' + self.assertEqual(body_json, inventory) @mock_s3 def test_extra_parameters_constructor(self): From 32a300a94d27d5fe0582b42da01f9d912c717eab Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 12 May 2021 13:46:01 -0600 Subject: [PATCH 36/49] 1500-Updated python-client requirements boto3. Seems to be using an old one. Trying to figure out how to force it to a newer one. 
--- onestop-python-client/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onestop-python-client/requirements.txt b/onestop-python-client/requirements.txt index 9783885..06a3f5b 100644 --- a/onestop-python-client/requirements.txt +++ b/onestop-python-client/requirements.txt @@ -5,7 +5,7 @@ smart-open PyYAML~=5.3.1 setuptools~=49.2.0 argparse~=1.4.0 -boto3~=1.15.11 +boto3~=1.17.71 requests~=2.24.0 botocore~=1.18.11 moto[all]==2.0.5 From a3f6e96795dc167a2ad6906acd329f28ce8fcf6f Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 12 May 2021 13:59:36 -0600 Subject: [PATCH 37/49] 1500-Updated python-client requirements botocore to 1.20.71 due to conflict between botocore 1.18.11 and moto 2.0.5 --- onestop-python-client/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onestop-python-client/requirements.txt b/onestop-python-client/requirements.txt index 06a3f5b..9a38faa 100644 --- a/onestop-python-client/requirements.txt +++ b/onestop-python-client/requirements.txt @@ -7,6 +7,6 @@ setuptools~=49.2.0 argparse~=1.4.0 boto3~=1.17.71 requests~=2.24.0 -botocore~=1.18.11 +botocore~=1.20.71 moto[all]==2.0.5 undictify From 8fca7a9e55452e5238f1906b568c5729b9fe7e0f Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 12 May 2021 14:06:52 -0600 Subject: [PATCH 38/49] 1500-Changed circleci config for python client to try and update boto --- .circleci/config.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index dbaddb4..c86d021 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -106,12 +106,16 @@ jobs: - python/install-packages: app-dir: ./onestop-python-client pkg-manager: pip + # This is to update boto + - run: pip -V + - run: pip list boto3 + - run: pip install --upgrade --user boto3 + - run: pip3 install boto - run: name: "Run unit tests" command: > cd onestop-python-client/; python -m unittest discover -s test/unit - - run: name: "Run integration tests" command: > From bd38748a45206b34da3b2da62e4920e6e7606ca7 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 12 May 2021 14:15:53 -0600 Subject: [PATCH 39/49] 1500-added region_name to S3Utils connect for session. Suspect it was using my local aws config when region was not specified. 
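For context, a short usage sketch of the session path this touches (placeholder credentials; the bucket and key come from the test fixtures; assumes the bucket exists or is mocked). Passing the region through means boto3 builds the session from the given value instead of falling back to the local AWS config:

    from onestop.util.S3Utils import S3Utils

    s3_utils = S3Utils('test_access_key', 'test_secret_key')
    session = s3_utils.connect('session', None, 'us-east-2')
    file_obj = s3_utils.get_csv_s3(session, 'archive-testing-demo', 'csv/file1.csv')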
--- onestop-python-client/onestop/util/S3Utils.py | 4 +++- .../test/unit/extractor/test_CsbExtractor.py | 2 +- onestop-python-client/test/unit/util/test_S3Utils.py | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/onestop-python-client/onestop/util/S3Utils.py b/onestop-python-client/onestop/util/S3Utils.py index d5de564..cbc8f24 100644 --- a/onestop-python-client/onestop/util/S3Utils.py +++ b/onestop-python-client/onestop/util/S3Utils.py @@ -97,13 +97,15 @@ def connect(self, type, service_name, region): return boto3.Session( aws_access_key_id=self.access_key, aws_secret_access_key=self.secret_key, + region_name=region ) elif type == 'client': return boto3.client( service_name, aws_access_key_id=self.access_key, aws_secret_access_key=self.secret_key, - region_name=region) + region_name=region + ) elif type == 'resource': return boto3.resource( service_name, diff --git a/onestop-python-client/test/unit/extractor/test_CsbExtractor.py b/onestop-python-client/test/unit/extractor/test_CsbExtractor.py index 415bb26..cba1bf7 100644 --- a/onestop-python-client/test/unit/extractor/test_CsbExtractor.py +++ b/onestop-python-client/test/unit/extractor/test_CsbExtractor.py @@ -45,7 +45,7 @@ def test_csb_SME_user_path(self): self.assertTrue(self.s3_utils.read_bytes_s3(s3, self.bucket, self.key)) # This is how we would expect an external user to get the file. - sm_open_file = self.s3_utils.get_csv_s3(self.s3_utils.connect('session', None, None), self.bucket, self.key) + sm_open_file = self.s3_utils.get_csv_s3(self.s3_utils.connect('session', None, self.region), self.bucket, self.key) bounds_dict = CsbExtractor.get_spatial_temporal_bounds(sm_open_file, 'LON', 'LAT', 'TIME') coords = bounds_dict["geospatial"] diff --git a/onestop-python-client/test/unit/util/test_S3Utils.py b/onestop-python-client/test/unit/util/test_S3Utils.py index 6b3321f..f6bdd91 100644 --- a/onestop-python-client/test/unit/util/test_S3Utils.py +++ b/onestop-python-client/test/unit/util/test_S3Utils.py @@ -35,7 +35,7 @@ def setUp(self): @mock_sqs def test_connect_session(self): - session = self.s3_utils.connect('Session', None, None) + session = self.s3_utils.connect('Session', None, self.region) # No exception is called for unique method call session.client('sqs') @@ -92,7 +92,7 @@ def test_add_file_s3(self): @mock_s3 def test_get_csv_s3(self): - boto_session = self.s3_utils.connect('session', None, None) + boto_session = self.s3_utils.connect('session', None, self.region) s3 = self.s3_utils.connect('client', 's3', self.region) location = {'LocationConstraint': self.region} s3_key = "csv/file1.csv" From 6a101f8188e37b4a557f359d2a9d2801f2741da4 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 12 May 2021 14:19:19 -0600 Subject: [PATCH 40/49] 1500-Changing python-client circleci config to see if need to tell it to install boto for pip3 every time. 
--- .circleci/config.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index c86d021..a8ad73f 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -106,11 +106,6 @@ jobs: - python/install-packages: app-dir: ./onestop-python-client pkg-manager: pip - # This is to update boto - - run: pip -V - - run: pip list boto3 - - run: pip install --upgrade --user boto3 - - run: pip3 install boto - run: name: "Run unit tests" command: > From 927fb7e00bdf80b88ec2a0959f1a61c1a6526874 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 12 May 2021 14:27:46 -0600 Subject: [PATCH 41/49] 1500-updated python-client requirements to install boto --- onestop-python-client/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/onestop-python-client/requirements.txt b/onestop-python-client/requirements.txt index 9a38faa..036e217 100644 --- a/onestop-python-client/requirements.txt +++ b/onestop-python-client/requirements.txt @@ -5,6 +5,7 @@ smart-open PyYAML~=5.3.1 setuptools~=49.2.0 argparse~=1.4.0 +boto~=2.49.0 boto3~=1.17.71 requests~=2.24.0 botocore~=1.20.71 From 12374a046e48c43ce5c5d14b8b08ef8757675d44 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 12 May 2021 17:59:20 -0600 Subject: [PATCH 42/49] 1500-Changed python-client integration test(s) to use environment variables if credentials yml doesn't exist. Commented out integration task in circleCI config. Since cannot reach registry on cedardevs. --- .circleci/config.yml | 11 ++++---- .../test/integration/test_WebPublisher.py | 25 +++++++++++++------ 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index a8ad73f..d475399 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -111,11 +111,12 @@ jobs: command: > cd onestop-python-client/; python -m unittest discover -s test/unit - - run: - name: "Run integration tests" - command: > - cd onestop-python-client/; - python -m unittest discover -s test/integration +# This is commented out only because the OneStop we have running on cedardevs doesn't have its registry exposed. You can only reach it via sshing to another machine. 
+# - run: +# name: "Run integration tests" +# command: > +# cd onestop-python-client/; +# python -m unittest discover -s test/integration orbs: slack: circleci/slack@3.4.2 diff --git a/onestop-python-client/test/integration/test_WebPublisher.py b/onestop-python-client/test/integration/test_WebPublisher.py index 9263938..04211dc 100644 --- a/onestop-python-client/test/integration/test_WebPublisher.py +++ b/onestop-python-client/test/integration/test_WebPublisher.py @@ -2,8 +2,10 @@ import json import unittest import time +import os.path from onestop.WebPublisher import WebPublisher +from os import path class WebPublisherTest(unittest.TestCase): wp = None @@ -59,13 +61,22 @@ def setUpClass(cls): cred_loc = "config/credentials.yml" conf_loc = "config/csb-data-stream-config-template.yml" - with open(cred_loc) as f: - creds = yaml.load(f, Loader=yaml.FullLoader) - - registry_username = creds['registry']['username'] - registry_password = creds['registry']['password'] - access_key = creds['sandbox']['access_key'] - access_secret = creds['sandbox']['secret_key'] + if path.exists(cred_loc): + with open(cred_loc) as f: + creds = yaml.load(f, Loader=yaml.FullLoader) + + registry_username = creds['registry']['username'] + registry_password = creds['registry']['password'] + access_key = creds['sandbox']['access_key'] + access_secret = creds['sandbox']['secret_key'] + else: + print("Credentials file doesn't exist at '%s', using environment variables."%cred_loc) + registry_username = os.environ.get('REGISTRY_USERNAME') + registry_password = os.environ.get("REGISTRY_PASSWORD") + access_key = os.environ.get("ACCESS_KEY") + access_secret = os.environ.get("SECRET_KEY") + if registry_username == None: + raise Exception("REGISTRY_USERNAME not defined as env variable. Credentials file at '%s' doesn't exist." % cred_loc) with open(conf_loc) as f: conf = yaml.load(f, Loader=yaml.FullLoader) From eb0646d103d933ef30b0ba6fb2b98ca5fb8edb41 Mon Sep 17 00:00:00 2001 From: Erin Date: Thu, 13 May 2021 11:58:43 -0600 Subject: [PATCH 43/49] 1500-Removed redundant log_level fields in all the configs. Put into credentials template. 
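A hypothetical sketch of how the split files can be combined now that log_level lives only in the credentials file (the file names and the merge step are assumptions about the calling scripts, not code from this change):

    import yaml
    from onestop.util.S3Utils import S3Utils

    with open('config/credentials-template.yml') as f:
        conf = yaml.load(f, Loader=yaml.FullLoader)           # supplies log_level
    with open('config/csb-data-stream-config-template.yml') as f:
        conf.update(yaml.load(f, Loader=yaml.FullLoader))     # stream settings

    s3_utils = S3Utils(**conf['sandbox'], log_level=conf['log_level'])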
--- onestop-python-client/config/aws-util-config-dev.yml | 1 - onestop-python-client/config/credentials-template.yml | 3 +-- .../config/csb-data-stream-config-template.yml | 1 - scripts/config/aws-util-config-dev.yml | 1 - scripts/config/aws-util-config-test.yml | 1 - scripts/config/csb-data-stream-config.yml | 1 - scripts/config/kafka-publisher-config-dev.yml | 1 - scripts/config/web-publisher-config-dev.yml | 1 - scripts/config/web-publisher-config-local.yml | 1 - 9 files changed, 1 insertion(+), 10 deletions(-) diff --git a/onestop-python-client/config/aws-util-config-dev.yml b/onestop-python-client/config/aws-util-config-dev.yml index c30683e..2fdb5c1 100644 --- a/onestop-python-client/config/aws-util-config-dev.yml +++ b/onestop-python-client/config/aws-util-config-dev.yml @@ -1,5 +1,4 @@ # Example config values for osim client -log_level: INFO # AWS config values sqs_url: https://sqs.us-east-2.amazonaws.com/798276211865/cloud-archive-client-sqs diff --git a/onestop-python-client/config/credentials-template.yml b/onestop-python-client/config/credentials-template.yml index 006e175..f94c70b 100644 --- a/onestop-python-client/config/credentials-template.yml +++ b/onestop-python-client/config/credentials-template.yml @@ -9,5 +9,4 @@ registry: username: rw_user password: rw_user_pwd - - +log_level: INFO \ No newline at end of file diff --git a/onestop-python-client/config/csb-data-stream-config-template.yml b/onestop-python-client/config/csb-data-stream-config-template.yml index 56bad99..8c2d4de 100644 --- a/onestop-python-client/config/csb-data-stream-config-template.yml +++ b/onestop-python-client/config/csb-data-stream-config-template.yml @@ -1,4 +1,3 @@ -log_level: INFO format: csv headers: UNIQUE_ID,FILE_UUID,LON,LAT,DEPTH,TIME,PLATFORM_NAME,PROVIDER type: COLLECTION diff --git a/scripts/config/aws-util-config-dev.yml b/scripts/config/aws-util-config-dev.yml index e054f49..9102be0 100644 --- a/scripts/config/aws-util-config-dev.yml +++ b/scripts/config/aws-util-config-dev.yml @@ -1,5 +1,4 @@ # Example config values for osim client -log_level: INFO # AWS config values sqs_url: https://sqs.us-east-2.amazonaws.com/798276211865/cloud-archive-client-sqs diff --git a/scripts/config/aws-util-config-test.yml b/scripts/config/aws-util-config-test.yml index 6aac07a..9de4618 100644 --- a/scripts/config/aws-util-config-test.yml +++ b/scripts/config/aws-util-config-test.yml @@ -1,5 +1,4 @@ # Example config values for osim client -log_level: DEBUG # AWS config values sqs_url: 'test-queue' diff --git a/scripts/config/csb-data-stream-config.yml b/scripts/config/csb-data-stream-config.yml index 24a7cf6..06a45b6 100644 --- a/scripts/config/csb-data-stream-config.yml +++ b/scripts/config/csb-data-stream-config.yml @@ -1,4 +1,3 @@ -log_level: INFO format: csv headers: UNIQUE_ID,FILE_UUID,LON,LAT,DEPTH,TIME,PLATFORM_NAME,PROVIDER type: COLLECTION diff --git a/scripts/config/kafka-publisher-config-dev.yml b/scripts/config/kafka-publisher-config-dev.yml index 85a66f3..bd5af58 100644 --- a/scripts/config/kafka-publisher-config-dev.yml +++ b/scripts/config/kafka-publisher-config-dev.yml @@ -1,5 +1,4 @@ # Example config values for osim client -log_level: DEBUG # COLLECTION or GRANULE metadata_type: GRANULE diff --git a/scripts/config/web-publisher-config-dev.yml b/scripts/config/web-publisher-config-dev.yml index 9b08391..387d252 100644 --- a/scripts/config/web-publisher-config-dev.yml +++ b/scripts/config/web-publisher-config-dev.yml @@ -1,5 +1,4 @@ # Example config values for osim client -log_level: INFO # 
COLLECTION or GRANULE metadata_type: granule diff --git a/scripts/config/web-publisher-config-local.yml b/scripts/config/web-publisher-config-local.yml index 32db955..3ce7d88 100644 --- a/scripts/config/web-publisher-config-local.yml +++ b/scripts/config/web-publisher-config-local.yml @@ -1,5 +1,4 @@ # Example config values for osim client -log_level: INFO # COLLECTION or GRANULE metadata_type: granule From aa0b9a9ce25f2d27928c83b9278e0597e1c1172a Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 14 May 2021 09:55:39 -0600 Subject: [PATCH 44/49] 1500-Changed the kafka config in the scripts for collection and granule _topic_produce to _topic_publish, as it is in the constructor for KafkaPublisher and KafkaConsumer. --- scripts/config/kafka-publisher-config-dev.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/config/kafka-publisher-config-dev.yml b/scripts/config/kafka-publisher-config-dev.yml index bd5af58..8a94bf3 100644 --- a/scripts/config/kafka-publisher-config-dev.yml +++ b/scripts/config/kafka-publisher-config-dev.yml @@ -6,8 +6,8 @@ metadata_type: GRANULE # Kafka config values brokers: onestop-dev-cp-kafka:9092 schema_registry: http://onestop-dev-cp-schema-registry:8081 -collection_topic_produce: psi-granules-by-collection -granule_topic_produce: psi-granule-parsed +collection_topic_publish: psi-granules-by-collection +granule_topic_publish: psi-granule-parsed collection_topic_consume: psi-collection-input-unknown granule_topic_consume: psi-granule-input-unknown group_id: sme-test From 3b14757e303159f62ea07cff5c9590c990d9033f Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 18 May 2021 13:23:23 -0600 Subject: [PATCH 45/49] 1500-Changed exception message to first be a string then passed into exception. Otherwise wasn't evaluating the variable within message. --- onestop-python-client/test/integration/test_WebPublisher.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/onestop-python-client/test/integration/test_WebPublisher.py b/onestop-python-client/test/integration/test_WebPublisher.py index 04211dc..5c7935a 100644 --- a/onestop-python-client/test/integration/test_WebPublisher.py +++ b/onestop-python-client/test/integration/test_WebPublisher.py @@ -76,7 +76,8 @@ def setUpClass(cls): access_key = os.environ.get("ACCESS_KEY") access_secret = os.environ.get("SECRET_KEY") if registry_username == None: - raise Exception("REGISTRY_USERNAME not defined as env variable. Credentials file at '%s' doesn't exist." % cred_loc) + msg = "REGISTRY_USERNAME not defined as env variable. Credentials file at '" + cred_loc + "' doesn't exist." + raise Exception(msg) with open(conf_loc) as f: conf = yaml.load(f, Loader=yaml.FullLoader) From ebf71ee681da93ea00aad68d9e229c0d132f738a Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 18 May 2021 13:40:41 -0600 Subject: [PATCH 46/49] 1500-Adjusted exception thrown in S3Utils.connect for invalid type, wasn't printing value of variable. Added test for that negative case. 
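The negative test added below only asserts that an exception is raised. Since the message now interpolates the offending value, a possible follow-up (not part of this patch) would be to also check that the value shows up in the message, using unittest's context-manager form inside the same test_S3Utils class:

    @mock_sqs
    def test_connect_exception_message_contains_type(self):
        # 'junk' is the invalid connection type; it should appear in the message.
        with self.assertRaises(Exception) as ctx:
            self.s3_utils.connect('junk', 'sqs', self.region)
        self.assertIn('junk', str(ctx.exception))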
--- onestop-python-client/onestop/util/S3Utils.py | 2 +- onestop-python-client/test/unit/util/test_S3Utils.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/onestop-python-client/onestop/util/S3Utils.py b/onestop-python-client/onestop/util/S3Utils.py index cbc8f24..d63e654 100644 --- a/onestop-python-client/onestop/util/S3Utils.py +++ b/onestop-python-client/onestop/util/S3Utils.py @@ -114,7 +114,7 @@ def connect(self, type, service_name, region): aws_secret_access_key=self.secret_key ) else: - raise Exception('Unknown boto3 type of %s'%type) + raise Exception('Unknown boto3 type of "%s"'%(type)) def objectkey_exists(self, bucket, s3_key): """ diff --git a/onestop-python-client/test/unit/util/test_S3Utils.py b/onestop-python-client/test/unit/util/test_S3Utils.py index f6bdd91..91b90a3 100644 --- a/onestop-python-client/test/unit/util/test_S3Utils.py +++ b/onestop-python-client/test/unit/util/test_S3Utils.py @@ -55,6 +55,11 @@ def test_connect_resource(self): # No exception is called for unique method call resource.Queue(url='test') + @mock_sqs + def test_connect_exception_for_invalid_connection_type(self): + with self.assertRaises(Exception): + self.s3_utils.connect('junk', 'sqs', self.region) + @mock_s3 def test_get_uuid_metadata(self): boto_client = self.s3_utils.connect('resource', 's3', None) From 053df0599094d127715449a8031a872aaa9d9049 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 18 May 2021 14:22:37 -0600 Subject: [PATCH 47/49] 1500-Fixed log but in SqsConsumer of microseconds process time being multiplied instead of divided to get seconds. --- onestop-python-client/onestop/util/SqsConsumer.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/onestop-python-client/onestop/util/SqsConsumer.py b/onestop-python-client/onestop/util/SqsConsumer.py index 39356da..1972cc6 100644 --- a/onestop-python-client/onestop/util/SqsConsumer.py +++ b/onestop-python-client/onestop/util/SqsConsumer.py @@ -98,15 +98,12 @@ def receive_messages(self, sqs_queue, sqs_max_polls, cb): else: self.logger.info("s3 event message without 'Records' content received.") - sqs_message.delete() - - self.logger.info("The SQS message has been deleted.") - dt_end = datetime.now(tz=timezone.utc) processing_time = dt_end - dt_start + self.logger.info("Completed processing the message in %s seconds."%(processing_time.microseconds / 1000000)) - self.logger.info("Completed processing message (s):" + str(processing_time.microseconds * 1000)) - + sqs_message.delete() + self.logger.info("The SQS message has been deleted.") except: self.logger.exception( "An exception was thrown while processing a message, but this program will continue. The " From 5c66efa79d9ec4ee79a97d178a515a912ee7c896 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 18 May 2021 15:11:25 -0600 Subject: [PATCH 48/49] 2500-Added SqsHandlers create_upload_handler back with tests. Didn't realize was used, looked obsolete. 
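For reference, the restored handler is wired up the same way as the existing delete handler: build the callback from the client objects and hand it to SqsConsumer.receive_messages. A rough sketch, where the queue name, region, and config keys are illustrative and config_dict is assumed to be the merged YAML config/credentials dict:

    from onestop.WebPublisher import WebPublisher
    from onestop.util.S3Utils import S3Utils
    from onestop.util.S3MessageAdapter import S3MessageAdapter
    from onestop.util.SqsConsumer import SqsConsumer
    from onestop.util.SqsHandlers import create_upload_handler

    wp = S3Utils and friends are constructed from the same dict the tests use
    wp = WebPublisher(**config_dict)
    s3_utils = S3Utils(**config_dict)
    s3ma = S3MessageAdapter(**config_dict)
    consumer = SqsConsumer(**config_dict)

    # Build the upload callback and poll the queue with it.
    upload_cb = create_upload_handler(wp, s3_utils, s3ma)
    sqs_resource = s3_utils.connect('resource', 'sqs', config_dict['s3_region'])
    queue = sqs_resource.Queue(config_dict['sqs_name'])
    consumer.receive_messages(queue, config_dict['sqs_max_polls'], upload_cb)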
--- .../onestop/util/SqsHandlers.py | 57 ++++++++- .../test/unit/test_SqsHandlers.py | 111 +++++++++++++++++- 2 files changed, 165 insertions(+), 3 deletions(-) diff --git a/onestop-python-client/onestop/util/SqsHandlers.py b/onestop-python-client/onestop/util/SqsHandlers.py index ce0f010..894f8b5 100644 --- a/onestop-python-client/onestop/util/SqsHandlers.py +++ b/onestop-python-client/onestop/util/SqsHandlers.py @@ -11,7 +11,7 @@ def create_delete_handler(web_publisher): """ def delete(records, log_level='INFO'): - logger = ClientLogger.get_logger('SqsHandlers', log_level, False) + logger = ClientLogger.get_logger('SqsHandlers.create_delete_handler.delete', log_level, False) logger.info("In create_delete_handler.delete() handler") logger.debug("Records: %s"%records) @@ -36,9 +36,62 @@ def delete(records, log_level='INFO'): if len(response_json['data']) != 0: granule_uuid = response_json['data'][0]['id'] response = web_publisher.delete_registry('granule', granule_uuid) - print('delete_registry response: %s'%response) + logger.debug('web_publisher.delete_registry response: %s'%response) return response logger.warning("OneStop search response has no 'data' field. Response=%s"%response_json) return delete + +def create_upload_handler(web_publisher, s3_utils, s3_message_adapter): + """ + Creates a upload function handler to be used with SqsConsumer.receive_messages. + + The upload handler function checks the object for a UUID and if one is not found, it will create one for it. + + :param: web_publisher: WebPublisher object + :param: s3_utils: S3Utils object + :param: s3ma: S3MessageAdapter object + + """ + def upload(records, log_level='INFO'): + logger = ClientLogger.get_logger('SqsHandlers.create_upload_handler.upload', log_level, False) + logger.info("In create_upload_handler.upload() handler") + logger.debug("Records: %s"%records) + + rec = records[0] + s3_key = rec['s3']['object']['key'] + logger.info("Received message for " + s3_key) + logger.info("Event type: " + rec['eventName']) + bucket = rec['s3']['bucket']['name'] + logger.info("BUCKET: %s"%bucket) + s3_resource = s3_utils.connect("s3_resource", None) + + # Fetch the object to get the uuid + object_uuid = s3_utils.get_uuid_metadata(s3_resource, bucket, s3_key) + if object_uuid is not None: + logger.info("Retrieved object-uuid: %s"%object_uuid) + else: + logger.info("Adding uuid") + # Can't add uuid to glacier and should be copied over + if "backup" not in bucket: + object_uuid = s3_utils.add_uuid_metadata(s3_resource, bucket, s3_key) + + # Convert s3 message to IM message + json_payload = s3_message_adapter.transform(records) + logger.debug('transformed message, json_payload: %s'%json_payload) + + # Send the message to registry + payload = json_payload.serialize() + method = 'PATCH' # Backup location should be patched if not backup within bucket name + if "backup" not in bucket: + method = 'POST' + + logger.debug('web_publisher.publish_registry method using "%s" with payload %s'%(method,payload)) + registry_response = web_publisher.publish_registry("granule", object_uuid, payload, method) + logger.debug('web_publisher.publish_registry response=%s'%registry_response) + logger.debug('web_publisher.publish_registry response json=%s'%registry_response.json()) + + return registry_response + + return upload \ No newline at end of file diff --git a/onestop-python-client/test/unit/test_SqsHandlers.py b/onestop-python-client/test/unit/test_SqsHandlers.py index b881fc9..c17b972 100644 --- 
a/onestop-python-client/test/unit/test_SqsHandlers.py +++ b/onestop-python-client/test/unit/test_SqsHandlers.py @@ -10,6 +10,7 @@ from onestop.util.S3MessageAdapter import S3MessageAdapter from onestop.util.SqsConsumer import SqsConsumer from onestop.util.SqsHandlers import create_delete_handler +from onestop.util.SqsHandlers import create_upload_handler class test_SqsHandler(unittest.TestCase): @@ -32,7 +33,7 @@ def setUp(self): self.wp = WebPublisher(**self.config_dict) self.s3_utils = S3Utils(**self.config_dict) - self.s3ma = S3MessageAdapter(**self.config_dict) + self.s3_message_adapter = S3MessageAdapter(**self.config_dict) self.sqs_consumer = SqsConsumer(**self.config_dict) self.sqs_max_polls = 3 @@ -215,5 +216,113 @@ def test_delete_handler_eventName_not_delete_ends_cb(self, mock_wp, mock_respons mock_wp.search_onestop.assert_not_called() mock_wp.delete_registry.assert_not_called() + @mock_sqs + @patch('onestop.WebPublisher') + @patch('onestop.util.S3Utils') + @patch('onestop.util.S3MessageAdapter') + def test_upload_handler_happy(self, mock_s3_utils, mock_s3_msg_adapter, mock_wp): + bucket = self.bucket + key = self.key + queue_name = 'test_queue' + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.region) + sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url + sqs_queue = sqs_resource.Queue(queue_name) + + # Send a test message + sqs_client = self.s3_utils.connect('client', 'sqs' , self.region) + message = create_delete_message(self.region, bucket, key) + sqs_client.send_message( + QueueUrl=sqs_queue_url, + MessageBody=json.dumps(message) + ) + + records = json.loads(message['Message'])['Records'] + records_transformed = mock_s3_msg_adapter.transform(records) + cb = create_upload_handler(mock_wp, mock_s3_utils, mock_s3_msg_adapter) + self.sqs_consumer.receive_messages(sqs_queue, 1, cb) + + # Verify get uuid called + mock_s3_utils.get_uuid_metadata.assert_called_with( + mock_s3_utils.connect('s3_resource', None), + bucket, + key) + # Verify uuid not added + mock_s3_utils.add_uuid_metadata.assert_not_called() + # Verify transform called + mock_s3_msg_adapter.transform.assert_called_with(records) + # Verify publish called + mock_wp.publish_registry.assert_called_with( + 'granule', + mock_s3_utils.get_uuid_metadata(mock_s3_utils.connect('s3_resource', None), bucket, key), + records_transformed.serialize(), + 'POST' + ) + + @mock_sqs + @patch('onestop.WebPublisher') + @patch('onestop.util.S3Utils') + @patch('onestop.util.S3MessageAdapter') + def test_upload_handler_adds_uuid(self, mock_s3_utils, mock_s3_msg_adapter, mock_wp): + bucket = self.bucket + key = self.key + queue_name = 'test_queue' + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.region) + sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url + sqs_queue = sqs_resource.Queue(queue_name) + + # Send a test message + sqs_client = self.s3_utils.connect('client', 'sqs' , self.region) + message = create_delete_message(self.region, bucket, key) + sqs_client.send_message( + QueueUrl=sqs_queue_url, + MessageBody=json.dumps(message) + ) + + mock_s3_utils.get_uuid_metadata.return_value = None + cb = create_upload_handler(mock_wp, mock_s3_utils, mock_s3_msg_adapter) + + self.sqs_consumer.receive_messages(sqs_queue, 1, cb) + + # Verify add uuid called + mock_s3_utils.add_uuid_metadata.assert_called_with( + mock_s3_utils.connect('s3_resource', None), + bucket, + key) + + @mock_sqs + @patch('onestop.WebPublisher') + @patch('onestop.util.S3Utils') + 
@patch('onestop.util.S3MessageAdapter') + def test_upload_handler_bucket_as_backup_PATCH(self, mock_s3_utils, mock_s3_msg_adapter, mock_wp): + bucket = "testing_backup_bucket" + key = self.key + queue_name = 'test_queue' + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.region) + sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url + sqs_queue = sqs_resource.Queue(queue_name) + + # Send a test message + sqs_client = self.s3_utils.connect('client', 'sqs' , self.region) + message = create_delete_message(self.region, bucket, key) + sqs_client.send_message( + QueueUrl=sqs_queue_url, + MessageBody=json.dumps(message) + ) + + mock_s3_utils.get_uuid_metadata.return_value = None + records = json.loads(message['Message'])['Records'] + records_transformed = mock_s3_msg_adapter.transform(records) + cb = create_upload_handler(mock_wp, mock_s3_utils, mock_s3_msg_adapter) + + self.sqs_consumer.receive_messages(sqs_queue, 1, cb) + + # Verify publish called + mock_wp.publish_registry.assert_called_with( + 'granule', + mock_s3_utils.get_uuid_metadata(mock_s3_utils.connect('s3_resource', None), bucket, key), + records_transformed.serialize(), + 'PATCH' + ) + if __name__ == '__main__': unittest.main() \ No newline at end of file From d4b2013c3f84125e0941be6605cdfb03c95944d2 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 18 May 2021 20:49:13 -0600 Subject: [PATCH 49/49] 1500-Changed references to psi_registry_url to registry_base_url --- kubernetes/pyconsumer-pod.yaml | 2 +- scripts/config/csb-data-stream-config.yml | 2 +- serverless/conf.py | 2 +- serverless/lambda_function.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/kubernetes/pyconsumer-pod.yaml b/kubernetes/pyconsumer-pod.yaml index fed2258..6943403 100644 --- a/kubernetes/pyconsumer-pod.yaml +++ b/kubernetes/pyconsumer-pod.yaml @@ -72,7 +72,7 @@ data: headers: UNIQUE_ID,FILE_UUID,LON,LAT,DEPTH,TIME,PLATFORM_NAME,PROVIDER type: COLLECTION collection_id: fdb56230-87f4-49f2-ab83-104cfd073177 - psi_registry_url: https://cedardevs.org/ + registry_base_url: https://cedardevs.org/ access_bucket: https://archive-testing-demo.s3-us-east-2.amazonaws.com #access_bucket: https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com file_identifier_prefix: "gov.noaa.ncei.csb:" diff --git a/scripts/config/csb-data-stream-config.yml b/scripts/config/csb-data-stream-config.yml index 06a45b6..2d25328 100644 --- a/scripts/config/csb-data-stream-config.yml +++ b/scripts/config/csb-data-stream-config.yml @@ -2,7 +2,7 @@ format: csv headers: UNIQUE_ID,FILE_UUID,LON,LAT,DEPTH,TIME,PLATFORM_NAME,PROVIDER type: COLLECTION collection_id: fdb56230-87f4-49f2-ab83-104cfd073177 -psi_registry_url: https://internal-a683c98a66fb011eaa4230e0d5e5657f-369075387.us-east-1.elb.amazonaws.com +registry_base_url: https://internal-a683c98a66fb011eaa4230e0d5e5657f-369075387.us-east-1.elb.amazonaws.com access_bucket: https://archive-testing-demo.s3-us-east-2.amazonaws.com #access_bucket: https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com file_id_prefix: "gov.noaa.ncei.csb:" diff --git a/serverless/conf.py b/serverless/conf.py index b41eb0b..26ef3cd 100644 --- a/serverless/conf.py +++ b/serverless/conf.py @@ -3,6 +3,6 @@ HEADERS = 'UNIQUE_ID,FILE_UUID,LON,LAT,DEPTH,TIME,PLATFORM_NAME,PROVIDER' TYPE = 'COLLECTION' COLLECTION_ID = 'fdb56230-87f4-49f2-ab83-104cfd073177' -PSI_REGISTRY_URL = 'http://internal-a683c98a66fb011eaa4230e0d5e5657f-369075387.us-east-1.elb.amazonaws.com' +REGISTRY_BASE_URL = 
'http://internal-a683c98a66fb011eaa4230e0d5e5657f-369075387.us-east-1.elb.amazonaws.com' ACCESS_BUCKET = 'https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com' FILE_IDENTIFIER_PREFIX = 'gov.noaa.ncei.csb:' diff --git a/serverless/lambda_function.py b/serverless/lambda_function.py index abe8fb7..3b6cd97 100644 --- a/serverless/lambda_function.py +++ b/serverless/lambda_function.py @@ -9,7 +9,7 @@ def lambda_handler(event, context): - registry_url = conf.PSI_REGISTRY_URL + "/metadata/granule" + registry_url = conf.REGISTRY_BASE_URL + "/metadata/granule" for rec in event['Records']: