From 4e12e1dd793df67b88fef38a24a84d5e0f12cd04 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 13 Apr 2021 13:21:42 -0600 Subject: [PATCH 001/129] Changed S3Utils, S3MessageAdapter classes' constructors(adjusted documentation) to take dictionary with extra parameters allowed as well as methods within this class not to reference config but the variable that was set. Adjusted effected tests. --- .../config/aws-util-config-dev.yml | 2 + .../onestop/util/S3MessageAdapter.py | 110 ++++++------- onestop-python-client/onestop/util/S3Utils.py | 113 +++++++------ .../tests/SqsHandlersTest.py | 40 +++-- .../tests/util/S3MessageAdapterTest.py | 41 ++++- .../tests/util/S3UtilsTest.py | 151 +++++++++--------- scripts/launch_e2e.py | 45 ++++-- scripts/launch_pyconsumer.py | 12 +- 8 files changed, 291 insertions(+), 223 deletions(-) diff --git a/onestop-python-client/config/aws-util-config-dev.yml b/onestop-python-client/config/aws-util-config-dev.yml index ee1ad95..c30683e 100644 --- a/onestop-python-client/config/aws-util-config-dev.yml +++ b/onestop-python-client/config/aws-util-config-dev.yml @@ -3,9 +3,11 @@ log_level: INFO # AWS config values sqs_url: https://sqs.us-east-2.amazonaws.com/798276211865/cloud-archive-client-sqs +sqs_name: 'foobar' sqs_max_polls: 2 s3_region: "us-east-2" s3_bucket: archive-testing-demo +s3_key: 'ABI-L1b-RadF/2019/298/15/OR_ABI-L1b-RadF-M6C15_G16_s20192981500369_e20192981510082_c20192981510166.nc' #AWS config values for 2nd vault in different region vault_name: archive-vault-new diff --git a/onestop-python-client/onestop/util/S3MessageAdapter.py b/onestop-python-client/onestop/util/S3MessageAdapter.py index d640b77..1dda78c 100644 --- a/onestop-python-client/onestop/util/S3MessageAdapter.py +++ b/onestop-python-client/onestop/util/S3MessageAdapter.py @@ -1,10 +1,4 @@ -import yaml from onestop.util.ClientLogger import ClientLogger -""" -from onestop.info.ImMessage import ImMessage -from onestop.info.FileMessage import FileMessage -from onestop.info.Link import Link -""" from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.parsed_record import ParsedRecord, Publishing, ErrorEvent from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.file_location import FileLocation,FileLocationType @@ -14,81 +8,67 @@ from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.discovery import Discovery, Link - class S3MessageAdapter: """ A class used to extract information from sqs messages that have been triggered by s3 events and transform it into correct format for publishing to IM Registry Attributes ---------- - conf: yaml file - csb-data-stream-config.yml - s3_utils: S3Utils object - used to access objects inside of s3 buckets - logger: ClientLogger object - utilizes python logger library and creates logging for our specific needs - logger.info: ClientLogger object - logging statement that occurs when the class is instantiated - prefix_mapping: Dict - contains mapping of various line offices and their associated collection id + access_bucket: str + Cloud bucket to put in the links field when transformed. + type: str + COLLECTION or GRANULE + file_id_prefix: str + File prefix returned as fileIdentifier + collection_id: str + Collection this data belongs to. Returned as parent identifier. 
+ log_level: str + The log level to use for this class (Defaults to 'INFO') - Methods - ------- - collection_id_map(s3_key) - given an s3 key that contains one of the NESDIS line offices in its path, it will provide the corresponding collection id - - transform(recs) - transforms sqs message triggered by s3 event to correct format for publishing to IM registry - """ - def __init__(self, conf_loc, s3_utils): - """ - - :param conf_loc: yaml file - csb-data-stream-config.yml - :param s3_utils: S3Utils object - used to access objects inside of s3 buckets - - Other Attributes - ---------------- logger: ClientLogger object utilizes python logger library and creates logging for our specific needs logger.info: ClientLogger object logging statement that occurs when the class is instantiated - prefix_mapping: Dict - contains mapping of various line offices and their associated collection id - - """ - with open(conf_loc) as f: - self.conf = yaml.load(f, Loader=yaml.FullLoader) - - self.logger = ClientLogger.get_logger(self.__class__.__name__, self.conf['log_level'], False) - self.logger.info("Initializing " + self.__class__.__name__) - self.s3_utils = s3_utils - self.prefix_mapping = self.conf['prefixMap'] - - def collection_id_map(self, s3_key): + Methods + ------- + transform(recs) + transforms sqs message triggered by s3 event to correct format for publishing to IM registry + """ + def __init__(self, access_bucket, type, file_id_prefix, collection_id, log_level = 'INFO', **wildargs): """ - Given an s3 key that contains one of the NESDIS line offices in its path, it will provide the corresponding collection id + Parameters + ---------- + access_bucket: str + access bucket to put in the links field when transformed. + type: str + COLLECTION or GRANULE + file_id_prefix: str + File prefix returned as fileIdentifier + collection_id: str + Collection this data belongs to. Returned as parent identifier. + log_level: str + Log level for when logging in class. 
- :param s3_key: str - key path of object in s3 bucket - - :return: str - associated line office collection id """ - # Looks through our prefix map and returns appropriate collection id - for key in self.prefix_mapping: - if key in s3_key: - return self.prefix_mapping[key] + self.access_bucket = access_bucket + self.type = type + self.file_id_prefix = file_id_prefix + self.collection_id = collection_id + self.logger = ClientLogger.get_logger(self.__class__.__name__, log_level, False) + self.logger.info("Initializing " + self.__class__.__name__) + if wildargs: + self.logger.error("There were extra constructor arguments: " + str(wildargs)) def transform(self, recs): """ Transforms sqs message triggered by s3 event to correct format for publishing to IM registry - :param recs: dict - sqs event message + Parameters: + ---------- + recs: dict + sqs event message to transform :return: ParsedRecord Object The Parsed Record class is an avro schema generated class @@ -111,8 +91,8 @@ def transform(self, recs): fileInformation = FileInformation(name=file_name, size=file_size, checksums=[checkSum], optionalAttributes={}) # Relationship - relationshipType = RelationshipType(type=self.conf['type']) - relationship = Relationship(id=self.conf['collection_id'], type=relationshipType) + relationshipType = RelationshipType(type=self.type) + relationship = Relationship(id=self.collection_id, type=relationshipType) # File Location fileLocationType = FileLocationType(type='ARCHIVE') @@ -127,12 +107,12 @@ def transform(self, recs): publishing = Publishing(isPrivate=True) # Discovery - access_obj_uri = self.conf['access_bucket'] + "/" + s3_key + access_obj_uri = self.access_bucket + "/" + s3_key link1 = Link(linkName="Amazon S3", linkUrl=access_obj_uri, linkProtocol="HTTPS", linkFunction="download") link2 = Link(linkName="Amazon S3", linkUrl=s3_obj_uri, linkProtocol="Amazon:AWS:S3", linkFunction="download") # To Change? Come back to this later - parent_identifier = self.conf['collection_id'] - file_identifier = self.conf['file_identifier_prefix'] + file_name[:-4] + parent_identifier = self.collection_id + file_identifier = self.file_id_prefix + file_name[:-4] # Initializing most fields to their default values in the avro schema so that it doesn't cause an error in Kafka discovery = Discovery(links=[link1, link2], title=file_name, parentIdentifier=parent_identifier, diff --git a/onestop-python-client/onestop/util/S3Utils.py b/onestop-python-client/onestop/util/S3Utils.py index 7bb0fbe..60fb876 100644 --- a/onestop-python-client/onestop/util/S3Utils.py +++ b/onestop-python-client/onestop/util/S3Utils.py @@ -1,5 +1,5 @@ import logging -import yaml + import uuid import boto3 import botocore @@ -15,69 +15,70 @@ class S3Utils: Attributes ---------- - conf: yaml file - aws-util-config-dev.yml - cred: yaml file - credentials.yml - logger: ClientLogger object - utilizes python logger library and creates logging for our specific needs - logger.info: ClientLogger object - logging statement that occurs when the class is instantiated + access_key: str + Cloud access key + + secret_key: str + Cloud secret key + + log_level: str + The log level to use for this class (Defaults to 'INFO') + + logger: ClientLogger object + Creates logging for us to log to. 
Methods ------- - connect(client_type, region) - connects to a boto3 client + connect(client_type, region) + connects to a boto3 client - objectkey_exists(bucket, s3_key) - checks to see if a s3 key path exists in a particular bucket + objectkey_exists(bucket, s3_key) + checks to see if a s3 key path exists in a particular bucket - get_uuid_metadata(boto_client, bucket, s3_key) - returns metadata uuid of an s3 object if it has one, otherwise prints that one does not exist + get_uuid_metadata(boto_client, bucket, s3_key) + returns metadata uuid of an s3 object if it has one, otherwise prints that one does not exist - add_uuid_metadata(boto_client, bucket, s3_key) - adds metadata uuid to an s3 object + add_uuid_metadata(boto_client, bucket, s3_key) + adds metadata uuid to an s3 object - upload_s3(boto_client, local_file, bucket, s3_key, overwrite) - uploads a file to s3 bucket + upload_s3(boto_client, local_file, bucket, s3_key, overwrite) + uploads a file to s3 bucket - get_csv_s3(boto_client, bucket, key) - gets a csv file from s3 bucket using smart open library + get_csv_s3(boto_client, bucket, key) + gets a csv file from s3 bucket using smart open library - read_bytes_s3(boto_client, bucket, key) - returns raw information of s3 object + read_bytes_s3(boto_client, bucket, key) + returns raw information of s3 object - upload_archive(boto_client, vault_name, src_data) - Add an archive to an Amazon S3 Glacier vault. The upload occurs synchronously. + upload_archive(boto_client, vault_name, src_data) + Add an archive to an Amazon S3 Glacier vault. The upload occurs synchronously. - s3_to_glacier(boto_client, bucket_name, key) - Changes storage class of s3 object from s3 -> glacier. Utilizes s3 client type + s3_to_glacier(boto_client, bucket_name, key) + Changes storage class of s3 object from s3 -> glacier. Utilizes s3 client type - s3_to_glacier_object_lock(boto_client, bucket_name, key, object_lock_mode, object_lock_retention) - Changes storage class of s3 object from s3 -> glacier and places it in object lock mode. Utilizes s3 client type + s3_to_glacier_object_lock(boto_client, bucket_name, key, object_lock_mode, object_lock_retention) + Changes storage class of s3 object from s3 -> glacier and places it in object lock mode. 
Utilizes s3 client type - s3_restore(boto_client, bucket_name, key, days) - Restores an object in S3 glacier back to S3 for specified amount of days + s3_restore(boto_client, bucket_name, key, days) + Restores an object in S3 glacier back to S3 for specified amount of days - retrieve_inventory(boto_client, vault_name) - Initiate an Amazon Glacier inventory-retrieval job + retrieve_inventory(boto_client, vault_name) + Initiate an Amazon Glacier inventory-retrieval job - retrieve_inventory_results(vault_name, boto_client, job_id) - Retrieve the results of an Amazon Glacier inventory-retrieval job + retrieve_inventory_results(vault_name, boto_client, job_id) + Retrieve the results of an Amazon Glacier inventory-retrieval job """ conf = None - def __init__(self, conf_loc, cred_loc): - - with open(conf_loc) as f: - self.conf = yaml.load(f, Loader=yaml.FullLoader) - - with open(cred_loc) as f: - self.cred = yaml.load(f, Loader=yaml.FullLoader) - - self.logger = ClientLogger.get_logger(self.__class__.__name__, self.conf['log_level'], False) + def __init__(self, access_key, secret_key, log_level = 'INFO', **wildargs): + self.access_key = access_key + self.secret_key = secret_key + self.logger = ClientLogger.get_logger(self.__class__.__name__, log_level, False) self.logger.info("Initializing " + self.__class__.__name__) + if wildargs: + self.logger.error("There were extra constructor arguments: " + str(wildargs)) + def connect(self, client_type, region): """ Connects to a boto3 client @@ -92,21 +93,29 @@ def connect(self, client_type, region): """ if client_type == "s3": - boto = boto3.client("s3", aws_access_key_id=self.cred['sandbox']['access_key'], - aws_secret_access_key=self.cred['sandbox']['secret_key'], region_name=region) + boto = boto3.client( + "s3", + aws_access_key_id=self.access_key, + aws_secret_access_key=self.secret_key, + region_name=region) if client_type == "s3_resource": - boto = boto3.resource("s3", region_name=region, aws_access_key_id=self.cred['sandbox']['access_key'], - aws_secret_access_key=self.cred['sandbox']['secret_key'] ) + boto = boto3.resource( + "s3", + region_name=region, + aws_access_key_id=self.access_key, + aws_secret_access_key=self.secret_key) if client_type == "glacier": - boto = boto3.client("glacier", region_name=region, aws_access_key_id=self.cred['sandbox']['access_key'], - aws_secret_access_key=self.cred['sandbox']['secret_key']) + boto = boto3.client( + "glacier", + region_name=region,aws_access_key_id=self.access_key, + aws_secret_access_key=self.secret_key) if client_type == "session": boto = boto3.Session( - aws_access_key_id=self.cred['sandbox']['access_key'], - aws_secret_access_key=self.cred['sandbox']['secret_key'], + aws_access_key_id=self.access_key, + aws_secret_access_key=self.secret_key, ) return boto diff --git a/onestop-python-client/tests/SqsHandlersTest.py b/onestop-python-client/tests/SqsHandlersTest.py index 12323ef..bbe4210 100644 --- a/onestop-python-client/tests/SqsHandlersTest.py +++ b/onestop-python-client/tests/SqsHandlersTest.py @@ -1,7 +1,7 @@ import json import unittest import boto3 - +import yaml from moto import mock_s3 from moto import mock_sqs from tests.utils import abspath_from_relative, create_delete_message @@ -54,9 +54,22 @@ class SqsHandlerTest(unittest.TestCase): def setUp(self): print("Set it up!") + + with open(abspath_from_relative(__file__, "../config/csb-data-stream-config-template.yml")) as f: + self.stream_conf = yaml.load(f, Loader=yaml.FullLoader) + with open(abspath_from_relative(__file__, 
"../config/aws-util-config-dev.yml")) as f: + self.cloud_conf = yaml.load(f, Loader=yaml.FullLoader) + with open(abspath_from_relative(__file__, "../config/credentials-template.yml")) as f: + self.cred = yaml.load(f, Loader=yaml.FullLoader) + self.wp = WebPublisher(self.wp_config, self.cred_config) - self.su = S3Utils(self.aws_config, self.cred_config) - self.s3ma = S3MessageAdapter(self.csb_config, self.su) + self.su = S3Utils(self.cred['sandbox']['access_key'], + self.cred['sandbox']['secret_key'], + "DEBUG") + self.s3ma = S3MessageAdapter(self.stream_conf['access_bucket'], + self.stream_conf['type'], + self.stream_conf['file_identifier_prefix'], + self.stream_conf['collection_id']) def tearDown(self): print("Tear it down!") @@ -64,19 +77,21 @@ def tearDown(self): @mock_s3 @mock_sqs def init_s3(self): - bucket = self.su.conf['s3_bucket'] - key = self.su.conf['s3_key'] + bucket = self.cloud_conf['s3_bucket'] + key = self.cloud_conf['s3_key'] boto_client = self.su.connect("s3", None) boto_client.create_bucket(Bucket=bucket) boto_client.put_object(Bucket=bucket, Key=key, Body="foobar") - sqs_client = boto3.client('sqs', region_name=self.su.conf['s3_region']) - sqs_queue = sqs_client.create_queue(QueueName=self.su.conf['sqs_name']) + sqs_client = boto3.client('sqs', region_name=self.cloud_conf['s3_region']) + sqs_queue = sqs_client.create_queue(QueueName=self.cloud_conf['sqs_name']) self.sqs = SqsConsumer(self.aws_config, self.cred_config) - message = create_delete_message(self.su.conf['s3_region'], bucket, key) + message = create_delete_message(self.cloud_conf['s3_region'], bucket, key) sqs_client.send_message(QueueUrl=sqs_queue['QueueUrl'], MessageBody=json.dumps(message)) - return sqs_queue['QueueUrl'] + sqs_queue['QueueUrl'] + @mock_s3 + @mock_sqs def delete_handler_wrapper(self, recs): handler = create_delete_handler(self.wp) result = handler(recs) @@ -85,5 +100,8 @@ def delete_handler_wrapper(self, recs): @mock_sqs def test_delete_handler(self): mock_queue_url = self.init_s3() - sqs_queue = boto3.resource('sqs', region_name=self.su.conf['s3_region']).Queue(mock_queue_url) - self.sqs.receive_messages(sqs_queue, self.su.conf['sqs_max_polls'], self.delete_handler_wrapper) + sqs_queue = boto3.resource('sqs', region_name=self.stream_conf['s3_region']).Queue(mock_queue_url) + self.sqs.receive_messages(sqs_queue, self.stream_conf['sqs_max_polls'], self.delete_handler_wrapper) + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/onestop-python-client/tests/util/S3MessageAdapterTest.py b/onestop-python-client/tests/util/S3MessageAdapterTest.py index 41a8f9d..a960737 100644 --- a/onestop-python-client/tests/util/S3MessageAdapterTest.py +++ b/onestop-python-client/tests/util/S3MessageAdapterTest.py @@ -1,4 +1,6 @@ import unittest +import yaml + from moto import mock_s3 from tests.utils import abspath_from_relative from onestop.util.S3Utils import S3Utils @@ -51,22 +53,35 @@ class S3MessageAdapterTest(unittest.TestCase): def setUp(self): print("Set it up!") - self.s3_utils = S3Utils(abspath_from_relative(__file__, "../../config/aws-util-config-dev.yml"), - abspath_from_relative(__file__, "../../config/credentials-template.yml")) - self.s3ma = S3MessageAdapter(abspath_from_relative(__file__, "../../config/csb-data-stream-config-template.yml"), - self.s3_utils) + + with open(abspath_from_relative(__file__, "../../config/csb-data-stream-config-template.yml")) as f: + self.stream_conf = yaml.load(f, Loader=yaml.FullLoader) + with open(abspath_from_relative(__file__, 
"../../config/aws-util-config-dev.yml")) as f: + self.cloud_conf = yaml.load(f, Loader=yaml.FullLoader) + with open(abspath_from_relative(__file__, "../../config/credentials-template.yml")) as f: + self.cred = yaml.load(f, Loader=yaml.FullLoader) + + self.s3_utils = S3Utils(self.cred['sandbox']['access_key'], + self.cred['sandbox']['secret_key'], + "DEBUG") + self.s3ma = S3MessageAdapter(self.stream_conf['access_bucket'], + self.stream_conf['type'], + self.stream_conf['file_identifier_prefix'], + self.stream_conf['collection_id']) + + self.region = self.cloud_conf['s3_region'] + self.bucket = self.cloud_conf['s3_bucket'] def tearDown(self): print("Tear it down!") def test_parse_config(self): - self.assertFalse(self.s3ma.conf['collection_id']==None) - + self.assertFalse(self.stream_conf['collection_id'] == None) @mock_s3 def test_transform(self): - s3 = self.s3_utils.connect('s3', self.s3_utils.conf['s3_region']) - location = {'LocationConstraint': self.s3_utils.conf['s3_region']} + s3 = self.s3_utils.connect('s3', self.region) + location = {'LocationConstraint': self.region} bucket = 'nesdis-ncei-csb-dev' key = 'csv/file1.csv' key2 = 'csv/file2.csv' @@ -81,4 +96,14 @@ def test_transform(self): print(payload) self.assertTrue(payload!=None) + @mock_s3 + def test_extra_parameters_constructor(self): + testParams = {"access_bucket": "blah1", + "type": "blah2", + "file_id_prefix": "blah3", + "collection_id": "blah4", + "extra": "extra value"} + self.assertRaises(Exception, S3MessageAdapter(**testParams)) +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/onestop-python-client/tests/util/S3UtilsTest.py b/onestop-python-client/tests/util/S3UtilsTest.py index 34850ad..acb0af4 100644 --- a/onestop-python-client/tests/util/S3UtilsTest.py +++ b/onestop-python-client/tests/util/S3UtilsTest.py @@ -1,126 +1,130 @@ import csv import unittest import uuid +import yaml + from moto import mock_s3 from moto import mock_glacier - from tests.utils import abspath_from_relative from onestop.util.S3Utils import S3Utils class S3UtilsTest(unittest.TestCase): - su = None def setUp(self): print("Set it up!") - self.su = S3Utils(abspath_from_relative(__file__, "../../config/aws-util-config-dev.yml"), - abspath_from_relative(__file__, "../../config/credentials.yml")) - def tearDown(self): - print("Tear it down!") - # Remove files from bucket + with open(abspath_from_relative(__file__, "../../config/csb-data-stream-config-template.yml")) as f: + self.stream_conf = yaml.load(f, Loader=yaml.FullLoader) + with open(abspath_from_relative(__file__, "../../config/aws-util-config-dev.yml")) as f: + self.cloud_conf = yaml.load(f, Loader=yaml.FullLoader) + with open(abspath_from_relative(__file__, "../../config/credentials-template.yml")) as f: + self.cred = yaml.load(f, Loader=yaml.FullLoader) - def test_parse_config(self): - self.assertFalse(self.su.conf['sqs_url']==None) + self.s3_utils = S3Utils(self.cred['sandbox']['access_key'], + self.cred['sandbox']['secret_key'], + "DEBUG") + + self.region = self.cloud_conf['s3_region'] + self.region2 = self.region + self.bucket = self.cloud_conf['s3_bucket'] @mock_s3 def test_get_uuid_metadata(self): - boto_client = self.su.connect("s3_resource", None) + boto_client = self.s3_utils.connect("s3_resource", None) s3_key = "csv/file1.csv" - bucket = self.su.conf['s3_bucket'] - region = self.su.conf['s3_region'] - location = {'LocationConstraint': region} - boto_client.create_bucket(Bucket=bucket, CreateBucketConfiguration=location) + + location = 
{'LocationConstraint': self.region} + boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) obj_uuid = str(uuid.uuid4()) - boto_client.Object(bucket, s3_key).put(Bucket=bucket, Key=s3_key, Body="my_body", Metadata={'object-uuid': obj_uuid}) + boto_client.Object(self.bucket, s3_key).put(Bucket=self.bucket, Key=s3_key, Body="my_body", Metadata={'object-uuid': obj_uuid}) - self.assertFalse(self.su.get_uuid_metadata(boto_client, bucket, s3_key) == None) + self.assertFalse(self.s3_utils.get_uuid_metadata(boto_client, self.bucket, s3_key) == None) @mock_s3 def test_add_uuid_metadata(self): - region = self.su.conf['s3_region'] - boto_client = self.su.connect("s3_resource", region) + boto_client = self.s3_utils.connect("s3_resource", self.region) s3_key = "csv/file1.csv" - bucket = self.su.conf['s3_bucket'] - location = {'LocationConstraint': region} - boto_client.create_bucket(Bucket=bucket, CreateBucketConfiguration=location) - boto_client.Object(bucket, s3_key).put(Bucket=bucket, Key=s3_key, Body="my_body") + location = {'LocationConstraint': self.region} + boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) + boto_client.Object(self.bucket, s3_key).put(Bucket=self.bucket, Key=s3_key, Body="my_body") - self.assertTrue(self.su.add_uuid_metadata(boto_client, bucket, s3_key)) + self.assertTrue(self.s3_utils.add_uuid_metadata(boto_client, self.bucket, s3_key)) @mock_s3 def test_add_file_s3(self): - boto_client = self.su.connect("s3", None) + boto_client = self.s3_utils.connect("s3", None) local_file = abspath_from_relative(__file__, "../data/file4.csv") s3_key = "csv/file4.csv" - bucket = self.su.conf['s3_bucket'] - region = self.su.conf['s3_region'] - location = {'LocationConstraint': region} - boto_client.create_bucket(Bucket=bucket, CreateBucketConfiguration=location) + location = {'LocationConstraint': self.region} + boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) overwrite = True - self.assertTrue(self.su.upload_s3(boto_client, local_file, bucket, s3_key, overwrite)) + self.assertTrue(self.s3_utils.upload_s3(boto_client, local_file, self.bucket, s3_key, overwrite)) + @mock_s3 def test_get_csv_s3(self): - boto_client = self.su.connect("session", None) + boto_session = self.s3_utils.connect("session", None) + s3 = self.s3_utils.connect('s3', self.cloud_conf['s3_region']) + location = {'LocationConstraint': self.region} s3_key = "csv/file1.csv" - bucket = self.su.conf['s3_bucket'] - sm_open_file = self.su.get_csv_s3(boto_client, bucket, s3_key) + s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) + s3.put_object(Bucket=self.bucket, Key=s3_key, Body="body") + + sm_open_file = self.s3_utils.get_csv_s3(boto_session, self.bucket, s3_key) # print("reading csv:" + line.decode('utf-8')) csv_reader = csv.DictReader(sm_open_file) for row in csv_reader: print(str(row["LON"])) + @mock_s3 def test_read_bytes_s3(self): - boto_client = self.su.connect("s3", None) + boto_client = self.s3_utils.connect("s3", None) s3_key = "csv/file1.csv" - bucket = self.su.conf['s3_bucket'] - self.assertTrue(self.su.read_bytes_s3(boto_client, bucket, s3_key)) + boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration={'LocationConstraint': self.region}) + boto_client.put_object(Bucket=self.bucket, Key=s3_key, Body="body") + + self.assertTrue(self.s3_utils.read_bytes_s3(boto_client, self.bucket, s3_key)) @mock_s3 def test_add_files(self): - boto_client = self.su.connect("s3", None) + boto_client = 
self.s3_utils.connect("s3", None) local_files = ["file1_s3.csv", "file2.csv", "file3.csv"] - bucket = self.su.conf['s3_bucket'] - region = self.su.conf['s3_region'] - location = {'LocationConstraint': region} - boto_client.create_bucket(Bucket=bucket, CreateBucketConfiguration=location) + location = {'LocationConstraint': self.region} + boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) overwrite = True - s3_file = None + for file in local_files: local_file = abspath_from_relative(__file__, "../data/" + file) s3_file = "csv/" + file - self.assertTrue(self.su.upload_s3(boto_client, local_file, bucket, s3_file, overwrite)) + self.assertTrue(self.s3_utils.upload_s3(boto_client, local_file, self.bucket, s3_file, overwrite)) @mock_s3 @mock_glacier def test_s3_cross_region(self): print('Cross Region Vault Upload ------------- ') key = "csv/file1.csv" - # grabs te region and bucket name from the config file - region = self.su.conf['s3_region'] - bucket = self.su.conf['s3_bucket'] # makes connection to low level s3 client - s3 = self.su.connect('s3', region) - location = {'LocationConstraint': region} - s3.create_bucket(Bucket=bucket, CreateBucketConfiguration=location) - s3.put_object(Bucket=bucket, Key=key, Body="body") + s3 = self.s3_utils.connect('s3', self.region) + location = {'LocationConstraint': self.region} + s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) + s3.put_object(Bucket=self.bucket, Key=key, Body="body") # Reads object data and stores it into a variable - file_data = self.su.read_bytes_s3(s3, bucket, key) + file_data = self.s3_utils.read_bytes_s3(s3, self.bucket, key) # Redirecting upload to vault in second region - glacier = self.su.connect("glacier", self.su.conf['s3_region2']) - vault_name = self.su.conf['vault_name'] + glacier = self.s3_utils.connect("glacier", self.region2) + vault_name = self.cloud_conf['vault_name'] glacier.create_vault(vaultName=vault_name) print('vault name: ' + str(vault_name)) - print('region name: ' + str(self.su.conf['s3_region2'])) + print('region name: ' + str(self.region2)) print('-------file data---------') print(file_data) - response = self.su.upload_archive(glacier, vault_name, file_data) + response = self.s3_utils.upload_archive(glacier, vault_name, file_data) self.assertTrue(response['archiveId']!=None) @@ -134,18 +138,15 @@ def test_s3_to_glacier(self): print("S3 to Glacier---------") key = "csv/file1_s3.csv" - # grabs te region and bucket name from the config file - region = self.su.conf['s3_region'] - bucket = self.su.conf['s3_bucket'] # Create boto3 low level api connection - s3 = self.su.connect('s3', region) - location = {'LocationConstraint': region} - s3.create_bucket(Bucket=bucket, CreateBucketConfiguration=location) - s3.put_object(Bucket=bucket, Key=key, Body="body") + s3 = self.s3_utils.connect('s3', self.region) + location = {'LocationConstraint': self.region} + s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) + s3.put_object(Bucket=self.bucket, Key=key, Body="body") # Using the S3 util class invoke the change of storage class - response = self.su.s3_to_glacier(s3, bucket, key) + response = self.s3_utils.s3_to_glacier(s3, self.bucket, key) print(response['ResponseMetadata']['HTTPHeaders']['x-amz-storage-class']) # Assert 'x-amz-storage-class': 'GLACIER' @@ -157,18 +158,16 @@ def test_s3_restore(self): Uses high level api to restore object from glacier to s3 """ - region = self.su.conf['s3_region2'] - bucket = self.su.conf['s3_bucket'] key = 
"csv/file1_s3.csv" days = 3 # use high level api - s3 = self.su.connect('s3_resource', region) - location = {'LocationConstraint': region} - s3.create_bucket(Bucket=bucket, CreateBucketConfiguration=location) - s3.Object(bucket, key).put(Bucket=bucket, Key=key, Body="body") + s3 = self.s3_utils.connect('s3_resource', self.region2) + location = {'LocationConstraint': self.region2} + s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) + s3.Object(self.bucket, key).put(Bucket=self.bucket, Key=key, Body="body") - self.assertTrue(self.su.s3_restore(s3, bucket, key, days) != None) + self.assertTrue(self.s3_utils.s3_restore(s3, self.bucket, key, days) != None) @mock_glacier def test_retrieve_inventory(self): @@ -178,12 +177,12 @@ def test_retrieve_inventory(self): # Using glacier api initiates job and returns archive results # Connect to your glacier vault for retrieval - glacier = self.su.connect("glacier", self.su.conf['s3_region2']) - vault_name = self.su.conf['vault_name'] + glacier = self.s3_utils.connect("glacier", self.region2) + vault_name = self.cloud_conf['vault_name'] glacier.create_vault(vaultName=vault_name) - response = self.su.retrieve_inventory(glacier, vault_name) + response = self.s3_utils.retrieve_inventory(glacier, vault_name) self.assertTrue(response['jobId']!= None) ''' @@ -203,7 +202,13 @@ def test_retrieve_inventory_results(self, jobid): self.assertTrue(inventory != None) ''' - + @mock_s3 + def test_extra_parameters_constructor(self): + testParams = {"access_key": "blah", + "secret_key": "blah", + "log_level": "DEBUG", + "extra": "extra value"} + self.assertRaises(Exception, S3Utils(**testParams)) if __name__ == '__main__': unittest.main() \ No newline at end of file diff --git a/scripts/launch_e2e.py b/scripts/launch_e2e.py index 2d5b79b..6d60b2c 100644 --- a/scripts/launch_e2e.py +++ b/scripts/launch_e2e.py @@ -1,6 +1,8 @@ import argparse import json import os +import yaml + from onestop.util.SqsConsumer import SqsConsumer from onestop.util.S3Utils import S3Utils from onestop.util.S3MessageAdapter import S3MessageAdapter @@ -55,8 +57,8 @@ def handler(recs): # Upload to archive file_data = s3_utils.read_bytes_s3(s3_client, bucket, s3_key) - glacier = s3_utils.connect("glacier", s3_utils.conf['s3_region']) - vault_name = s3_utils.conf['vault_name'] + glacier = s3_utils.connect("glacier", cloud_conf['s3_region']) + vault_name = cloud_conf['vault_name'] resp_dict = s3_utils.upload_archive(glacier, vault_name, file_data) @@ -106,9 +108,9 @@ def handler(recs): # High-level api s3_resource = s3_utils.connect("s3_resource", None) - bucket = s3_utils.conf['s3_bucket'] + bucket = cloud_conf['s3_bucket'] overwrite = True - sqs_max_polls = s3_utils.conf['sqs_max_polls'] + sqs_max_polls = cloud_conf['sqs_max_polls'] # Add 3 files to bucket local_files = ["file1.csv", "file4.csv"] s3_file = None @@ -141,18 +143,35 @@ def handler(recs): # Get configuration file path locations conf_loc = args.pop('conf') cred_loc = args.pop('cred') + stream_conf_loc = args.pop('cred') - # Upload a test file to s3 bucket - s3_utils = S3Utils(conf_loc, cred_loc) + with open(os.path.abspath(os.path.join(os.path.dirname(__file__), cred_loc))) as f: + cred = yaml.load(f, Loader=yaml.FullLoader) + with open(os.path.abspath(os.path.join(os.path.dirname(__file__), conf_loc))) as f: + cloud_conf = yaml.load(f, Loader=yaml.FullLoader) + with open(os.path.abspath(os.path.join(os.path.dirname(__file__), stream_conf_loc))) as f: + stream_conf = yaml.load(f, Loader=yaml.FullLoader) - # Low-level 
api ? Can we just use high level revisit me! - s3_client = s3_utils.connect("s3", None) + s3_utils = S3Utils(cred['sandbox']['access_key'], + cred['sandbox']['secret_key'], + "DEBUG") - bucket = s3_utils.conf['s3_bucket'] + bucket = cloud_conf['s3_bucket'] + sqs_max_polls = cloud_conf['sqs_max_polls'] - sqs_max_polls = s3_utils.conf['sqs_max_polls'] + #Source + access_bucket = stream_conf['access_bucket'] - # Add 3 files to bucket + #Onestop related + file_id_prefix = stream_conf['file_identifier_prefix'] + file_format = stream_conf['format'] + headers = stream_conf['headers'] + type = stream_conf['type'] + + # Low-level api ? Can we just use high level revisit me! + s3_client = s3_utils.connect("s3", None) + + # Upload test files to s3 bucket local_files = ["file1.csv", "file4.csv"] s3_file = None for file in local_files: @@ -162,9 +181,11 @@ def handler(recs): if not s3_utils.upload_s3(s3_client, local_file, bucket, s3_file, True): exit("Error setting up for e2e: The test files were not uploaded to the s3 bucket therefore the tests cannot continue.") + + # Receive s3 message and MVM from SQS queue sqs_consumer = SqsConsumer(conf_loc, cred_loc) - s3ma = S3MessageAdapter("config/csb-data-stream-config.yml", s3_utils) + s3ma = S3MessageAdapter(access_bucket, headers, type, file_id_prefix, "DEBUG") wp = WebPublisher("config/web-publisher-config-dev.yml", cred_loc) queue = sqs_consumer.connect() diff --git a/scripts/launch_pyconsumer.py b/scripts/launch_pyconsumer.py index f9dbcf6..7850f38 100644 --- a/scripts/launch_pyconsumer.py +++ b/scripts/launch_pyconsumer.py @@ -1,4 +1,6 @@ import os +import yaml + from onestop.util.SqsConsumer import SqsConsumer from onestop.util.S3Utils import S3Utils from onestop.util.S3MessageAdapter import S3MessageAdapter @@ -49,6 +51,10 @@ def handler(recs): if __name__ == '__main__': conf_loc = "/etc/config/config.yml" cred_loc = "creds.yml" + with open(os.path.abspath(os.path.join(os.path.dirname(__file__), "creds.yml"))) as f: + cred = yaml.load(f, Loader=yaml.FullLoader) + with open(os.path.abspath(os.path.join(os.path.dirname(__file__), "/etc/config/config.yml"))) as f: + conf = yaml.load(f, Loader=yaml.FullLoader) registry_user = os.environ.get("REGISTRY_USERNAME") registry_pwd = os.environ.get("REGISTRY_PASSWORD") @@ -71,8 +77,10 @@ def handler(recs): r = open(cred_loc, "r") # # Receive s3 message and MVM from SQS queue - s3_utils = S3Utils(conf_loc, cred_loc) - sqs_max_polls = s3_utils.conf['sqs_max_polls'] + s3_utils = S3Utils(cred['sandbox']['access_key'], + cred['sandbox']['secret_key'], + "DEBUG") + sqs_max_polls = conf['sqs_max_polls'] sqs_consumer = SqsConsumer(conf_loc, cred_loc) queue = sqs_consumer.connect() From 47d9d335752ac6169849547d941e75958f94ddc7 Mon Sep 17 00:00:00 2001 From: Erin Date: Thu, 15 Apr 2021 17:05:13 -0600 Subject: [PATCH 002/129] 1500-WebPublisher adjusted some documentation wording and added test_WebPublisher_unit as unit test until create folder structure for integration vs unit tests. 
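
For reference, the calls exercised by the new unit test are roughly of this shape
(a minimal sketch; argument names follow the test below, and the constructor takes
the explicit registry/OneStop values rather than config file paths):

    wp = WebPublisher(registry_base_url, username, password, onestop_base_url, 'DEBUG')
    wp.publish_registry('granule', uuid, json.dumps(payload_dict), 'POST')
    wp.delete_registry('granule', uuid)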
--- onestop-python-client/onestop/WebPublisher.py | 28 ++-- .../tests/test_WebPublisher_unit.py | 145 ++++++++++++++++++ 2 files changed, 159 insertions(+), 14 deletions(-) create mode 100644 onestop-python-client/tests/test_WebPublisher_unit.py diff --git a/onestop-python-client/onestop/WebPublisher.py b/onestop-python-client/onestop/WebPublisher.py index 55ca06c..d944f8f 100644 --- a/onestop-python-client/onestop/WebPublisher.py +++ b/onestop-python-client/onestop/WebPublisher.py @@ -8,28 +8,28 @@ class WebPublisher: Attributes ---------- registry_base_url: str - url for registry endpoint + URL for registry endpoint registry_username: str - username for posting metadata to registry + Registry username where credentials needed registry_password: str - password for posting metadata to registry + Registry password where credentials needed onestop_base_url: str - url for onestop endpoint + URL for OneStop endpoint logger.info: str logging level Methods ------- publish_registry(metadata_type, uuid, payload, method) - Publish to registry with either POST,PUT, OR PATCH methods + Publish an item to registry with either POST, PUT, OR PATCH methods delete_registry(metadata_type, uuid) - Deletes item from registry + Delete an item from registry search_registry(metadata_type, uuid) - Searches for an item in registry given its metadata type and uuid + Search for an item in registry given its metadata type and uuid search_onestop(metadata_type, payload) - Acquires the item, collection or granule, from OneStop + Search for an item in OneStop given its metadata type and payload search criteria get_granules_onestop(self, uuid) - Acquires granules from OneStop given the uuid + Search for a granule in OneStop given its uuid """ conf = None @@ -84,12 +84,12 @@ def publish_registry(self, metadata_type, uuid, payload, method): def delete_registry(self, metadata_type, uuid): """ - Deletes item from registry + Delete an item from registry :param metadata_type: str metadata type (GRANULE/COLLECTION) :param uuid: str - uuid you want to publish with + uuid you want to delete :return: str response message indicating if delete was successful @@ -105,7 +105,7 @@ def delete_registry(self, metadata_type, uuid): def search_registry(self, metadata_type, uuid): """ - Searches for an item in registry given its metadata type and uuid + Search for an item in registry given its metadata type and uuid :param metadata_type: str metadata type (GRANULE/COLLECTION) @@ -126,7 +126,7 @@ def search_registry(self, metadata_type, uuid): def search_onestop(self, metadata_type, payload): """ - Searches for an item in OneStop given its metadata type and payload search criteria. + Search for an item in OneStop given its metadata type and payload search criteria. 
:param metadata_type: str metadata type (GRANULE/COLLECTION) @@ -147,7 +147,7 @@ def search_onestop(self, metadata_type, payload): def get_granules_onestop(self, uuid): """ - Searches for a granule in OneStop given its uuid + Search for a granule in OneStop given its uuid :param uuid: str uuid you want search for diff --git a/onestop-python-client/tests/test_WebPublisher_unit.py b/onestop-python-client/tests/test_WebPublisher_unit.py new file mode 100644 index 0000000..3e987fb --- /dev/null +++ b/onestop-python-client/tests/test_WebPublisher_unit.py @@ -0,0 +1,145 @@ +import json +import unittest + +from unittest.mock import ANY +from unittest import mock +from moto import mock_s3 +from onestop.WebPublisher import WebPublisher + +class WebPublisherTest(unittest.TestCase): + username="admin" + password="a_password" + uuid = "9f0a5ff2-fcc0-5bcb-a225-024b669c9bba" + registry_base_url = "https://localhost/onestop/api/registry" + registry_full_url_granule = registry_base_url + "/metadata/granule/" + uuid + registry_full_url_collection = registry_base_url + "/metadata/collection/" + uuid + onestop_base_url = "https://localhost/onestop/api/search" + + payloadDict = { + "fileInformation": { + "name": "file2.csv", + "size": 1385, + "checksums": [{ + "algorithm": "MD5", + "value": "44d2452e8bc2c8013e9c673086fbab7a" + }] + }, + "relationships": [ + {"type": "COLLECTION", + "id": "fdb56230-87f4-49f2-ab83-104cfd073177" + } + ], + "fileLocations": { + "nesdis-ncei-csb-dev/csv/file2.csv": { + "uri": "https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com/csv/file2.csv", + "type": "ACCESS", + "restricted": False, + "serviceType": "HTTPS", + "asynchronous": False + } + }, + "discovery": { + "title": "file2.csv", + "parentIdentifier": "fdb56230-87f4-49f2-ab83-104cfd073177", + "fileIdentifier": "gov.noaa.ncei.csb:file2" + } + } + + addlocDict = { + "fileLocations": { + "Crt3a-Hq2SGUp8n8QSRNpFIf59kmMONqaKlJ_7-Igd8ijMM62deLdtVkiYwlaePbC4JNCsfeg5i-DWDmwxLIx9V-OGgiQp_CZ0rEFXIZxM_ZPyGu7TTv8wwos5SvAI6xDURhzoCH-w": { + "uri": "/282856304593/vaults/noaa-nesdis-ncei-vault-test/archives/Crt3a-Hq2SGUp8n8QSRNpFIf59kmMONqaKlJ_7-Igd8ijMM62deLdtVkiYwlaePbC4JNCsfeg5i-DWDmwxLIx9V-OGgiQp_CZ0rEFXIZxM_ZPyGu7TTv8wwos5SvAI6xDURhzoCH-w", + "type": "ACCESS", + "restricted": True, + "serviceType": "Amazon:AWS:Glacier", + "asynchronous": True + } + } + } + + + def setUp(self): + print("Set it up!") + + self.wp = WebPublisher(self.registry_base_url, + self.username, + self.password, + self.onestop_base_url, + 'DEBUG') + + def tearDown(self): + print("Tear it down!") + + def mocked_requests_patch(*args, **kwargs): + class MockResponse: + def __init__(self, json_data, status_code): + self.json_data = json_data + self.status_code = status_code + + def json(self): + return self.json_data + + print ("args: "+str(args)+" kwargs: "+str(kwargs)) + + return MockResponse({"key1":"value1"}, 200) + + @mock_s3 + @mock.patch('requests.post', side_effect=mocked_requests_patch) + def test_publish(self, mock_get): + payload = json.dumps(self.payloadDict) + self.wp.publish_registry("granule", self.uuid, payload, "POST") + + mock_get.assert_called_with(url = self.registry_full_url_granule, auth = ANY, data = ANY, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = (self.username, self.password), data = ANY, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, data = payload, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, data = ANY, verify = False, headers = 
ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, data = ANY, verify = ANY, headers = {'Content-Type': 'application/json'}) + + @mock_s3 + @mock.patch('requests.put', side_effect=mocked_requests_patch) + def test_publish(self, mock_get): + payload = json.dumps(self.payloadDict) + self.wp.publish_registry("granule", self.uuid, payload, "PUT") + + mock_get.assert_called_with(url = self.registry_full_url_granule, auth = ANY, data = ANY, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = (self.username, self.password), data = ANY, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, data = payload, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, data = ANY, verify = False, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, data = ANY, verify = ANY, headers = {'Content-Type': 'application/json'}) + + @mock_s3 + @mock.patch('requests.patch', side_effect=mocked_requests_patch) + def test_add_glacier_location(self, mock_get): + payload = json.dumps(self.addlocDict) + self.wp.publish_registry("granule", self.uuid, payload, "PATCH") + + mock_get.assert_called_with(url = self.registry_full_url_granule, auth = ANY, data = ANY, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = (self.username, self.password), data = ANY, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, data = payload, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, data = ANY, verify = False, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, data = ANY, verify = ANY, headers = {'Content-Type': 'application/json'}) + + @mock_s3 + @mock.patch('requests.delete', side_effect=mocked_requests_patch) + def test_delete_registry_granule(self, mock_get): + self.wp.delete_registry("granule", self.uuid) + + mock_get.assert_called_with(url = self.registry_full_url_granule, headers = ANY, auth = ANY, verify = ANY) + mock_get.assert_called_with(url = ANY, auth = (self.username, self.password), verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, verify = False, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, verify = ANY, headers = {'Content-Type': 'application/json'}) + + @mock_s3 + @mock.patch('requests.delete', side_effect=mocked_requests_patch) + def test_delete_registry_collection(self, mock_get): + self.wp.delete_registry("collection", self.uuid) + + mock_get.assert_called_with(url = self.registry_full_url_collection, headers = ANY, auth = ANY, verify = ANY) + mock_get.assert_called_with(url = ANY, auth = (self.username, self.password), verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, verify = False, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, verify = ANY, headers = {'Content-Type': 'application/json'}) + +if __name__ == '__main__': + unittest.main() \ No newline at end of file From 85a9096d5415bd606934c9d00c7a69b0722f764d Mon Sep 17 00:00:00 2001 From: Erin Date: Thu, 15 Apr 2021 17:17:05 -0600 Subject: [PATCH 003/129] 1500-Adjusted documentation indentation in WebPublisher --- onestop-python-client/onestop/WebPublisher.py | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/onestop-python-client/onestop/WebPublisher.py 
b/onestop-python-client/onestop/WebPublisher.py index d944f8f..75ee99f 100644 --- a/onestop-python-client/onestop/WebPublisher.py +++ b/onestop-python-client/onestop/WebPublisher.py @@ -7,29 +7,29 @@ class WebPublisher: Attributes ---------- - registry_base_url: str - URL for registry endpoint - registry_username: str - Registry username where credentials needed - registry_password: str - Registry password where credentials needed - onestop_base_url: str - URL for OneStop endpoint - logger.info: str - logging level + registry_base_url: str + URL for registry endpoint + registry_username: str + Registry username where credentials needed + registry_password: str + Registry password where credentials needed + onestop_base_url: str + URL for OneStop endpoint + logger.info: str + logging level Methods ------- - publish_registry(metadata_type, uuid, payload, method) - Publish an item to registry with either POST, PUT, OR PATCH methods - delete_registry(metadata_type, uuid) - Delete an item from registry - search_registry(metadata_type, uuid) - Search for an item in registry given its metadata type and uuid - search_onestop(metadata_type, payload) - Search for an item in OneStop given its metadata type and payload search criteria - get_granules_onestop(self, uuid) - Search for a granule in OneStop given its uuid + publish_registry(metadata_type, uuid, payload, method) + Publish an item to registry with either POST, PUT, OR PATCH methods + delete_registry(metadata_type, uuid) + Delete an item from registry + search_registry(metadata_type, uuid) + Search for an item in registry given its metadata type and uuid + search_onestop(metadata_type, payload) + Search for an item in OneStop given its metadata type and payload search criteria + get_granules_onestop(self, uuid) + Search for a granule in OneStop given its uuid """ conf = None From 85ada2290c305218bbe93cae1cccb3e7c622b7e2 Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 16 Apr 2021 15:52:18 -0600 Subject: [PATCH 004/129] 1500-Changed CsbExtractor class constructor(adjusted documentation) to take dictionary with extra parameters allowed as well as methods within this class not to reference config but the variable that was set. Adjusted effected tests. 
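
The extractor methods are now static and operate on an already-open file-like
object (e.g. one returned by S3Utils.get_csv_s3, or a plain open()), so no
S3Utils instance or config is needed to call them. A minimal usage sketch,
assuming a local copy of the test csv:

    from onestop.extract.CsbExtractor import CsbExtractor

    with open('tests/data/file4.csv') as f:
        bounds = CsbExtractor.get_spatial_temporal_bounds(f, 'LON', 'LAT', 'TIME')
        min_lon, min_lat, max_lon, max_lat = bounds['geospatial']
        begin_date, end_date = bounds['temporal']
        coords = CsbExtractor.extract_coords(f, max_lon, max_lat, min_lon, min_lat)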
--- .../onestop/extract/CsbExtractor.py | 127 ++++++------------ .../tests/extractor/CsbExtractorTest.py | 85 +++++++----- 2 files changed, 98 insertions(+), 114 deletions(-) diff --git a/onestop-python-client/onestop/extract/CsbExtractor.py b/onestop-python-client/onestop/extract/CsbExtractor.py index e79cddc..b1006cb 100644 --- a/onestop-python-client/onestop/extract/CsbExtractor.py +++ b/onestop-python-client/onestop/extract/CsbExtractor.py @@ -2,61 +2,33 @@ from datetime import datetime class CsbExtractor: + """ A class used to extract geospatial data from csv files in an s3 bucket - Attributes - ---------- - su : S3 Utils object - an instance of the s3 utils class used to connect to the corresponding s3 bucket to get access to the csv file for extraction - boto_client: boto3 client - specific boto3 client type (s3, s3_resource, glacier, session) used to access aws resources - bucket: str - the name of the s3 bucket in which you want to access - key: str - the name of key path for the specific item you want to access in the bucket - - Methods ------- is_csv(file_name) - checks to see if the given file is of type csv + Verifies a file name ends with '.csv' get_spatial_temporal_bounds(lon_column_name, lat_column_name, date_column_name) - extracts min/max longitude and latitude values as well as beginning and ending dates from specified csv file + Gets the spacial bounding box for the open file. This seeks to the start of the file at start and the end. extract_coords(max_lon, max_lat, min_lon, min_lat) - extracts specific coordinates corresponding to min/max longitude and latitude values given from get_spatial_temporal_bounds(....) method + Given the max/min lon and lat, the function will parse the csv file to extract the coordinates within the given bounding box. """ - def __init__(self, su, key): - """ - :param su: S3 Utils object - an instance of the s3 utils class used to connect to the corresponding s3 bucket to get access to the csv file for extraction - :param key: str - the name of key path for the specific item you want to access in the bucket - - Other Attributes - ________________ - boto_client: boto3 client - specific boto3 client type (s3, s3_resource, glacier, session) used to access aws resources - bucket: str - the name of the s3 bucket in which you want to access + @staticmethod + def is_csv(file_name): """ - self.su = su - boto_client = self.su.connect("session", None) - bucket = self.su.conf['s3_bucket'] - self.key = key - - def is_csv(self, file_name): - """ - Checks to see if the given file is of type csv + Verifies a file name ends with '.csv' :param file_name: str - the name of the file in the s3 bucket i.e. file1.csv + File name with extension on the end. - :return: boolean - True if the file name contains .csv and False otherwise + :return: str + True if ends with csv + False if doesn't end with csv """ csv_str = '.csv' if file_name.endswith(csv_str): @@ -64,28 +36,22 @@ def is_csv(self, file_name): return False - # def smart_open_read(self, key): - # boto_client = self.su.connect("session", None) - # bucket = self.su.conf['s3_bucket'] - # self.su.read_csv_s3(boto_client, bucket, key) - - - def get_spatial_temporal_bounds(self, lon_column_name, lat_column_name, date_column_name): + @staticmethod + def get_spatial_temporal_bounds(sm_open_file, lon_column_name, lat_column_name, date_column_name): """ - Extracts min/max longitude and latitude values as well as beginning and ending dates from specified csv file + Gets the spacial bounding box for the open file. 
This seeks to the start of the file at start and the end. + :param sm_open_file: file-like object + A file-like object that is open, say from smart_open's sm_open. :param lon_column_name: str - name of longitude column in the csv file + Longitude column name :param lat_column_name: str - name of the latitude column in the csv file + Latitude column name :param date_column_name: str - name of the date column in the csv file + Date column name :return: dict - Key : Value - geospatial (str) -> List[float] containing min/max longitude and latitude values - temporal (str) -> List[str] containing beginning and end dates - + geospatial and temporal fields of the bounding box for given constraints. """ lon_min_val = None lon_max_val = None @@ -99,9 +65,7 @@ def get_spatial_temporal_bounds(self, lon_column_name, lat_column_name, date_col # variable to be returned in string format begin_date_str = '' - boto_client = self.su.connect("session", None) - bucket = self.su.conf['s3_bucket'] - sm_open_file = self.su.get_csv_s3(boto_client, bucket, self.key) + sm_open_file.seek(0) csv_reader = csv.DictReader(sm_open_file) for row in csv_reader: @@ -151,43 +115,40 @@ def get_spatial_temporal_bounds(self, lon_column_name, lat_column_name, date_col "temporal": [begin_date_str, end_date_str] } + sm_open_file.seek(0) return geospatial_temporal_bounds - - def extract_coords(self, max_lon, max_lat, min_lon, min_lat): + @staticmethod + def extract_coords(sm_open_file, max_lon, max_lat, min_lon, min_lat): """ - Extracts specific coordinates corresponding to min/max longitude and latitude values given from get_spatial_temporal_bounds(....) method - - :param max_lon: float - maximum longitude value - :param max_lat: float - maximum latitude value - :param min_lon: float - minimum longitude value - :param min_lat: float - minimum latitude value - - :return: List[ List[Float] ] - Returns a list of lists. Each list contains floats (longitude and latitude ) value pairs corresponding to - one of the min/max latitude and longitude values that were extracted previously from get_spatial_temporal_bounds (...) + Given the max/min lon and lat, the function will parse the csv file to extract the coordinates within the given bounding box. + + :param sm_open_file: file-like object + A file-like object that is open, say from smart_open's sm_open. + :param max_lon: str + Maximum longitude + :param max_lat: str + Maximum latitude + :param min_lon: str + Minimum longitude + :param min_lat: str + Minimum latitude + + :return: list + List of the the coordinates (no duplicates) within the file that are within the given bounding box. 
""" - # Keeps track of all coordinates that needs to be added to json payload coords = [] - boto_client = self.su.connect("session", None) - bucket = self.su.conf['s3_bucket'] - sm_open_file = self.su.get_csv_s3(boto_client, bucket, self.key) + sm_open_file.seek(0) csv_reader = csv.DictReader(sm_open_file) - for row in csv_reader: - if float( row['LAT'] ) == min_lat or float( row['LAT'] ) == max_lat or float( - row['LON'] ) == min_lon or float( row['LON'] ) == max_lon: + if float( row['LAT'] ) == min_lat or float( row['LAT'] ) == max_lat or \ + float( row['LON'] ) == min_lon or float( row['LON'] ) == max_lon: coord = [float( row['LON'] ), float( row['LAT'] )] - - # check to see if that coordinate has already been appended to the list that is keeping track of our coordinates + # if this coordinate has already been appended to the list to return (no duplicates) if coord not in coords: coords.append( coord ) + sm_open_file.seek(0) return coords - diff --git a/onestop-python-client/tests/extractor/CsbExtractorTest.py b/onestop-python-client/tests/extractor/CsbExtractorTest.py index 7dbbc9e..72bdbcc 100644 --- a/onestop-python-client/tests/extractor/CsbExtractorTest.py +++ b/onestop-python-client/tests/extractor/CsbExtractorTest.py @@ -1,35 +1,53 @@ import unittest +import os + +from moto import mock_s3 from onestop.extract.CsbExtractor import CsbExtractor from onestop.util.S3Utils import S3Utils -from tests.utils import abspath_from_relative - class CsbExtractorTest(unittest.TestCase): - # def setUp(self): - # print("Set it up!") - # file_name = '../data/file4.csv' - # self.csb_extractor = CsbExtractor(file_name) - def setUp(self): print("Set it up!") - key = "public/NESDIS/CSB/file4.csv" - self.su = S3Utils( abspath_from_relative( __file__, "../../config/aws-util-config-dev.yml" ), - abspath_from_relative(__file__, "../../config/credentials.yml") ) - self.csb_extractor = CsbExtractor(self.su, key) + self.root_proj_path = os.getcwd() + self.assertIsNotNone(self.root_proj_path) + self.key = "tests/data/file4.csv" + # Use open instead of our methodfor simplicity and reliability, plus not testing our code here. + self.file_obj = open(self.root_proj_path + '/' + self.key) + + config_dict = { + "access_key": "test_access_key", + "secret_key": "test_secret_key", + "log_level": "DEBUG" + } + + self.s3_utils = S3Utils(**config_dict) + self.bucket = "bucket" + self.region = "region" def tearDown(self): print("Tear it down!") + self.file_obj.close() def test_is_csv(self): - csv_str = '.csv' - self.assertTrue(self.csb_extractor.is_csv(self.csb_extractor.file_name)) + self.assertTrue(CsbExtractor.is_csv("test/blah/file.csv"), "Failed to determine a csv file name was a csv file.") + def test_is_not_csv(self): + self.assertFalse(CsbExtractor.is_csv("test/blah/file.txt"), "Failed to determine a csv file name was not a csv file.") - def test_get_geospatial_temporal_bounds(self): - bounds_dict = self.csb_extractor.get_spatial_temporal_bounds('LON', 'LAT', 'TIME') + @mock_s3 + def test_csb_SME_user_path(self): + # Setup bucket and file to read + s3 = self.s3_utils.connect('s3', self.region) + s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration={'LocationConstraint': self.region}) + self.s3_utils.upload_s3(s3, self.root_proj_path + '/' + self.key, self.bucket, self.key, True) + self.assertTrue(self.s3_utils.read_bytes_s3(s3, self.bucket, self.key)) + + # This is how we would expect an external user to get the file. 
+ sm_open_file = self.s3_utils.get_csv_s3(self.s3_utils.connect("session", None), self.bucket, self.key) + + bounds_dict = CsbExtractor.get_spatial_temporal_bounds(sm_open_file, 'LON', 'LAT', 'TIME') coords = bounds_dict["geospatial"] - print(str(coords)) self.assertEqual(coords[0], -96.847995) self.assertEqual(coords[1], 29.373065) self.assertEqual(coords[2], -92.747995) @@ -39,38 +57,43 @@ def test_get_geospatial_temporal_bounds(self): self.assertEqual(date_rng[0], '2018-04-10T14:00:06.000Z' ) self.assertEqual(date_rng[1], '2020-04-10T14:00:06.000Z' ) + def test_get_geospatial_temporal_bounds(self): + bounds_dict = CsbExtractor.get_spatial_temporal_bounds(self.file_obj, 'LON', 'LAT', 'TIME') + + coords = bounds_dict["geospatial"] + self.assertEqual(coords[0], -96.847995) + self.assertEqual(coords[1], 29.373065) + self.assertEqual(coords[2], -92.747995) + self.assertEqual(coords[3], 33.373065) + + date_rng = bounds_dict["temporal"] + self.assertEqual(date_rng[0], '2018-04-10T14:00:06.000Z' ) + self.assertEqual(date_rng[1], '2020-04-10T14:00:06.000Z' ) def test_get_min_lon(self): - bounds_dict = self.csb_extractor.get_spatial_temporal_bounds('LON', 'LAT', 'TIME') + bounds_dict = CsbExtractor.get_spatial_temporal_bounds(self.file_obj, 'LON', 'LAT', 'TIME') + coords = bounds_dict["geospatial"] min_lon = coords[0] self.assertEqual(min_lon, -96.847995) - def test_get_max_datetime(self): - bounds_dict = self.csb_extractor.get_spatial_temporal_bounds('LON', 'LAT', 'TIME') + + bounds_dict = CsbExtractor.get_spatial_temporal_bounds(self.file_obj, 'LON', 'LAT', 'TIME') + date_rng = bounds_dict["temporal"] end_date = date_rng[1] self.assertEqual(end_date, '2020-04-10T14:00:06.000Z') - def test_get_min_datetime(self): - bounds_dict = self.csb_extractor.get_spatial_temporal_bounds('LON', 'LAT', 'TIME') + bounds_dict = CsbExtractor.get_spatial_temporal_bounds(self.file_obj, 'LON', 'LAT', 'TIME') + date_rng = bounds_dict["temporal"] begin_date = date_rng[0] self.assertEqual(begin_date, '2018-04-10T14:00:06.000Z') - def test_extract_coords(self): - bounds_dict = self.csb_extractor.get_spatial_temporal_bounds('LON', 'LAT', 'TIME') - coords = bounds_dict["geospatial"] - - min_lon = coords[0] - min_lat = coords[1] - max_lon = coords[2] - max_lat = coords[3] - - coords = self.csb_extractor.extract_coords(max_lon, max_lat, min_lon, min_lat) + coords = CsbExtractor.extract_coords(self.file_obj, -92.747995, 33.373065, -96.847995, 29.373065) result = [[ -94.847995, 29.373065 From 15cfaa33a1e1a3fe3b0e8360de01f5483f5bec7d Mon Sep 17 00:00:00 2001 From: Erin Date: Mon, 19 Apr 2021 13:28:40 -0600 Subject: [PATCH 005/129] 1500-Added unit tests for WebPublisher. 
Made sure using autospec=True --- onestop-python-client/tests/test_WebPublisher_unit.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/onestop-python-client/tests/test_WebPublisher_unit.py b/onestop-python-client/tests/test_WebPublisher_unit.py index 3e987fb..4a97f80 100644 --- a/onestop-python-client/tests/test_WebPublisher_unit.py +++ b/onestop-python-client/tests/test_WebPublisher_unit.py @@ -84,7 +84,7 @@ def json(self): return MockResponse({"key1":"value1"}, 200) @mock_s3 - @mock.patch('requests.post', side_effect=mocked_requests_patch) + @mock.patch('requests.post', side_effect=mocked_requests_patch, autospec=True) def test_publish(self, mock_get): payload = json.dumps(self.payloadDict) self.wp.publish_registry("granule", self.uuid, payload, "POST") @@ -96,7 +96,7 @@ def test_publish(self, mock_get): mock_get.assert_called_with(url = ANY, auth = ANY, data = ANY, verify = ANY, headers = {'Content-Type': 'application/json'}) @mock_s3 - @mock.patch('requests.put', side_effect=mocked_requests_patch) + @mock.patch('requests.put', side_effect=mocked_requests_patch, autospec=True) def test_publish(self, mock_get): payload = json.dumps(self.payloadDict) self.wp.publish_registry("granule", self.uuid, payload, "PUT") @@ -108,7 +108,7 @@ def test_publish(self, mock_get): mock_get.assert_called_with(url = ANY, auth = ANY, data = ANY, verify = ANY, headers = {'Content-Type': 'application/json'}) @mock_s3 - @mock.patch('requests.patch', side_effect=mocked_requests_patch) + @mock.patch('requests.patch', side_effect=mocked_requests_patch, autospec=True) def test_add_glacier_location(self, mock_get): payload = json.dumps(self.addlocDict) self.wp.publish_registry("granule", self.uuid, payload, "PATCH") @@ -120,7 +120,7 @@ def test_add_glacier_location(self, mock_get): mock_get.assert_called_with(url = ANY, auth = ANY, data = ANY, verify = ANY, headers = {'Content-Type': 'application/json'}) @mock_s3 - @mock.patch('requests.delete', side_effect=mocked_requests_patch) + @mock.patch('requests.delete', side_effect=mocked_requests_patch, autospec=True) def test_delete_registry_granule(self, mock_get): self.wp.delete_registry("granule", self.uuid) @@ -131,7 +131,7 @@ def test_delete_registry_granule(self, mock_get): mock_get.assert_called_with(url = ANY, auth = ANY, verify = ANY, headers = {'Content-Type': 'application/json'}) @mock_s3 - @mock.patch('requests.delete', side_effect=mocked_requests_patch) + @mock.patch('requests.delete', side_effect=mocked_requests_patch, autospec=True) def test_delete_registry_collection(self, mock_get): self.wp.delete_registry("collection", self.uuid) From 1e629ab31e343e52c3fc93f839b26dc566e710a0 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 28 Apr 2021 10:08:53 -0600 Subject: [PATCH 006/129] 1500-Changed KafkaConsumer class constructors(adjusted documentation) to take dictionary with extra parameters allowed as well as methods within this class not to reference config but the variable that was set. Adjusted effected tests. Removed get_logger method as it wasn't used and we used a different logger then. Added checks for if security wasn't enabled. 
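For illustration, a minimal sketch of how a caller would now construct the consumer from a parsed dictionary instead of a config file path. The YAML file name below is hypothetical; the keys mirror the constructor parameters in the diff that follows, and unrecognized keys simply fall through to **wildargs:

    import yaml
    from onestop.KafkaConsumer import KafkaConsumer

    # Hypothetical config file; its keys mirror the constructor parameters.
    with open('scripts/config/kafka-consumer-config-dev.yml') as f:
        conf = yaml.load(f, Loader=yaml.FullLoader)

    consumer = KafkaConsumer(**conf)          # extra keys land in **wildargs and are logged
    metadata_consumer = consumer.connect()    # register_client() + create_consumer()
    consumer.consume(metadata_consumer, lambda key, value: print(key, value))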
--- .../onestop/KafkaConsumer.py | 182 ++++++------ .../tests/KafkaConsumerTest.py | 264 ++++++++++++++++++ scripts/sme/smeFunc.py | 2 +- 3 files changed, 349 insertions(+), 99 deletions(-) create mode 100644 onestop-python-client/tests/KafkaConsumerTest.py diff --git a/onestop-python-client/onestop/KafkaConsumer.py b/onestop-python-client/onestop/KafkaConsumer.py index e45d6cc..a3d1e95 100644 --- a/onestop-python-client/onestop/KafkaConsumer.py +++ b/onestop-python-client/onestop/KafkaConsumer.py @@ -1,11 +1,9 @@ -import logging -import yaml - from confluent_kafka.schema_registry import SchemaRegistryClient from confluent_kafka.error import KafkaError from confluent_kafka import DeserializingConsumer from confluent_kafka.schema_registry.avro import AvroDeserializer from confluent_kafka.serialization import StringDeserializer +from onestop.util.ClientLogger import ClientLogger class KafkaConsumer: """ @@ -13,109 +11,97 @@ class KafkaConsumer: Attributes ---------- - conf: yaml file - kafka-publisher-config-dev.yml - logger: Logger object - utilizes python logger library and creates logging for our specific needs - logger.info: Logger object - logging statement that occurs when the class is instantiated - metadata_type: str - type of metadata (COLLECTION or GRANULE) - brokers: str - brokers (kubernetes service) - group_id: str - Client group id string. All clients sharing the same group.id belong to the same group - auto_offset_reset: str - Action to take when there is no initial offset in offset store or the desired offset is out of range (smallest, earliest, beginning, largest, latest, end, error) - schema_registry: str - schema registry (kubernetes service) - security: boolean - defines if security is in place - collection_topic: str - collection topic you want to consume - granule_topic: str - granule topic you want to consume + metadata_type: str + type of metadata (COLLECTION or GRANULE) + brokers: str + brokers (kubernetes service) + group_id: str + Client group id string. All clients sharing the same group.id belong to the same group + auto_offset_reset: str + Action to take when there is no initial offset in offset store or the desired offset is out of range (smallest, earliest, beginning, largest, latest, end, error) + schema_registry: str + schema registry (kubernetes service) + security_enabled: boolean + Whether to use security for the kafka schema registry client. + security_caLoc: str + Kafka schema registry certification authority (CA) file location. + security_keyLoc: str + Kafka schema registry client's private key file location. + security_certLoc: str + Kafka schema registry client's public key file location. 
+ collection_topic_consume: str + collection topic you want to consume + granule_topic_consume: str + granule topic you want to consume + logger: Logger object + utilizes python logger library and creates logging for our specific needs Methods ------- - get_logger(log_name, create_file) - creates logger file - - register_client() - registers to schema registry client based on configs + register_client() + registers to schema registry client based on configs - create_consumer(registry_client) - subscribes to topic defined in configs and creates a consumer to deserialize messages from topic + connect() + utilizes register_client() and create_consumer(registry_client) to connect to schema registry and allow for consumption of topics - connect() - utilizes register_client() and create_consumer(registry_client) to connect to schema registry and allow for consumption of topics + create_consumer(registry_client) + subscribes to topic defined in configs and creates a consumer to deserialize messages from topic - consume(metadata_consumer, handler) - asynchronously polls for messages in the connected topic, results vary depending on the handler function that is passed into it + consume(metadata_consumer, handler) + asynchronously polls for messages in the connected topic, results vary depending on the handler function that is passed into it """ - conf = None - - def __init__(self, conf_loc): - with open(conf_loc) as f: - self.conf = yaml.load(f, Loader=yaml.FullLoader) - - self.logger = self.get_logger(self.__class__.__name__, False) - self.logger.info("Initializing " + self.__class__.__name__) - self.metadata_type = self.conf['metadata_type'] - self.brokers = self.conf['brokers'] - self.group_id = self.conf['group_id'] - self.auto_offset_reset = self.conf['auto_offset_reset'] - self.schema_registry = self.conf['schema_registry'] - self.security = self.conf['security']['enabled'] - - self.collection_topic = self.conf['collection_topic_consume'] - self.granule_topic = self.conf['granule_topic_consume'] - if self.metadata_type not in ['COLLECTION', 'GRANULE']: - raise ValueError("metadata_type must be 'COLLECTION' or 'GRANULE'") - - def get_logger(self, log_name, create_file): + def __init__(self, metadata_type, brokers, group_id, auto_offset_reset, schema_registry, security, collection_topic_consume, granule_topic_consume, log_level = 'INFO', **wildargs): """ - Utilizes python logger library and creates logging - - :param log_name: str - name of log to be created - :param create_file: boolean - defines whether of not you want a logger file to be created - - :return: Logger object + Attributes + ---------- + metadata_type: str + type of metadata (COLLECTION or GRANULE) + brokers: str + brokers (kubernetes service) + group_id: str + Client group id string. All clients sharing the same group.id belong to the same group + auto_offset_reset: str + Action to take when there is no initial offset in offset store or the desired offset is out of range (smallest, earliest, beginning, largest, latest, end, error) + schema_registry: str + schema registry (kubernetes service) URL + security: dict + enabled boolean: Whether to use security for kafka schema registry client. + caLoc str: Kafka schema registry certification authority (CA) file location. + keyLoc str: Kafka schema registry client's private key file location. + certLoc str: Kafka schema registry client's public key file location. 
+ + collection_topic_consume: str + collection topic you want to consume + granule_topic_consume: str + granule topic you want to consume + log_level: str + What log level to use for this class """ - # create logger - log = logging.getLogger() - - # create formatter and add it to the handlers - formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + self.metadata_type = metadata_type + self.brokers = brokers + self.group_id = group_id + self.auto_offset_reset = auto_offset_reset + self.schema_registry = schema_registry + self.security_enabled = security['enabled'] - if self.conf['log_level'] == "DEBUG": - log.setLevel(level=logging.DEBUG) - else: - if self.conf['log_level'] == "INFO": - log.setLevel(level=logging.INFO) - else: - log.setLevel(level=logging.ERROR) + if self.security_enabled: + self.security_caLoc = security['caLoc'] + self.security_keyLoc = security['keyLoc'] + self.security_certLoc = security['certLoc'] - fh = None - if create_file: - # create file handler for logger. - fh = logging.FileHandler(log_name) - fh.setFormatter(formatter) + self.collection_topic_consume = collection_topic_consume + self.granule_topic_consume = granule_topic_consume - # create console handler for logger. - ch = logging.StreamHandler() - ch.setFormatter(formatter) + if self.metadata_type not in ['COLLECTION', 'GRANULE']: + raise ValueError("metadata_type must be 'COLLECTION' or 'GRANULE'") - # add handlers to logger. - if create_file: - log.addHandler(fh) + self.logger = ClientLogger.get_logger(self.__class__.__name__, log_level, False) + self.logger.info("Initializing " + self.__class__.__name__) - log.addHandler(ch) - return log + if wildargs: + self.logger.error("There were extra constructor arguments: " + str(wildargs)) def register_client(self): """ @@ -125,10 +111,10 @@ def register_client(self): """ reg_conf = {'url': self.schema_registry} - if self.security: - reg_conf['ssl.ca.location'] = self.conf['security']['caLoc'] - reg_conf['ssl.key.location'] = self.conf['security']['keyLoc'] - reg_conf['ssl.certificate.location'] = self.conf['security']['certLoc'] + if self.security_enabled: + reg_conf['ssl.ca.location'] = self.security_caLoc + reg_conf['ssl.key.location'] = self.security_keyLoc + reg_conf['ssl.certificate.location'] = self.security_certLoc registry_client = SchemaRegistryClient(reg_conf) return registry_client @@ -166,11 +152,11 @@ def create_consumer(self, registry_client): consumer_conf = {'bootstrap.servers': self.brokers} - if self.security: + if self.security_enabled: consumer_conf['security.protocol'] = 'SSL' - consumer_conf['ssl.ca.location'] = self.conf['security']['caLoc'] - consumer_conf['ssl.key.location'] = self.conf['security']['keyLoc'] - consumer_conf['ssl.certificate.location'] = self.conf['security']['certLoc'] + consumer_conf['ssl.ca.location'] = self.security_caLoc + consumer_conf['ssl.key.location'] = self.security_keyLoc + consumer_conf['ssl.certificate.location'] = self.security_certLoc meta_consumer_conf = consumer_conf meta_consumer_conf['key.deserializer'] = StringDeserializer('utf-8') diff --git a/onestop-python-client/tests/KafkaConsumerTest.py b/onestop-python-client/tests/KafkaConsumerTest.py new file mode 100644 index 0000000..e7c3f08 --- /dev/null +++ b/onestop-python-client/tests/KafkaConsumerTest.py @@ -0,0 +1,264 @@ +import unittest + +from unittest.mock import ANY, patch, MagicMock, call +from onestop.KafkaConsumer import KafkaConsumer +from confluent_kafka.schema_registry import SchemaRegistryClient +from 
confluent_kafka.serialization import StringDeserializer + +class KafkaConsumerTest(unittest.TestCase): + kp = None + conf_w_security = None + conf_wo_security = None + + @classmethod + def setUp(cls): + print("Set it up!") + cls.conf_w_security = { + "metadata_type" : "GRANULE", + "brokers" : "onestop-dev-cp-kafka:9092", + "group_id" : "sme-test", + "auto_offset_reset" : "earliest", + "schema_registry" : "http://onestop-dev-cp-schema-registry:8081", + "security" : { + "enabled" : True, + "caLoc" : "/etc/pki/tls/cert.pem", + "keyLoc" : "/etc/pki/tls/private/kafka-user.key", + "certLoc" : "/etc/pki/tls/certs/kafka-user.crt" + }, + "collection_topic_consume" : "psi-collection-input-unknown", + "granule_topic_consume" : "psi-granule-input-unknown", + "log_level" : "DEBUG" + } + cls.conf_wo_security = dict(cls.conf_w_security) + # Remove security credential section. + cls.conf_wo_security['security'] = { + "enabled":False + } + + @classmethod + def tearDown(self): + print("Tear it down!") + + def test_init_happy_nonconditional_params(self): + consumer = KafkaConsumer(**self.conf_w_security) + + self.assertEqual(consumer.metadata_type, self.conf_w_security['metadata_type']) + self.assertEqual(consumer.brokers, self.conf_w_security['brokers']) + self.assertEqual(consumer.group_id, self.conf_w_security['group_id']) + self.assertEqual(consumer.auto_offset_reset, self.conf_w_security['auto_offset_reset']) + self.assertEqual(consumer.schema_registry, self.conf_w_security['schema_registry']) + self.assertEqual(consumer.security_enabled, self.conf_w_security['security']['enabled']) + self.assertEqual(consumer.collection_topic_consume, self.conf_w_security['collection_topic_consume']) + self.assertEqual(consumer.granule_topic_consume, self.conf_w_security['granule_topic_consume']) + + def test_init_security_enabled(self): + consumer = KafkaConsumer(**self.conf_w_security) + + self.assertEqual(consumer.security_caLoc, self.conf_w_security['security']['caLoc']) + self.assertEqual(consumer.security_keyLoc, self.conf_w_security['security']['keyLoc']) + self.assertEqual(consumer.security_certLoc, self.conf_w_security['security']['certLoc']) + + def test_init_security_disabled(self): + consumer = KafkaConsumer(**self.conf_wo_security) + + self.assertRaises(AttributeError, getattr, consumer, "security_caLoc") + self.assertRaises(AttributeError, getattr, consumer, "security_keyLoc") + self.assertRaises(AttributeError, getattr, consumer, "security_certLoc") + + def test_init_metadata_type_valid(self): + consumer = KafkaConsumer(**self.conf_w_security) + + self.assertEqual(consumer.metadata_type, self.conf_w_security['metadata_type']) + + def test_init_metadata_type_invalid(self): + wrong_metadata_type_config = dict(self.conf_w_security) + wrong_metadata_type_config['metadata_type'] = "invalid_type" + + self.assertRaises(ValueError, KafkaConsumer, **wrong_metadata_type_config) + + @patch.object(SchemaRegistryClient, '__init__', autospec=True) + def test_register_client_w_security(self, mock_client): + schema_conf = { + 'url':self.conf_w_security['schema_registry'], + 'ssl.ca.location': self.conf_w_security['security']['caLoc'], + 'ssl.key.location': self.conf_w_security['security']['keyLoc'], + 'ssl.certificate.location': self.conf_w_security['security']['certLoc'] + } + mock_client.return_value = None + + consumer = KafkaConsumer(**self.conf_w_security) + consumer.register_client() + + mock_client.assert_called() + mock_client.assert_called_with(ANY, schema_conf) + + @patch.object(SchemaRegistryClient, 
'__init__', autospec=True) + def test_register_client_wo_security(self, mock_client): + schema_conf = { + 'url' : self.conf_wo_security['schema_registry'] + } + mock_client.return_value = None + + consumer = KafkaConsumer(**self.conf_wo_security) + consumer.register_client() + + mock_client.assert_called_with(ANY, schema_conf) + + @patch('onestop.KafkaConsumer.AvroDeserializer') + @patch('onestop.KafkaConsumer.DeserializingConsumer') + def test_create_consumer_calls_AvroDeserializer(self, mock_deserializing_consumer, mock_avro_deserializer): + conf_w_security_collection = dict(self.conf_w_security) + conf_w_security_collection['metadata_type'] = "COLLECTION" + + consumer = KafkaConsumer(**conf_w_security_collection) + reg_client = consumer.register_client() + reg_client.get_latest_version = MagicMock() + consumer.create_consumer(reg_client) + + # Verify AvroDeserializer called with expected registry client + mock_avro_deserializer.assert_called_with(ANY, reg_client) + + @patch('onestop.KafkaConsumer.AvroDeserializer') + @patch('onestop.KafkaConsumer.DeserializingConsumer') + def test_create_consumer_collection_w_security(self, mock_deserializing_consumer, mock_avro_deserializer): + conf_w_security_collection = dict(self.conf_w_security) + topic = conf_w_security_collection['collection_topic_consume'] + conf_w_security_collection['metadata_type'] = 'COLLECTION' + + consumer = KafkaConsumer(**conf_w_security_collection) + reg_client = MagicMock() + consumer.create_consumer(reg_client) + + # Verify metadata type was taken into consideration for getting topic information + reg_client.get_latest_version.assert_called_with(topic + '-value') + + # Verify security passed into DeserializingConsumer + mock_deserializing_consumer.assert_called_with({'bootstrap.servers': conf_w_security_collection['brokers'], + 'security.protocol': 'SSL', + 'ssl.ca.location': conf_w_security_collection['security']['caLoc'], + 'ssl.key.location': conf_w_security_collection['security']['keyLoc'], + 'ssl.certificate.location': conf_w_security_collection['security']['certLoc'], + 'key.deserializer': ANY, + 'value.deserializer': ANY, + 'group.id': conf_w_security_collection['group_id'], + 'auto.offset.reset': conf_w_security_collection['auto_offset_reset'] + }) + mock_deserializing_consumer.return_value.subscribe.assert_called_with([topic]) + + @patch('onestop.KafkaConsumer.AvroDeserializer') + @patch('onestop.KafkaConsumer.DeserializingConsumer') + def test_create_consumer_collection_wo_security(self, mock_deserializing_consumer, mock_avro_deserializer): + conf_wo_security_collection = dict(self.conf_wo_security) + topic = conf_wo_security_collection['collection_topic_consume'] + conf_wo_security_collection['metadata_type'] = 'COLLECTION' + + consumer = KafkaConsumer(**conf_wo_security_collection) + reg_client = MagicMock() + consumer.create_consumer(reg_client) + + # Verify metadata type was taken into consideration for getting topic information + reg_client.get_latest_version.assert_called_with(topic + '-value') + + # Verify no security passed into DeserializingConsumer + mock_deserializing_consumer.assert_called_with({'bootstrap.servers': conf_wo_security_collection['brokers'], + 'key.deserializer': ANY, + 'value.deserializer': ANY, + 'group.id': conf_wo_security_collection['group_id'], + 'auto.offset.reset': conf_wo_security_collection['auto_offset_reset'] + }) + mock_deserializing_consumer.return_value.subscribe.assert_called_with([topic]) + + @patch('onestop.KafkaConsumer.AvroDeserializer') + 
@patch('onestop.KafkaConsumer.DeserializingConsumer') + def test_create_consumer_granule_w_security(self, mock_deserializing_consumer, mock_avro_deserializer): + conf_w_security_granule = dict(self.conf_w_security) + topic = conf_w_security_granule['granule_topic_consume'] + conf_w_security_granule['metadata_type'] = 'GRANULE' + + consumer = KafkaConsumer(**conf_w_security_granule) + reg_client = MagicMock() + consumer.create_consumer(reg_client) + + # Verify metadata type was taken into consideration for getting topic information + reg_client.get_latest_version.assert_called_with(topic + '-value') + + # Verify security passed into DeserializingConsumer + mock_deserializing_consumer.assert_called_with({'bootstrap.servers': conf_w_security_granule['brokers'], + 'security.protocol': 'SSL', + 'ssl.ca.location': conf_w_security_granule['security']['caLoc'], + 'ssl.key.location': conf_w_security_granule['security']['keyLoc'], + 'ssl.certificate.location': conf_w_security_granule['security']['certLoc'], + 'key.deserializer': ANY, + 'value.deserializer': ANY, + 'group.id': conf_w_security_granule['group_id'], + 'auto.offset.reset': conf_w_security_granule['auto_offset_reset'] + }) + mock_deserializing_consumer.return_value.subscribe.assert_called_with([topic]) + + @patch('onestop.KafkaConsumer.AvroDeserializer') + @patch('onestop.KafkaConsumer.DeserializingConsumer') + def test_create_consumer_granule_wo_security(self, mock_deserializing_consumer, mock_avro_deserializer): + conf_wo_security_granule = dict(self.conf_wo_security) + topic = conf_wo_security_granule['granule_topic_consume'] + conf_wo_security_granule['metadata_type'] = 'GRANULE' + + # Verify security taken into consideration + meta_consumer_conf = {'bootstrap.servers': conf_wo_security_granule['brokers'], + 'key.deserializer': StringDeserializer('utf-8'), + 'value.deserializer': mock_avro_deserializer, + 'group.id': conf_wo_security_granule['group_id'], + 'auto.offset.reset': conf_wo_security_granule['auto_offset_reset'] + } + + consumer = KafkaConsumer(**conf_wo_security_granule) + reg_client = MagicMock() + consumer.create_consumer(reg_client) + + # Verify metadata type was taken into consideration for getting topic information + reg_client.get_latest_version.assert_called_with(topic + '-value') + + # Verify no security passed into DeserializingConsumer called with expected configuration + meta_consumer_conf['key.deserializer'] = ANY + meta_consumer_conf['value.deserializer'] = ANY + mock_deserializing_consumer.assert_called_with(meta_consumer_conf) + mock_deserializing_consumer.return_value.subscribe.assert_called_with([topic]) + + def test_connect(self): + mock_client = MagicMock() + + consumer = KafkaConsumer(**self.conf_w_security) + consumer.register_client = MagicMock(return_value=mock_client) + consumer.create_consumer = MagicMock(return_value=MagicMock(mock_client)) + consumer.connect() + + consumer.register_client.assert_called_once() + consumer.create_consumer.assert_called_with(mock_client) + + @patch('confluent_kafka.cimpl.Message') + @patch('onestop.KafkaConsumer.DeserializingConsumer') + def test_consume(self, mock_metadata_consumer, mock_message): + mock_message_key = 'key1' + mock_message_value = 'value1' + consumer = KafkaConsumer(**self.conf_w_security) + consumer.register_client = MagicMock(return_value=MagicMock()) + mock_message.key.return_value = mock_message_key + mock_message.value.return_value = mock_message_value + mock_metadata_consumer.poll.side_effect = [None, mock_message, Exception] + mock_handler = 
MagicMock() + + # Would have liked not having the try/catch but it wasn't ignoring the exception. Just need to not fail due to end of loop. + try: + self.assertRaises(Exception, consumer.consume(mock_metadata_consumer, mock_handler)) + except Exception as e: + print("Ignoring exception: {}".format(e)) + + # Verify kafka consumer poll called expected number of times + self.assertTrue(mock_metadata_consumer.poll.call_count == 3) + mock_metadata_consumer.poll.assert_has_calls([call(10), call(10), call(10)]) + + # Verify callback function was called once with expected message attributes + mock_handler.assert_called_once() + mock_handler.assert_called_with(mock_message_key, mock_message_value) + + if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/scripts/sme/smeFunc.py b/scripts/sme/smeFunc.py index 2e11d51..084e15b 100644 --- a/scripts/sme/smeFunc.py +++ b/scripts/sme/smeFunc.py @@ -27,7 +27,7 @@ def handler(key,value): if __name__ == '__main__': kafka_consumer = KafkaConsumer("scripts/config/kafka-publisher-config-dev.yml") - kafka_consumer.granule_topic = 'psi-granule-parsed' + kafka_consumer.granule_topic_consume = 'psi-granule-parsed' metadata_consumer = kafka_consumer.connect() kafka_consumer.consume(metadata_consumer, lambda k, v: handler(k, v)) """ From b63bfe868d48e3e574e3a5f0b75b1d6dff5897ef Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 28 Apr 2021 10:11:07 -0600 Subject: [PATCH 007/129] 1500-Adjusted KafkaConsumer create_consumer to not do duplicate code, using a topic variable instead. Removed changing name of the consumer_conf to meta_consumer_conf. Added additional logging. --- .../onestop/KafkaConsumer.py | 37 +++++++++++-------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/onestop-python-client/onestop/KafkaConsumer.py b/onestop-python-client/onestop/KafkaConsumer.py index a3d1e95..54744cb 100644 --- a/onestop-python-client/onestop/KafkaConsumer.py +++ b/onestop-python-client/onestop/KafkaConsumer.py @@ -108,7 +108,7 @@ def register_client(self): Registers to schema registry client based on configs :return: SchemaRegistryClient (confluent kafka library) - """ + """ reg_conf = {'url': self.schema_registry} if self.security_enabled: @@ -116,6 +116,7 @@ def register_client(self): reg_conf['ssl.key.location'] = self.security_keyLoc reg_conf['ssl.certificate.location'] = self.security_certLoc + self.logger.info("Creating SchemaRegistryClient with configuration:"+str(reg_conf)) registry_client = SchemaRegistryClient(reg_conf) return registry_client @@ -138,18 +139,21 @@ def create_consumer(self, registry_client): :return: DeserializingConsumer object """ - metadata_schema = None topic = None if self.metadata_type == "COLLECTION": - metadata_schema = registry_client.get_latest_version(self.collection_topic + '-value').schema.schema_str - topic = self.collection_topic + topic = self.collection_topic_consume if self.metadata_type == "GRANULE": - metadata_schema = registry_client.get_latest_version(self.granule_topic + '-value').schema.schema_str - topic = self.granule_topic + topic = self.granule_topic_consume - metadata_deserializer = AvroDeserializer(metadata_schema, registry_client) + self.logger.debug("topic: "+str(topic)) + + # This topic naming scheme is how OneStop creates the topics. 
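+        # (The "-value" suffix is Confluent's default subject naming: the value schema
+        # for topic "t" is registered under the subject "t-value".)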
+ latest_schema = registry_client.get_latest_version(topic + '-value') + metadata_schema = latest_schema.schema.schema_str + self.logger.debug("metadata_schema: "+metadata_schema) + metadata_deserializer = AvroDeserializer(metadata_schema, registry_client) consumer_conf = {'bootstrap.servers': self.brokers} if self.security_enabled: @@ -158,13 +162,14 @@ def create_consumer(self, registry_client): consumer_conf['ssl.key.location'] = self.security_keyLoc consumer_conf['ssl.certificate.location'] = self.security_certLoc - meta_consumer_conf = consumer_conf - meta_consumer_conf['key.deserializer'] = StringDeserializer('utf-8') - meta_consumer_conf['value.deserializer'] = metadata_deserializer - meta_consumer_conf['group.id'] = self.group_id - meta_consumer_conf['auto.offset.reset'] = self.auto_offset_reset + consumer_conf['key.deserializer'] = StringDeserializer('utf-8') + consumer_conf['value.deserializer'] = metadata_deserializer + consumer_conf['group.id'] = self.group_id + consumer_conf['auto.offset.reset'] = self.auto_offset_reset - metadata_consumer = DeserializingConsumer(meta_consumer_conf) + self.logger.debug("meta_consumer_conf: "+str(consumer_conf)) + metadata_consumer = DeserializingConsumer(consumer_conf) + self.logger.debug("topic: "+str(topic)) metadata_consumer.subscribe([topic]) return metadata_consumer @@ -183,15 +188,16 @@ def consume(self, metadata_consumer, handler): while True: try: msg = metadata_consumer.poll(10) + self.logger.debug("Message received: "+str(msg)) if msg is None: - print('No Messages') + self.logger.info('No Messages') continue + self.logger.debug("Message key="+str(msg.key())+" value="+str(msg.value())) key = msg.key() value = msg.value() - except KafkaError: raise try: @@ -199,4 +205,5 @@ def consume(self, metadata_consumer, handler): except Exception as e: self.logger.error("Message handler failed: {}".format(e)) break + self.logger.debug("Closing metadata_consumer") metadata_consumer.close() From 74e7f6f5c03c5943d6b1cdd98dc04bdcc68d9fc9 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 28 Apr 2021 10:19:42 -0600 Subject: [PATCH 008/129] 1500-Added __init__.py to tests directory so was discoverable/module. --- onestop-python-client/tests/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 onestop-python-client/tests/__init__.py diff --git a/onestop-python-client/tests/__init__.py b/onestop-python-client/tests/__init__.py new file mode 100644 index 0000000..e69de29 From a07e642ee13f34b6baa5d3679292a5403cdfed8c Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 28 Apr 2021 12:47:34 -0600 Subject: [PATCH 009/129] 1500-in KafkaConsumer renamed variables so tad more generic. Makes it clearer how similar code is to KafkaPublisher class. 
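The similarity is easier to see side by side: with the generic names, register_client() in this class has the same shape the publisher gets in a later patch. A rough sketch of that shared shape (build_registry_client is a hypothetical helper for illustration, not an actual method):

    from confluent_kafka.schema_registry import SchemaRegistryClient

    def build_registry_client(schema_registry, security):
        # Shared shape of register_client() in KafkaConsumer and KafkaPublisher (sketch only).
        conf = {'url': schema_registry}
        if security.get('enabled'):
            conf['ssl.ca.location'] = security['caLoc']
            conf['ssl.key.location'] = security['keyLoc']
            conf['ssl.certificate.location'] = security['certLoc']
        return SchemaRegistryClient(conf)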
--- .../onestop/KafkaConsumer.py | 46 +++++++++---------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/onestop-python-client/onestop/KafkaConsumer.py b/onestop-python-client/onestop/KafkaConsumer.py index 54744cb..18c489e 100644 --- a/onestop-python-client/onestop/KafkaConsumer.py +++ b/onestop-python-client/onestop/KafkaConsumer.py @@ -91,8 +91,8 @@ def __init__(self, metadata_type, brokers, group_id, auto_offset_reset, schema_r self.security_keyLoc = security['keyLoc'] self.security_certLoc = security['certLoc'] - self.collection_topic_consume = collection_topic_consume - self.granule_topic_consume = granule_topic_consume + self.collection_topic = collection_topic_consume + self.granule_topic = granule_topic_consume if self.metadata_type not in ['COLLECTION', 'GRANULE']: raise ValueError("metadata_type must be 'COLLECTION' or 'GRANULE'") @@ -109,15 +109,15 @@ def register_client(self): :return: SchemaRegistryClient (confluent kafka library) """ - reg_conf = {'url': self.schema_registry} + conf = {'url': self.schema_registry} if self.security_enabled: - reg_conf['ssl.ca.location'] = self.security_caLoc - reg_conf['ssl.key.location'] = self.security_keyLoc - reg_conf['ssl.certificate.location'] = self.security_certLoc + conf['ssl.ca.location'] = self.security_caLoc + conf['ssl.key.location'] = self.security_keyLoc + conf['ssl.certificate.location'] = self.security_certLoc - self.logger.info("Creating SchemaRegistryClient with configuration:"+str(reg_conf)) - registry_client = SchemaRegistryClient(reg_conf) + self.logger.info("Creating SchemaRegistryClient with configuration:"+str(conf)) + registry_client = SchemaRegistryClient(conf) return registry_client def connect(self): @@ -141,10 +141,10 @@ def create_consumer(self, registry_client): """ topic = None if self.metadata_type == "COLLECTION": - topic = self.collection_topic_consume + topic = self.collection_topic if self.metadata_type == "GRANULE": - topic = self.granule_topic_consume + topic = self.granule_topic self.logger.debug("topic: "+str(topic)) @@ -154,21 +154,21 @@ def create_consumer(self, registry_client): metadata_schema = latest_schema.schema.schema_str self.logger.debug("metadata_schema: "+metadata_schema) metadata_deserializer = AvroDeserializer(metadata_schema, registry_client) - consumer_conf = {'bootstrap.servers': self.brokers} + conf = {'bootstrap.servers': self.brokers} if self.security_enabled: - consumer_conf['security.protocol'] = 'SSL' - consumer_conf['ssl.ca.location'] = self.security_caLoc - consumer_conf['ssl.key.location'] = self.security_keyLoc - consumer_conf['ssl.certificate.location'] = self.security_certLoc - - consumer_conf['key.deserializer'] = StringDeserializer('utf-8') - consumer_conf['value.deserializer'] = metadata_deserializer - consumer_conf['group.id'] = self.group_id - consumer_conf['auto.offset.reset'] = self.auto_offset_reset - - self.logger.debug("meta_consumer_conf: "+str(consumer_conf)) - metadata_consumer = DeserializingConsumer(consumer_conf) + conf['security.protocol'] = 'SSL' + conf['ssl.ca.location'] = self.security_caLoc + conf['ssl.key.location'] = self.security_keyLoc + conf['ssl.certificate.location'] = self.security_certLoc + + conf['key.deserializer'] = StringDeserializer('utf-8') + conf['value.deserializer'] = metadata_deserializer + conf['group.id'] = self.group_id + conf['auto.offset.reset'] = self.auto_offset_reset + + self.logger.debug("conf: "+str(conf)) + metadata_consumer = DeserializingConsumer(conf) self.logger.debug("topic: "+str(topic)) 
metadata_consumer.subscribe([topic]) return metadata_consumer From da934c40324310b255dfee857ef356b21cfd7fe8 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 28 Apr 2021 12:59:07 -0600 Subject: [PATCH 010/129] 1500-Fixed KafkaConsumerTest (thought intellij refactor of var name would notice this usage). --- onestop-python-client/tests/KafkaConsumerTest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onestop-python-client/tests/KafkaConsumerTest.py b/onestop-python-client/tests/KafkaConsumerTest.py index e7c3f08..776e21a 100644 --- a/onestop-python-client/tests/KafkaConsumerTest.py +++ b/onestop-python-client/tests/KafkaConsumerTest.py @@ -48,8 +48,8 @@ def test_init_happy_nonconditional_params(self): self.assertEqual(consumer.auto_offset_reset, self.conf_w_security['auto_offset_reset']) self.assertEqual(consumer.schema_registry, self.conf_w_security['schema_registry']) self.assertEqual(consumer.security_enabled, self.conf_w_security['security']['enabled']) - self.assertEqual(consumer.collection_topic_consume, self.conf_w_security['collection_topic_consume']) - self.assertEqual(consumer.granule_topic_consume, self.conf_w_security['granule_topic_consume']) + self.assertEqual(consumer.collection_topic, self.conf_w_security['collection_topic_consume']) + self.assertEqual(consumer.granule_topic, self.conf_w_security['granule_topic_consume']) def test_init_security_enabled(self): consumer = KafkaConsumer(**self.conf_w_security) From 92e12c9dc1253ecd0acb3434936c0d25f2276b72 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 28 Apr 2021 12:59:42 -0600 Subject: [PATCH 011/129] 1500-KafkaConsumer consolidated config for deserializer. --- onestop-python-client/onestop/KafkaConsumer.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/onestop-python-client/onestop/KafkaConsumer.py b/onestop-python-client/onestop/KafkaConsumer.py index 18c489e..76078cc 100644 --- a/onestop-python-client/onestop/KafkaConsumer.py +++ b/onestop-python-client/onestop/KafkaConsumer.py @@ -154,7 +154,13 @@ def create_consumer(self, registry_client): metadata_schema = latest_schema.schema.schema_str self.logger.debug("metadata_schema: "+metadata_schema) metadata_deserializer = AvroDeserializer(metadata_schema, registry_client) - conf = {'bootstrap.servers': self.brokers} + conf = { + 'bootstrap.servers': self.brokers, + 'key.deserializer': StringDeserializer('utf-8'), + 'value.deserializer': metadata_deserializer, + 'group.id': self.group_id, + 'auto.offset.reset': self.auto_offset_reset + } if self.security_enabled: conf['security.protocol'] = 'SSL' @@ -162,11 +168,6 @@ def create_consumer(self, registry_client): conf['ssl.key.location'] = self.security_keyLoc conf['ssl.certificate.location'] = self.security_certLoc - conf['key.deserializer'] = StringDeserializer('utf-8') - conf['value.deserializer'] = metadata_deserializer - conf['group.id'] = self.group_id - conf['auto.offset.reset'] = self.auto_offset_reset - self.logger.debug("conf: "+str(conf)) metadata_consumer = DeserializingConsumer(conf) self.logger.debug("topic: "+str(topic)) From ba1740838c3ef9c6c488374228b37eb45155bf30 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 28 Apr 2021 17:50:02 -0600 Subject: [PATCH 012/129] 1500-Changed KafkaPublisher class constructors(adjusted documentation) to take dictionary with extra parameters allowed as well as methods within this class not to reference config but the variable that was set. Removed logging import and changed to ClientLogger. 
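As with the consumer, a minimal usage sketch under the new constructor; the broker, registry, and topic values below are the illustrative ones used in the tests, and with security disabled only {'enabled': False} is required:

    from onestop.KafkaPublisher import KafkaPublisher

    publisher = KafkaPublisher(
        metadata_type='COLLECTION',
        brokers='onestop-dev-cp-kafka:9092',
        schema_registry='http://onestop-dev-cp-schema-registry:8081',
        security={'enabled': False},
        collection_topic_publish='psi-collection-input-unknown',
        granule_topic_publish='psi-granule-input-unknown',
        log_level='INFO',
    )
    metadata_producer = publisher.connect()
    # Then publish via publish_collection(...) / publish_granule(...) as documented below.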
--- .../onestop/KafkaPublisher.py | 182 ++++++++---------- 1 file changed, 82 insertions(+), 100 deletions(-) diff --git a/onestop-python-client/onestop/KafkaPublisher.py b/onestop-python-client/onestop/KafkaPublisher.py index d357de8..125174b 100644 --- a/onestop-python-client/onestop/KafkaPublisher.py +++ b/onestop-python-client/onestop/KafkaPublisher.py @@ -1,13 +1,11 @@ -import logging -from uuid import UUID import json -import yaml +from uuid import UUID from confluent_kafka.schema_registry import SchemaRegistryClient from confluent_kafka.error import KafkaError from confluent_kafka import SerializingProducer from confluent_kafka.schema_registry.avro import AvroSerializer - +from onestop.util.ClientLogger import ClientLogger class KafkaPublisher: """ @@ -15,114 +13,98 @@ class KafkaPublisher: Attributes ---------- - conf: yaml file - config/kafka-publisher-config-dev.yml - logger: Logger object - utilizes python logger library and creates logging for our specific needs - logger.info: Logger object - logging statement that occurs when the class is instantiated - metadata_type: str - type of metadata (COLLECTION or GRANULE) - brokers: str - brokers (kubernetes service) - schema_registry: str - schema registry (kubernetes service) - security: boolean - defines if security is in place - collection_topic: str - collection topic you want to consume - granule_topic: str - granule topic you want to consume + metadata_type: str + type of metadata (COLLECTION or GRANULE) + brokers: str + brokers (kubernetes service) + schema_registry: str + schema registry (kubernetes service) + security_enabled: boolean + defines if security is in place + security_caLoc: str + Kafka schema registry certification authority (CA) file location. + security_keyLoc: str + Kafka schema registry client's private key file location. + security_certLoc: str + Kafka schema registry client's public key file location. 
+ collection_topic: str + collection topic you want to produce to + granule_topic: str + granule topic you want to produce to + logger: Logger object + utilizes python logger library and creates logging for our specific needs Methods ------- - get_logger(log_name, create_file) - creates logger file + register_client() + registers to schema registry client based on configs - register_client() - registers to schema registry client based on configs + create_producer(registry_client) + creates a SerializingProducer object to produce to kafka topic - create_producer(registry_client) - creates a SerializingProducer object to produce to kafka topic + connect() + utilizes register_client() and create_producer(registry_client) to connect to schema registry and allow for producing to kafka topics - connect() - utilizes register_client() and create_producer(registry_client) to connect to schema registry and allow for producing to kafka topics + publish_collection(collection_producer, collection_uuid, content_dict, method) + Publish collection to collection topic - publish_collection(collection_producer, collection_uuid, content_dict, method) - Publish collection to collection topic - - publish_granule(granule_producer, record_uuid, collection_uuid, content_dict) - Publish granule to granule topic + publish_granule(granule_producer, record_uuid, collection_uuid, content_dict) + Publish granule to granule topic """ - conf = None - - def __init__(self, conf_loc): - with open(conf_loc) as f: - self.conf = yaml.load(f, Loader=yaml.FullLoader) - - self.logger = self.get_logger(self.__class__.__name__, False) - self.logger.info("Initializing " + self.__class__.__name__) - self.metadata_type = self.conf['metadata_type'] - self.brokers = self.conf['brokers'] - self.schema_registry = self.conf['schema_registry'] - self.security = self.conf['security']['enabled'] - - self.collection_topic = self.conf['collection_topic_produce'] - self.granule_topic = self.conf['granule_topic_produce'] - - if self.metadata_type not in ['COLLECTION', 'GRANULE']: - raise ValueError("metadata_type must be 'COLLECTION' or 'GRANULE'") - - def get_logger(self, log_name, create_file): + def __init__(self, metadata_type, brokers, schema_registry, security, collection_topic_publish, granule_topic_publish, log_level='INFO', **wildargs): """ - Utilizes python logger library and creates logging - - :param log_name: str - name of log to be created - :param create_file: boolean - defines whether of not you want a logger file to be created - - :return: Logger object + Attributes + ---------- + metadata_type: str + type of metadata (COLLECTION or GRANULE) + brokers: str + brokers (kubernetes service) + group_id: str + Client group id string. All clients sharing the same group.id belong to the same group + auto_offset_reset: str + Action to take when there is no initial offset in offset store or the desired offset is out of range (smallest, earliest, beginning, largest, latest, end, error) + schema_registry: str + schema registry (kubernetes service) URL + security: dict + enabled boolean: Whether to use security for kafka schema registry client. + caLoc str: Kafka schema registry certification authority (CA) file location. + keyLoc str: Kafka schema registry client's private key file location. + certLoc str: Kafka schema registry client's public key file location. 
+ + collection_topic: str + collection topic you want to produce to + granule_topic: str + granule topic you want to produce to """ + self.metadata_type = metadata_type + self.brokers = brokers + self.schema_registry = schema_registry + self.security_enabled = security['enabled'] - # create logger - log = logging.getLogger() - - # create formatter and add it to the handlers - formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + if self.security_enabled: + self.security_caLoc = security['caLoc'] + self.security_keyLoc = security['keyLoc'] + self.security_certLoc = security['certLoc'] - if self.conf['log_level'] == "DEBUG": - log.setLevel(level=logging.DEBUG) - else: - if self.conf['log_level'] == "INFO": - log.setLevel(level=logging.INFO) - else: - log.setLevel(level=logging.ERROR) - - fh = None - if create_file: - # create file handler for logger. - fh = logging.FileHandler(log_name) - fh.setFormatter(formatter) + self.collection_topic = collection_topic_publish + self.granule_topic = granule_topic_publish - # create console handler for logger. - ch = logging.StreamHandler() - ch.setFormatter(formatter) + if self.metadata_type not in ['COLLECTION', 'GRANULE']: + raise ValueError("metadata_type must be 'COLLECTION' or 'GRANULE'") - # add handlers to logger. - if create_file: - log.addHandler(fh) + self.logger = ClientLogger.get_logger(self.__class__.__name__, log_level, False) + self.logger.info("Initializing " + self.__class__.__name__) - log.addHandler(ch) - return log + if wildargs: + self.logger.warning("There were extra constructor arguments: " + str(wildargs)) def connect(self): """ Utilizes register_client() and create_producer(registry_client) to connect to schema registry and allow for producing to kafka topics :return: SerializingProducer Object - based on config values + based on initial constructor values """ registry_client = self.register_client() metadata_producer = self.create_producer(registry_client) @@ -137,10 +119,10 @@ def register_client(self): reg_conf = {'url': self.schema_registry} - if self.security: - reg_conf['ssl.ca.location'] = self.conf['security']['caLoc'] - reg_conf['ssl.key.location'] = self.conf['security']['keyLoc'] - reg_conf['ssl.certificate.location'] = self.conf['security']['certLoc'] + if self.security_enabled: + reg_conf['ssl.ca.location'] = self.security_caLoc + reg_conf['ssl.key.location'] = self.security_keyLoc + reg_conf['ssl.certificate.location'] = self.security_certLoc registry_client = SchemaRegistryClient(reg_conf) return registry_client @@ -153,7 +135,7 @@ def create_producer(self, registry_client): get this from register_client() :return: SerializingProducer Object - based on config values + based on initial constructor values """ metadata_schema = None @@ -166,11 +148,11 @@ def create_producer(self, registry_client): metadata_serializer = AvroSerializer(metadata_schema, registry_client) producer_conf = {'bootstrap.servers': self.brokers} - if self.security: + if self.security_enabled: producer_conf['security.protocol'] = 'SSL' - producer_conf['ssl.ca.location'] = self.conf['security']['caLoc'] - producer_conf['ssl.key.location'] = self.conf['security']['keyLoc'] - producer_conf['ssl.certificate.location'] = self.conf['security']['certLoc'] + producer_conf['ssl.ca.location'] = self.security_caLoc + producer_conf['ssl.key.location'] = self.security_keyLoc + producer_conf['ssl.certificate.location'] = self.security_certLoc meta_producer_conf = producer_conf meta_producer_conf['value.serializer'] = 
metadata_serializer @@ -180,7 +162,7 @@ def create_producer(self, registry_client): def delivery_report(self, err, msg): """ - Called once for each message produced to indicate delivery result. Triggered by poll() or flush(). + Called once for each message produced to indicate delivery of message. Triggered by poll() or flush(). :param err: str err produced after publishing, if there is one From 392788aee0f8714b021d9a8fb96b89f2278a686b Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 28 Apr 2021 18:06:35 -0600 Subject: [PATCH 013/129] 1500-Changed KafkaConsumerTest(s) to have vars named exp where it makes sense. Added test for extra arguments via constructor. Fixed test for testing less parameters passed in, because more could have made it through. --- .../tests/KafkaConsumerTest.py | 40 ++++++++++++------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/onestop-python-client/tests/KafkaConsumerTest.py b/onestop-python-client/tests/KafkaConsumerTest.py index 776e21a..d1e6195 100644 --- a/onestop-python-client/tests/KafkaConsumerTest.py +++ b/onestop-python-client/tests/KafkaConsumerTest.py @@ -76,9 +76,14 @@ def test_init_metadata_type_invalid(self): self.assertRaises(ValueError, KafkaConsumer, **wrong_metadata_type_config) + def test_init_extra_params(self): + conf = dict(self.conf_wo_security) + conf['junk_key'] = 'junk_value' + KafkaConsumer(**conf) + @patch.object(SchemaRegistryClient, '__init__', autospec=True) def test_register_client_w_security(self, mock_client): - schema_conf = { + exp_security_conf = { 'url':self.conf_w_security['schema_registry'], 'ssl.ca.location': self.conf_w_security['security']['caLoc'], 'ssl.key.location': self.conf_w_security['security']['keyLoc'], @@ -90,19 +95,25 @@ def test_register_client_w_security(self, mock_client): consumer.register_client() mock_client.assert_called() - mock_client.assert_called_with(ANY, schema_conf) + mock_client.assert_called_with(ANY, exp_security_conf) @patch.object(SchemaRegistryClient, '__init__', autospec=True) def test_register_client_wo_security(self, mock_client): - schema_conf = { - 'url' : self.conf_wo_security['schema_registry'] + exp_security_conf = { + 'url':self.conf_w_security['schema_registry'], + 'ssl.ca.location': self.conf_w_security['security']['caLoc'], + 'ssl.key.location': self.conf_w_security['security']['keyLoc'], + 'ssl.certificate.location': self.conf_w_security['security']['certLoc'] } mock_client.return_value = None consumer = KafkaConsumer(**self.conf_wo_security) consumer.register_client() - - mock_client.assert_called_with(ANY, schema_conf) + try: + mock_client.assert_called_with(ANY, exp_security_conf) + except: + return + raise AssertionError('Expected register_client() to not have been called with security arguments.') @patch('onestop.KafkaConsumer.AvroDeserializer') @patch('onestop.KafkaConsumer.DeserializingConsumer') @@ -199,7 +210,7 @@ def test_create_consumer_granule_w_security(self, mock_deserializing_consumer, m @patch('onestop.KafkaConsumer.DeserializingConsumer') def test_create_consumer_granule_wo_security(self, mock_deserializing_consumer, mock_avro_deserializer): conf_wo_security_granule = dict(self.conf_wo_security) - topic = conf_wo_security_granule['granule_topic_consume'] + exp_topic = conf_wo_security_granule['granule_topic_consume'] conf_wo_security_granule['metadata_type'] = 'GRANULE' # Verify security taken into consideration @@ -215,13 +226,14 @@ def test_create_consumer_granule_wo_security(self, mock_deserializing_consumer, 
consumer.create_consumer(reg_client) # Verify metadata type was taken into consideration for getting topic information - reg_client.get_latest_version.assert_called_with(topic + '-value') + reg_client.get_latest_version.assert_called_with(exp_topic + '-value') # Verify no security passed into DeserializingConsumer called with expected configuration - meta_consumer_conf['key.deserializer'] = ANY - meta_consumer_conf['value.deserializer'] = ANY - mock_deserializing_consumer.assert_called_with(meta_consumer_conf) - mock_deserializing_consumer.return_value.subscribe.assert_called_with([topic]) + exp_arguments = dict(meta_consumer_conf) + exp_arguments['key.deserializer'] = ANY + exp_arguments['value.deserializer'] = ANY + mock_deserializing_consumer.assert_called_with(exp_arguments) + mock_deserializing_consumer.return_value.subscribe.assert_called_with([exp_topic]) def test_connect(self): mock_client = MagicMock() @@ -260,5 +272,5 @@ def test_consume(self, mock_metadata_consumer, mock_message): mock_handler.assert_called_once() mock_handler.assert_called_with(mock_message_key, mock_message_value) - if __name__ == '__main__': - unittest.main() \ No newline at end of file +if __name__ == '__main__': + unittest.main() \ No newline at end of file From 3426472c26738ae8866329fecfad58335f440bac Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 4 May 2021 09:56:56 -0600 Subject: [PATCH 014/129] 1500-KafkaConsumerTest improved the formatting of dicts, tested return values a little better where could, and changed a test of assertTrue to assertEqual so got to see what the actual value was when failed. --- .../tests/KafkaConsumerTest.py | 101 ++++++++++-------- 1 file changed, 56 insertions(+), 45 deletions(-) diff --git a/onestop-python-client/tests/KafkaConsumerTest.py b/onestop-python-client/tests/KafkaConsumerTest.py index d1e6195..1246789 100644 --- a/onestop-python-client/tests/KafkaConsumerTest.py +++ b/onestop-python-client/tests/KafkaConsumerTest.py @@ -3,7 +3,6 @@ from unittest.mock import ANY, patch, MagicMock, call from onestop.KafkaConsumer import KafkaConsumer from confluent_kafka.schema_registry import SchemaRegistryClient -from confluent_kafka.serialization import StringDeserializer class KafkaConsumerTest(unittest.TestCase): kp = None @@ -124,11 +123,13 @@ def test_create_consumer_calls_AvroDeserializer(self, mock_deserializing_consume consumer = KafkaConsumer(**conf_w_security_collection) reg_client = consumer.register_client() reg_client.get_latest_version = MagicMock() - consumer.create_consumer(reg_client) + deser_consumer = consumer.create_consumer(reg_client) # Verify AvroDeserializer called with expected registry client mock_avro_deserializer.assert_called_with(ANY, reg_client) + self.assertIsNotNone(deser_consumer) + @patch('onestop.KafkaConsumer.AvroDeserializer') @patch('onestop.KafkaConsumer.DeserializingConsumer') def test_create_consumer_collection_w_security(self, mock_deserializing_consumer, mock_avro_deserializer): @@ -138,24 +139,28 @@ def test_create_consumer_collection_w_security(self, mock_deserializing_consumer consumer = KafkaConsumer(**conf_w_security_collection) reg_client = MagicMock() - consumer.create_consumer(reg_client) + deser_consumer = consumer.create_consumer(reg_client) # Verify metadata type was taken into consideration for getting topic information reg_client.get_latest_version.assert_called_with(topic + '-value') # Verify security passed into DeserializingConsumer - mock_deserializing_consumer.assert_called_with({'bootstrap.servers': 
conf_w_security_collection['brokers'], - 'security.protocol': 'SSL', - 'ssl.ca.location': conf_w_security_collection['security']['caLoc'], - 'ssl.key.location': conf_w_security_collection['security']['keyLoc'], - 'ssl.certificate.location': conf_w_security_collection['security']['certLoc'], - 'key.deserializer': ANY, - 'value.deserializer': ANY, - 'group.id': conf_w_security_collection['group_id'], - 'auto.offset.reset': conf_w_security_collection['auto_offset_reset'] - }) + mock_deserializing_consumer.assert_called_with( + { + 'bootstrap.servers': conf_w_security_collection['brokers'], + 'security.protocol': 'SSL', + 'ssl.ca.location': conf_w_security_collection['security']['caLoc'], + 'ssl.key.location': conf_w_security_collection['security']['keyLoc'], + 'ssl.certificate.location': conf_w_security_collection['security']['certLoc'], + 'key.deserializer': ANY, + 'value.deserializer': ANY, + 'group.id': conf_w_security_collection['group_id'], + 'auto.offset.reset': conf_w_security_collection['auto_offset_reset'] + }) mock_deserializing_consumer.return_value.subscribe.assert_called_with([topic]) + self.assertIsNotNone(deser_consumer) + @patch('onestop.KafkaConsumer.AvroDeserializer') @patch('onestop.KafkaConsumer.DeserializingConsumer') def test_create_consumer_collection_wo_security(self, mock_deserializing_consumer, mock_avro_deserializer): @@ -165,20 +170,24 @@ def test_create_consumer_collection_wo_security(self, mock_deserializing_consume consumer = KafkaConsumer(**conf_wo_security_collection) reg_client = MagicMock() - consumer.create_consumer(reg_client) + deser_consumer = consumer.create_consumer(reg_client) # Verify metadata type was taken into consideration for getting topic information reg_client.get_latest_version.assert_called_with(topic + '-value') # Verify no security passed into DeserializingConsumer - mock_deserializing_consumer.assert_called_with({'bootstrap.servers': conf_wo_security_collection['brokers'], - 'key.deserializer': ANY, - 'value.deserializer': ANY, - 'group.id': conf_wo_security_collection['group_id'], - 'auto.offset.reset': conf_wo_security_collection['auto_offset_reset'] - }) + mock_deserializing_consumer.assert_called_with( + { + 'bootstrap.servers': conf_wo_security_collection['brokers'], + 'key.deserializer': ANY, + 'value.deserializer': ANY, + 'group.id': conf_wo_security_collection['group_id'], + 'auto.offset.reset': conf_wo_security_collection['auto_offset_reset'] + }) mock_deserializing_consumer.return_value.subscribe.assert_called_with([topic]) + self.assertIsNotNone(deser_consumer) + @patch('onestop.KafkaConsumer.AvroDeserializer') @patch('onestop.KafkaConsumer.DeserializingConsumer') def test_create_consumer_granule_w_security(self, mock_deserializing_consumer, mock_avro_deserializer): @@ -188,24 +197,28 @@ def test_create_consumer_granule_w_security(self, mock_deserializing_consumer, m consumer = KafkaConsumer(**conf_w_security_granule) reg_client = MagicMock() - consumer.create_consumer(reg_client) + deser_consumer = consumer.create_consumer(reg_client) # Verify metadata type was taken into consideration for getting topic information reg_client.get_latest_version.assert_called_with(topic + '-value') # Verify security passed into DeserializingConsumer - mock_deserializing_consumer.assert_called_with({'bootstrap.servers': conf_w_security_granule['brokers'], - 'security.protocol': 'SSL', - 'ssl.ca.location': conf_w_security_granule['security']['caLoc'], - 'ssl.key.location': conf_w_security_granule['security']['keyLoc'], - 
'ssl.certificate.location': conf_w_security_granule['security']['certLoc'], - 'key.deserializer': ANY, - 'value.deserializer': ANY, - 'group.id': conf_w_security_granule['group_id'], - 'auto.offset.reset': conf_w_security_granule['auto_offset_reset'] - }) + mock_deserializing_consumer.assert_called_with( + { + 'bootstrap.servers': conf_w_security_granule['brokers'], + 'security.protocol': 'SSL', + 'ssl.ca.location': conf_w_security_granule['security']['caLoc'], + 'ssl.key.location': conf_w_security_granule['security']['keyLoc'], + 'ssl.certificate.location': conf_w_security_granule['security']['certLoc'], + 'key.deserializer': ANY, + 'value.deserializer': ANY, + 'group.id': conf_w_security_granule['group_id'], + 'auto.offset.reset': conf_w_security_granule['auto_offset_reset'] + }) mock_deserializing_consumer.return_value.subscribe.assert_called_with([topic]) + self.assertIsNotNone(deser_consumer) + @patch('onestop.KafkaConsumer.AvroDeserializer') @patch('onestop.KafkaConsumer.DeserializingConsumer') def test_create_consumer_granule_wo_security(self, mock_deserializing_consumer, mock_avro_deserializer): @@ -213,28 +226,26 @@ def test_create_consumer_granule_wo_security(self, mock_deserializing_consumer, exp_topic = conf_wo_security_granule['granule_topic_consume'] conf_wo_security_granule['metadata_type'] = 'GRANULE' - # Verify security taken into consideration - meta_consumer_conf = {'bootstrap.servers': conf_wo_security_granule['brokers'], - 'key.deserializer': StringDeserializer('utf-8'), - 'value.deserializer': mock_avro_deserializer, - 'group.id': conf_wo_security_granule['group_id'], - 'auto.offset.reset': conf_wo_security_granule['auto_offset_reset'] - } - consumer = KafkaConsumer(**conf_wo_security_granule) reg_client = MagicMock() - consumer.create_consumer(reg_client) + deser_consumer = consumer.create_consumer(reg_client) # Verify metadata type was taken into consideration for getting topic information reg_client.get_latest_version.assert_called_with(exp_topic + '-value') # Verify no security passed into DeserializingConsumer called with expected configuration - exp_arguments = dict(meta_consumer_conf) - exp_arguments['key.deserializer'] = ANY - exp_arguments['value.deserializer'] = ANY - mock_deserializing_consumer.assert_called_with(exp_arguments) + mock_deserializing_consumer.assert_called_with( + { + 'bootstrap.servers': conf_wo_security_granule['brokers'], + 'key.deserializer': ANY, + 'value.deserializer': ANY, + 'group.id': conf_wo_security_granule['group_id'], + 'auto.offset.reset': conf_wo_security_granule['auto_offset_reset'] + }) mock_deserializing_consumer.return_value.subscribe.assert_called_with([exp_topic]) + self.assertIsNotNone(deser_consumer) + def test_connect(self): mock_client = MagicMock() @@ -265,7 +276,7 @@ def test_consume(self, mock_metadata_consumer, mock_message): print("Ignoring exception: {}".format(e)) # Verify kafka consumer poll called expected number of times - self.assertTrue(mock_metadata_consumer.poll.call_count == 3) + self.assertEqual(mock_metadata_consumer.poll.call_count, 3) mock_metadata_consumer.poll.assert_has_calls([call(10), call(10), call(10)]) # Verify callback function was called once with expected message attributes From 47c9dce8de9aa898e6997aecddf2e9c93687ffd8 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 4 May 2021 10:32:47 -0600 Subject: [PATCH 015/129] 1500-Changed/added to KafkaPublisherTest(s). 
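One nuance in these fixtures is worth spelling out, since both test classes rely on it: dict(...) makes only a shallow copy, so the no-security variant must rebind the 'security' key rather than mutate the nested dict, otherwise the with-security fixture would change too. A standalone sketch of the behaviour:

    conf_w_security = {'security': {'enabled': True, 'caLoc': '/etc/pki/tls/cert.pem'}}

    conf_wo_security = dict(conf_w_security)             # shallow copy: the nested dict is shared
    conf_wo_security['security'] = {'enabled': False}    # safe: rebinds the key on the copy only
    assert conf_w_security['security']['enabled'] is True

    # Mutating the shared nested dict instead would leak into the original:
    # conf_wo_security['security']['enabled'] = False  would flip conf_w_security as well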
--- .../tests/KafkaPublisherTest.py | 326 +++++++++++++++++- 1 file changed, 318 insertions(+), 8 deletions(-) diff --git a/onestop-python-client/tests/KafkaPublisherTest.py b/onestop-python-client/tests/KafkaPublisherTest.py index 7d992ae..643d4f5 100644 --- a/onestop-python-client/tests/KafkaPublisherTest.py +++ b/onestop-python-client/tests/KafkaPublisherTest.py @@ -1,25 +1,335 @@ import unittest - import json from onestop.KafkaPublisher import KafkaPublisher +from unittest.mock import ANY, patch, MagicMock +from confluent_kafka.schema_registry import SchemaRegistryClient class KafkaPublisherTest(unittest.TestCase): kp = None + conf_w_security = None + conf_wo_security = None - def setUp(self): + @classmethod + def setUp(cls): print("Set it up!") - self.kp = KafkaPublisher("../config/kafka-publisher-config-dev.yml") + cls.conf_w_security = { + "metadata_type" : "GRANULE", + "brokers" : "onestop-dev-cp-kafka:9092", + "schema_registry" : "http://onestop-dev-cp-schema-registry:8081", + "security" : { + "enabled" : True, + "caLoc" : "/etc/pki/tls/cert.pem", + "keyLoc" : "/etc/pki/tls/private/kafka-user.key", + "certLoc" : "/etc/pki/tls/certs/kafka-user.crt" + }, + "collection_topic_publish" : "psi-collection-input-unknown", + "granule_topic_publish" : "psi-granule-input-unknown", + "log_level" : "DEBUG" + } + cls.conf_wo_security = dict(cls.conf_w_security) + # Remove security credential section. + cls.conf_wo_security['security'] = { + "enabled":False + } + @classmethod def tearDown(self): print("Tear it down!") - def test_parse_config(self): - self.assertFalse(self.kp.conf['brokers']==None) + def test_init_happy_nonconditional_params(self): + publisher = KafkaPublisher(**self.conf_w_security) + + self.assertEqual(publisher.metadata_type, self.conf_w_security['metadata_type']) + self.assertEqual(publisher.brokers, self.conf_w_security['brokers']) + self.assertEqual(publisher.schema_registry, self.conf_w_security['schema_registry']) + self.assertEqual(publisher.security_enabled, self.conf_w_security['security']['enabled']) + self.assertEqual(publisher.collection_topic, self.conf_w_security['collection_topic_publish']) + self.assertEqual(publisher.granule_topic, self.conf_w_security['granule_topic_publish']) + + def test_init_security_enabled(self): + publisher = KafkaPublisher(**self.conf_w_security) + + self.assertEqual(publisher.security_caLoc, self.conf_w_security['security']['caLoc']) + self.assertEqual(publisher.security_keyLoc, self.conf_w_security['security']['keyLoc']) + self.assertEqual(publisher.security_certLoc, self.conf_w_security['security']['certLoc']) + + def test_init_security_disabled(self): + publisher = KafkaPublisher(**self.conf_wo_security) + + self.assertRaises(AttributeError, getattr, publisher, "security_caLoc") + self.assertRaises(AttributeError, getattr, publisher, "security_keyLoc") + self.assertRaises(AttributeError, getattr, publisher, "security_certLoc") + + def test_init_metadata_type_valid(self): + publisher = KafkaPublisher(**self.conf_w_security) + + self.assertEqual(publisher.metadata_type, self.conf_w_security['metadata_type']) + + def test_init_metadata_type_invalid(self): + wrong_metadata_type_config = dict(self.conf_w_security) + wrong_metadata_type_config['metadata_type'] = "invalid_type" + + self.assertRaises(ValueError, KafkaPublisher, **wrong_metadata_type_config) + + def test_init_extra_params(self): + conf = dict(self.conf_wo_security) + conf['junk_key'] = 'junk_value' + KafkaPublisher(**conf) + + @patch.object(SchemaRegistryClient, '__init__', 
autospec=True) + def test_register_client_w_security(self, mock_client): + exp_security_conf = { + 'url':self.conf_w_security['schema_registry'], + 'ssl.ca.location': self.conf_w_security['security']['caLoc'], + 'ssl.key.location': self.conf_w_security['security']['keyLoc'], + 'ssl.certificate.location': self.conf_w_security['security']['certLoc'] + } + mock_client.return_value = None + + publisher = KafkaPublisher(**self.conf_w_security) + publisher.register_client() + + mock_client.assert_called() + mock_client.assert_called_with(ANY, exp_security_conf) + + @patch.object(SchemaRegistryClient, '__init__', autospec=True) + def test_register_client_wo_security(self, mock_client): + exp_security_conf = { + 'url':self.conf_w_security['schema_registry'], + 'ssl.ca.location': self.conf_w_security['security']['caLoc'], + 'ssl.key.location': self.conf_w_security['security']['keyLoc'], + 'ssl.certificate.location': self.conf_w_security['security']['certLoc'] + } + mock_client.return_value = None + + publisher = KafkaPublisher(**self.conf_wo_security) + publisher.register_client() + try: + mock_client.assert_called_with(ANY, exp_security_conf) + except: + return + raise AssertionError('Expected register_client() to not have been called with security arguments.') + + @patch('onestop.KafkaPublisher.AvroSerializer') + @patch('onestop.KafkaPublisher.SerializingProducer') + def test_create_producer_calls_AvroSerializer(self, mock_serializing_publisher, mock_avro_serializer): + conf_w_security_collection = dict(self.conf_w_security) + conf_w_security_collection['metadata_type'] = "COLLECTION" + + publisher = KafkaPublisher(**conf_w_security_collection) + reg_client = publisher.register_client() + reg_client.get_latest_version = MagicMock() + publisher.create_producer(reg_client) + + # Verify AvroSerializer called with expected registry client + mock_avro_serializer.assert_called_with(ANY, reg_client) + + @patch('onestop.KafkaPublisher.AvroSerializer') + @patch('onestop.KafkaPublisher.SerializingProducer') + def test_create_producer_collection_w_security(self, mock_serializing_producer, mock_avro_serializer): + conf_w_security_collection = dict(self.conf_w_security) + topic = conf_w_security_collection['collection_topic_publish'] + conf_w_security_collection['metadata_type'] = 'COLLECTION' + + publisher = KafkaPublisher(**conf_w_security_collection) + reg_client = MagicMock() + prod = publisher.create_producer(reg_client) + + # Verify metadata type was taken into consideration for getting topic information + reg_client.get_latest_version.assert_called_with(topic + '-value') + + # Verify security passed into SerializingProducer + mock_serializing_producer.assert_called_with( + { + 'bootstrap.servers': conf_w_security_collection['brokers'], + 'security.protocol': 'SSL', + 'ssl.ca.location': conf_w_security_collection['security']['caLoc'], + 'ssl.key.location': conf_w_security_collection['security']['keyLoc'], + 'ssl.certificate.location': conf_w_security_collection['security']['certLoc'], + 'value.serializer': ANY, + }) + + self.assertIsNotNone(prod) + + @patch('onestop.KafkaPublisher.AvroSerializer') + @patch('onestop.KafkaPublisher.SerializingProducer') + def test_create_producer_collection_wo_security(self, mock_serializing_producer, mock_avro_serializer): + conf_wo_security_collection = dict(self.conf_wo_security) + topic = conf_wo_security_collection['collection_topic_publish'] + conf_wo_security_collection['metadata_type'] = 'COLLECTION' + + publisher = KafkaPublisher(**conf_wo_security_collection) + 
reg_client = MagicMock() + prod = publisher.create_producer(reg_client) + + # Verify metadata type was taken into consideration for getting topic information + reg_client.get_latest_version.assert_called_with(topic + '-value') + + # Verify no security passed into SerializingProducer + mock_serializing_producer.assert_called_with( + { + 'bootstrap.servers': conf_wo_security_collection['brokers'], + 'value.serializer': ANY, + }) + + self.assertIsNotNone(prod) + + @patch('onestop.KafkaPublisher.AvroSerializer') + @patch('onestop.KafkaPublisher.SerializingProducer') + def test_create_producer_granule_w_security(self, mock_serializing_producer, mock_avro_serializer): + conf_w_security_granule = dict(self.conf_w_security) + topic = conf_w_security_granule['granule_topic_publish'] + conf_w_security_granule['metadata_type'] = 'GRANULE' + + publisher = KafkaPublisher(**conf_w_security_granule) + reg_client = MagicMock() + prod = publisher.create_producer(reg_client) + + # Verify metadata type was taken into consideration for getting topic information + reg_client.get_latest_version.assert_called_with(topic + '-value') + + # Verify security passed into SerializingProducer + mock_serializing_producer.assert_called_with( + { + 'bootstrap.servers': conf_w_security_granule['brokers'], + 'security.protocol': 'SSL', + 'ssl.ca.location': conf_w_security_granule['security']['caLoc'], + 'ssl.key.location': conf_w_security_granule['security']['keyLoc'], + 'ssl.certificate.location': conf_w_security_granule['security']['certLoc'], + 'value.serializer': ANY, + }) + + self.assertIsNotNone(prod) + + @patch('onestop.KafkaPublisher.AvroSerializer') + @patch('onestop.KafkaPublisher.SerializingProducer') + def test_create_producer_granule_wo_security(self, mock_serializing_producer, mock_avro_serializer): + conf_wo_security_granule = dict(self.conf_wo_security) + exp_topic = conf_wo_security_granule['granule_topic_publish'] + conf_wo_security_granule['metadata_type'] = 'GRANULE' + + publisher = KafkaPublisher(**conf_wo_security_granule) + reg_client = MagicMock() + prod = publisher.create_producer(reg_client) + + # Verify metadata type was taken into consideration for getting topic information + reg_client.get_latest_version.assert_called_with(exp_topic + '-value') + + # Verify no security passed into SerializingProducer called with expected configuration + mock_serializing_producer.assert_called_with( + { + 'bootstrap.servers': conf_wo_security_granule['brokers'], + 'value.serializer': ANY, + }) + + self.assertIsNotNone(prod) + + def test_connect(self): + mock_client = MagicMock() + + publisher = KafkaPublisher(**self.conf_w_security) + publisher.register_client = MagicMock(return_value=mock_client) + publisher.create_producer = MagicMock(return_value=MagicMock(mock_client)) + publisher.connect() + + publisher.register_client.assert_called_once() + publisher.create_producer.assert_called_with(mock_client) + + def test_get_collection_key_from_uuid(self): + expKey = '12345678-1234-5678-1234-567812345678' + for uuid in [ + '{12345678-1234-5678-1234-567812345678}', + '12345678123456781234567812345678', + 'urn:uuid:12345678-1234-5678-1234-567812345678', + b'\x12\x34\x56\x78'*4, +# b'\x78\x56\x34\x12\x34\x12\x78\x56' + b'\x12\x34\x56\x78\x12\x34\x56\x78', +# {0x12345678, 0x1234, 0x5678, 0x12, 0x34, 0x567812345678}, +# 0x12345678123456781234567812345678, + ]: + with self.subTest(uuid=uuid): + print ("Testing uuid "+str(uuid)) + key = KafkaPublisher.get_collection_key_from_uuid(uuid) + print("Acquired uuid="+str(key)) + 
self.assertEqual(key, expKey) + + @patch('onestop.KafkaPublisher.SerializingProducer') + def test_publish_collection(self, mock_collection_producer): + uuid = '{12345678-1234-5678-1234-567812345678}' + content_dict = { + 'title': 'this is a test', + 'location': 'somewhere in space' + } + method = 'PUT' + publisher = KafkaPublisher(**self.conf_w_security) + publisher.register_client = MagicMock(return_value=MagicMock()) + mock_collection_producer.produce = MagicMock() + mock_collection_producer.poll.side_effect = [1] + + publisher.publish_collection(mock_collection_producer, uuid, content_dict, method) + + # Verify kafka produce called once + mock_collection_producer.produce.assert_called_with( + topic=self.conf_w_security['collection_topic_publish'], + value={ + 'type': 'collection', + 'content': json.dumps(content_dict), + 'contentType': 'application/json', + 'method': method, + 'source': 'unknown', + }, + key=publisher.get_collection_key_from_uuid(uuid), + on_delivery=publisher.delivery_report + ) + + # Verify kafka produce poll called once + mock_collection_producer.poll.assert_called_once() + + + @patch('onestop.KafkaPublisher.SerializingProducer') + def test_publish_granule(self, mock_collection_producer): + uuid = '{12345678-1234-5678-1234-567812345678}' + content_dict = { + 'title': 'this is a test', + 'location': 'somewhere in space', + 'relationships': [{"type": "COLLECTION", + "id": '{12345678-1234-5678-1234-567812345678}'}], + 'errors': [], + 'analysis': 'No analysis', + 'fileLocations': 'archived', + 'fileInformation': 'no information', + 'discovery': 'AWS' + } + publisher = KafkaPublisher(**self.conf_w_security) + publisher.register_client = MagicMock(return_value=MagicMock()) + mock_collection_producer.produce = MagicMock() + mock_collection_producer.poll.side_effect = [1] + + publisher.publish_granule(mock_collection_producer, uuid, content_dict) + + # Verify kafka produce called once + mock_collection_producer.produce.assert_called_with( + topic=self.conf_w_security['granule_topic_publish'], + value={ + 'type': 'granule', + 'content': json.dumps(content_dict), + #'contentType': 'application/json', + 'method': 'PUT', + 'source': 'unknown', + 'operation': None, + 'relationships': content_dict['relationships'], + 'errors': content_dict['errors'], + 'analysis': content_dict['analysis'], + 'fileLocations': {'fileLocation': content_dict['fileLocations']}, + 'fileInformation': content_dict['fileInformation'], + 'discovery': content_dict['discovery'] + }, + key=publisher.get_collection_key_from_uuid(uuid), + on_delivery=publisher.delivery_report + ) - def test_publish_collection(self): - print("Publish collection") - # Integration test TBD + # Verify kafka produce poll called once + mock_collection_producer.poll.assert_called_once() if __name__ == '__main__': unittest.main() \ No newline at end of file From 921490ac1b3d0cc113e1c8a62ef8ab3f00cd1aa8 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 4 May 2021 10:34:45 -0600 Subject: [PATCH 016/129] 1500-In KafkaPublisher cleaned up documentation, added method to consolidate generating the key from UUID, and added a little logging. Consolidated topic generation code too. 
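The consolidated key helper leans on the standard library's uuid.UUID, which accepts braces, a urn:uuid: prefix, bare hex, or raw bytes and renders one canonical string; a small illustration of the equivalence the tests above rely on:

    from uuid import UUID

    # All of these normalize to the same canonical key.
    print(UUID(hex='{12345678-1234-5678-1234-567812345678}'))
    print(UUID(hex='urn:uuid:12345678-1234-5678-1234-567812345678'))
    print(UUID(hex='12345678123456781234567812345678'))
    print(UUID(bytes=b'\x12\x34\x56\x78' * 4))
    # -> 12345678-1234-5678-1234-567812345678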
--- .../onestop/KafkaPublisher.py | 77 +++++++++++-------- 1 file changed, 46 insertions(+), 31 deletions(-) diff --git a/onestop-python-client/onestop/KafkaPublisher.py b/onestop-python-client/onestop/KafkaPublisher.py index 125174b..047783c 100644 --- a/onestop-python-client/onestop/KafkaPublisher.py +++ b/onestop-python-client/onestop/KafkaPublisher.py @@ -48,7 +48,7 @@ class KafkaPublisher: publish_collection(collection_producer, collection_uuid, content_dict, method) Publish collection to collection topic - publish_granule(granule_producer, record_uuid, collection_uuid, content_dict) + publish_granule(granule_producer, collection_uuid, content_dict) Publish granule to granule topic """ @@ -137,27 +137,27 @@ def create_producer(self, registry_client): :return: SerializingProducer Object based on initial constructor values """ - metadata_schema = None + topic = None if self.metadata_type == "COLLECTION": - metadata_schema = registry_client.get_latest_version(self.collection_topic + '-value').schema.schema_str + topic = self.collection_topic if self.metadata_type == "GRANULE": - metadata_schema = registry_client.get_latest_version(self.granule_topic + '-value').schema.schema_str + topic = self.granule_topic + metadata_schema = registry_client.get_latest_version(topic + '-value').schema.schema_str metadata_serializer = AvroSerializer(metadata_schema, registry_client) - producer_conf = {'bootstrap.servers': self.brokers} + conf = {'bootstrap.servers': self.brokers} if self.security_enabled: - producer_conf['security.protocol'] = 'SSL' - producer_conf['ssl.ca.location'] = self.security_caLoc - producer_conf['ssl.key.location'] = self.security_keyLoc - producer_conf['ssl.certificate.location'] = self.security_certLoc + conf['security.protocol'] = 'SSL' + conf['ssl.ca.location'] = self.security_caLoc + conf['ssl.key.location'] = self.security_keyLoc + conf['ssl.certificate.location'] = self.security_certLoc - meta_producer_conf = producer_conf - meta_producer_conf['value.serializer'] = metadata_serializer + conf['value.serializer'] = metadata_serializer - metadata_producer = SerializingProducer(meta_producer_conf) + metadata_producer = SerializingProducer(conf) return metadata_producer def delivery_report(self, err, msg): @@ -174,14 +174,27 @@ def delivery_report(self, err, msg): else: self.logger.error('Message delivered to {} [{}]'.format(msg.topic(), msg.partition())) + @staticmethod + def get_collection_key_from_uuid(collection_uuid): + """ + Create a key to use in a kafka message from the given string representation of the collection UUID. + :param collection_uuid: str + collection string to turn into a key. 
+ :return: + """ + if type(collection_uuid) == bytes: + return str(UUID(bytes=collection_uuid)) + else: + return str(UUID(hex=collection_uuid)) + def publish_collection(self, collection_producer, collection_uuid, content_dict, method): """ - Publish collection to collection topic + Publish a collection to the collection topic :param collection_producer: SerializingProducer use connect() :param collection_uuid: str - collection uuid that you want colelction to have + collection uuid that you want the collection to have :param content_dict: dict dictionary containing information you want to publish :param method: str @@ -190,11 +203,9 @@ def publish_collection(self, collection_producer, collection_uuid, content_dict, :return: str returns msg if publish is successful, kafka error if it wasn't successful """ - self.logger.info('Publish collection') - if type(collection_uuid) == bytes: - key = str(UUID(bytes=collection_uuid)) - else: - key = str(UUID(hex=collection_uuid)) + self.logger.info('Publishing collection') + + key = self.get_collection_key_from_uuid(collection_uuid) value_dict = { 'type': 'collection', @@ -204,20 +215,22 @@ def publish_collection(self, collection_producer, collection_uuid, content_dict, 'source': 'unknown', } try: - collection_producer.produce(topic=self.collection_topic, value=value_dict, key=key, - on_delivery=self.delivery_report) + self.logger.debug('Publishing collection with topic='+self.collection_topic+' key='+key+' value='+str(value_dict)) + collection_producer.produce( + topic=self.collection_topic, + value=value_dict, + key=key, + on_delivery=self.delivery_report) except KafkaError: raise collection_producer.poll() - def publish_granule(self, granule_producer, record_uuid, collection_uuid, content_dict): + def publish_granule(self, granule_producer, collection_uuid, content_dict): """ - Publishes granule to granule topic + Publish a granule to the granule topic :param granule_producer: SerializingProducer use connect() - :param record_uuid: str - record uuid associated with the granule :param collection_uuid: str collection uuid associated with the granule :param content_dict: dict @@ -228,10 +241,8 @@ def publish_granule(self, granule_producer, record_uuid, collection_uuid, conten """ self.logger.info('Publish granule') - if type(record_uuid) == bytes: - key = str(UUID(bytes=collection_uuid)) - else: - key = str(UUID(hex=collection_uuid)) + key = self.get_collection_key_from_uuid(collection_uuid) + """ if type(collection_uuid) == bytes: content_dict['relationships'] = [{"type": "COLLECTION", "id": collection_uuid.hex()}] @@ -264,8 +275,12 @@ def publish_granule(self, granule_producer, record_uuid, collection_uuid, conten } try: - granule_producer.produce(topic=self.granule_topic, value=value_dict, key=key, - on_delivery=self.delivery_report) + self.logger.debug('Publishing granule with topic='+self.granule_topic+' key='+key+' value='+str(value_dict)) + granule_producer.produce( + topic=self.granule_topic, + value=value_dict, + key=key, + on_delivery=self.delivery_report) except KafkaError: raise granule_producer.poll() From a337c39f88e953d1a99fc9bfc3d15677e63864bd Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 4 May 2021 10:59:27 -0600 Subject: [PATCH 017/129] 1500-Adjusted csb config variable name from file_identifier_prefix to file_id_prefix as it is in half the other places including S3MessageAdapter constructor. 
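With the key renamed to match the constructor argument, a stream config can be loaded and splatted directly into the adapter; a rough sketch, assuming the YAML also carries the other constructor keys (access_bucket, type, collection_id) and that any extra keys are simply absorbed by **wildargs:

    import yaml
    from onestop.util.S3MessageAdapter import S3MessageAdapter

    with open('scripts/config/csb-data-stream-config.yml') as f:
        conf = yaml.load(f, Loader=yaml.FullLoader)

    # 'file_id_prefix' in the YAML now lines up with the constructor parameter.
    s3ma = S3MessageAdapter(**conf)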
---
 .../config/csb-data-stream-config-template.yml | 2 +-
 scripts/config/csb-data-stream-config.yml      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/onestop-python-client/config/csb-data-stream-config-template.yml b/onestop-python-client/config/csb-data-stream-config-template.yml
index 887c9be..56bad99 100644
--- a/onestop-python-client/config/csb-data-stream-config-template.yml
+++ b/onestop-python-client/config/csb-data-stream-config-template.yml
@@ -9,7 +9,7 @@ registry_base_url: http://localhost/onestop/api/registry
 onestop_base_url: http://localhost/onestop/api/search/search
 access_bucket: https://archive-testing-demo.s3-us-east-2.amazonaws.com
 #access_bucket: https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com
-file_identifier_prefix: "gov.noaa.ncei.csb:"
+file_id_prefix: "gov.noaa.ncei.csb:"
 prefixMap:
   NESDIS/CSB: 'fdb56230-87f4-49f2-ab83-104cfd073177'

diff --git a/scripts/config/csb-data-stream-config.yml b/scripts/config/csb-data-stream-config.yml
index 1556ab9..24a7cf6 100644
--- a/scripts/config/csb-data-stream-config.yml
+++ b/scripts/config/csb-data-stream-config.yml
@@ -6,7 +6,7 @@ collection_id: fdb56230-87f4-49f2-ab83-104cfd073177
 psi_registry_url: https://internal-a683c98a66fb011eaa4230e0d5e5657f-369075387.us-east-1.elb.amazonaws.com
 access_bucket: https://archive-testing-demo.s3-us-east-2.amazonaws.com
 #access_bucket: https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com
-file_identifier_prefix: "gov.noaa.ncei.csb:"
+file_id_prefix: "gov.noaa.ncei.csb:"
 prefixMap:
   NESDIS/CSB: 'fdb56230-87f4-49f2-ab83-104cfd073177'

From 5aab6d67df06f6faed7e43343b2dcfa2cf271a38 Mon Sep 17 00:00:00 2001
From: Erin
Date: Tue, 4 May 2021 11:28:56 -0600
Subject: [PATCH 018/129] 1500-Fixed lack of carriage return in S3Utils for
 legibility.

---
 onestop-python-client/onestop/util/S3Utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/onestop-python-client/onestop/util/S3Utils.py b/onestop-python-client/onestop/util/S3Utils.py
index 60fb876..e2f2e32 100644
--- a/onestop-python-client/onestop/util/S3Utils.py
+++ b/onestop-python-client/onestop/util/S3Utils.py
@@ -109,7 +109,8 @@ def connect(self, client_type, region):
         if client_type == "glacier":
             boto = boto3.client(
                 "glacier",
-                region_name=region,aws_access_key_id=self.access_key,
+                region_name=region,
+                aws_access_key_id=self.access_key,
                 aws_secret_access_key=self.secret_key)

         if client_type == "session":

From 5975e1d24d47ccb80fa758b71b18019b1e57f7c6 Mon Sep 17 00:00:00 2001
From: Erin
Date: Fri, 7 May 2021 11:21:15 -0600
Subject: [PATCH 019/129] 1500-Changed SqsConsumer class constructor to take
 dictionary with extra parameters allowed. Refactored out of SqsConsumer the
 connecting part and put into S3Utils, this left only log_level as class var.
 Put creating a Queue object into receive_messages (can refactor out if ever
 needed again, but a single line didn't seem to warrant its own method). Added
 debug logging.
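A rough sketch of how the consumer is wired up after this change; config values are placeholders, and the SQS handle is assumed to come from S3Utils now that the connection code lives there (as the later tests do):

    from onestop.util.S3Utils import S3Utils
    from onestop.util.SqsConsumer import SqsConsumer

    config = {
        'access_key': 'an-access-key',
        'secret_key': 'a-secret-key',
        's3_region': 'us-east-2',
        'sqs_name': 'cloud-archive-client-sqs',
        'sqs_max_polls': 2,
        'log_level': 'INFO',
        'unused_key': 'tolerated',  # extra keys no longer break construction
    }

    s3_utils = S3Utils(**config)
    sqs_consumer = SqsConsumer(**config)

    def handler(records):
        print(records)

    # receive_messages now builds the Queue itself from the SQS handle and queue name.
    sqs = s3_utils.connect('sqs', config['s3_region'])
    sqs_consumer.receive_messages(sqs, config['sqs_name'], config['sqs_max_polls'], handler)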
--- .../onestop/util/SqsConsumer.py | 95 +++++++------------ 1 file changed, 33 insertions(+), 62 deletions(-) diff --git a/onestop-python-client/onestop/util/SqsConsumer.py b/onestop-python-client/onestop/util/SqsConsumer.py index f782cc5..e7ceed4 100644 --- a/onestop-python-client/onestop/util/SqsConsumer.py +++ b/onestop-python-client/onestop/util/SqsConsumer.py @@ -1,10 +1,7 @@ -import logging -from datetime import datetime, timezone -import yaml -import boto3 import json -from onestop.util.ClientLogger import ClientLogger +from datetime import datetime, timezone +from onestop.util.ClientLogger import ClientLogger class SqsConsumer: """ @@ -12,101 +9,75 @@ class SqsConsumer: Attributes ---------- - conf: yaml file - aws-util-config-dev.yml - cred: yaml file - credentials.yml - logger: ClientLogger object - utilizes python logger library and creates logging for our specific needs - logger.info: ClientLogger object - logging statement that occurs when the class is instantiated + logger: ClientLogger object + utilizes python logger library and creates logging for our specific needs Methods ------- - connect() - connects a boto sqs instance based on configurations in conf and cred yml files - - receive_messages(queue, sqs_max_polls, cb) - polls for messages in the queue + receive_messages(sqs_client, sqs_queue_name, sqs_max_polls, cb) + polls for messages in the queue """ - conf = None - def __init__(self, conf_loc, cred_loc): + def __init__(self, log_level = 'INFO', **wildargs): """ - - :param conf_loc: yaml file - aws-util-config-dev.yml - :param cred_loc: yaml file - credentials.yml - - Other Attributes - ---------------- - logger: ClientLogger object - utilizes python logger library and creates logging for our specific needs - logger.info: ClientLogger object - logging statement that occurs when the class is instantiated - + Attributes + ---------- + log_level: str + The log level to use for this class (Defaults to 'INFO') """ - with open(conf_loc) as f: - self.conf = yaml.load(f, Loader=yaml.FullLoader) - - with open(cred_loc) as f: - self.cred = yaml.load(f, Loader=yaml.FullLoader) - - self.logger = ClientLogger.get_logger(self.__class__.__name__, self.conf['log_level'], False) + self.logger = ClientLogger.get_logger(self.__class__.__name__, log_level, False) self.logger.info("Initializing " + self.__class__.__name__) - def connect(self): - """ - Connects a boto sqs instance based on configurations in conf and cred yml files + if wildargs: + self.logger.error("There were extra constructor arguments: " + str(wildargs)) - :return: boto sqs - returns instance of boto sqs resource - """ - boto_session = boto3.Session(aws_access_key_id=self.cred['sandbox']['access_key'], - aws_secret_access_key=self.cred['sandbox']['secret_key']) - # Get the queue. This returns an SQS.Queue instance - sqs_session = boto_session.resource('sqs', region_name=self.conf['s3_region']) - sqs_queue = sqs_session.Queue(self.conf['sqs_url']) - self.logger.info("Connecting to " + self.conf['sqs_url']) - return sqs_queue - - def receive_messages(self, queue, sqs_max_polls, cb): + def receive_messages(self, sqs_client, sqs_queue_name, sqs_max_polls, cb): """ - Polls for messages in the queue + Polls for messages from an sqs queue - :param queue: boto sqs resource - instance of boto sqs resource given from connect() + :param sqs_client: boto SQS.Client + instance of boto sqs Client + :param sqs_queue_name: str + name of the queue to connect to. 
:param sqs_max_polls: int number of polls :param cb: function call back function - :return: Dependent on the call back function + :return: If the Message has a Records key then the call back function gets called on the Message. """ self.logger.info("Receive messages") + self.logger.info("Polling %d time(s) for SQS messages" % sqs_max_polls) + + sqs_queue = sqs_client.Queue(sqs_queue_name) i = 1 while i <= sqs_max_polls: self.logger.info("Polling attempt: " + str(i)) i = i + 1 - sqs_messages = queue.receive_messages(MaxNumberOfMessages=10, WaitTimeSeconds=10) + sqs_messages = sqs_queue.receive_messages( + MaxNumberOfMessages=10, + WaitTimeSeconds=10 + ) self.logger.info("Received %d messages." % len(sqs_messages)) + self.logger.debug("Messages: %s" % sqs_messages) for sqs_message in sqs_messages: try: # Log start time dt_start = datetime.now(tz=timezone.utc) - self.logger.info("Started processing message") + self.logger.info("Starting processing message") + self.logger.debug("Message: %s" % sqs_message) + self.logger.debug("Message body: %s" % sqs_message.body) message_body = json.loads(sqs_message.body) + self.logger.debug("Message body message: %s" % message_body['Message']) message_content = json.loads(message_body['Message']) if 'Records' in message_content: recs = message_content['Records'] - self.logger.info("Received message") self.logger.debug('Records: ' + str(recs)) else: self.logger.info("s3 event without records content received.") From 204a2bd0c4d7101476db87322bf621e1fdd34a07 Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 7 May 2021 14:41:42 -0600 Subject: [PATCH 020/129] 1500-Decided to put "connect" back into SqsConsumer. Adjusted input parameters for receive_message so a user could create their own queue or use our connect to do so and pass either in. --- .../onestop/util/SqsConsumer.py | 22 +++++++++++++------ 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/onestop-python-client/onestop/util/SqsConsumer.py b/onestop-python-client/onestop/util/SqsConsumer.py index e7ceed4..bd7f98f 100644 --- a/onestop-python-client/onestop/util/SqsConsumer.py +++ b/onestop-python-client/onestop/util/SqsConsumer.py @@ -31,14 +31,24 @@ def __init__(self, log_level = 'INFO', **wildargs): if wildargs: self.logger.error("There were extra constructor arguments: " + str(wildargs)) - def receive_messages(self, sqs_client, sqs_queue_name, sqs_max_polls, cb): + def connect(self, sqs_resource, sqs_queue_name): + """ + Gets a boto SQS.Queue resource. + :param sqs_resource: boto SQS.Resource + SQS resource to create the queue from. + :param sqs_queue_name: str + SQS queue name to create and return a boto SQS.Queue object to. + :return: SQS.Queue + An SQS.Queue resource to use for Queue operations. + """ + return sqs_resource.create_queue(QueueName=sqs_queue_name) + + def receive_messages(self, sqs_queue, sqs_max_polls, cb): """ Polls for messages from an sqs queue - :param sqs_client: boto SQS.Client - instance of boto sqs Client - :param sqs_queue_name: str - name of the queue to connect to. + :param sqs_queue: boto SQS.Queue object + boto SQS Queue object. Can be generated by the method in this class. 
:param sqs_max_polls: int number of polls :param cb: function @@ -50,8 +60,6 @@ def receive_messages(self, sqs_client, sqs_queue_name, sqs_max_polls, cb): self.logger.info("Receive messages") self.logger.info("Polling %d time(s) for SQS messages" % sqs_max_polls) - sqs_queue = sqs_client.Queue(sqs_queue_name) - i = 1 while i <= sqs_max_polls: self.logger.info("Polling attempt: " + str(i)) From 47b3e5bd187e082c45af07aab585126afdf1828e Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 7 May 2021 17:26:56 -0600 Subject: [PATCH 021/129] 1500-Fixed some bugs in SqsConsumer. --- .../onestop/util/SqsConsumer.py | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/onestop-python-client/onestop/util/SqsConsumer.py b/onestop-python-client/onestop/util/SqsConsumer.py index bd7f98f..4f503d8 100644 --- a/onestop-python-client/onestop/util/SqsConsumer.py +++ b/onestop-python-client/onestop/util/SqsConsumer.py @@ -60,11 +60,17 @@ def receive_messages(self, sqs_queue, sqs_max_polls, cb): self.logger.info("Receive messages") self.logger.info("Polling %d time(s) for SQS messages" % sqs_max_polls) - i = 1 - while i <= sqs_max_polls: + if sqs_max_polls < 1: + raise ValueError('Max polling value should be greater than 0.') + + for i in range(1, sqs_max_polls+1): self.logger.info("Polling attempt: " + str(i)) - i = i + 1 + # boto3 SQS.Queue appears to have a subset of SQS.Client methods plus a few management queue ones. + # The ones they do share seem to have different return types. + # The message method names are different and return types different: + # Client.send_message and Queue.send_message and Queue.send_messages + # Client.receive_message and Queue.receive_messages sqs_messages = sqs_queue.receive_messages( MaxNumberOfMessages=10, WaitTimeSeconds=10 @@ -86,9 +92,10 @@ def receive_messages(self, sqs_queue, sqs_max_polls, cb): if 'Records' in message_content: recs = message_content['Records'] - self.logger.debug('Records: ' + str(recs)) + self.logger.debug('Message "Records": %s' % recs) + cb(recs) else: - self.logger.info("s3 event without records content received.") + self.logger.info("s3 event message without 'Records' content received.") sqs_message.delete() @@ -98,9 +105,8 @@ def receive_messages(self, sqs_queue, sqs_max_polls, cb): processing_time = dt_end - dt_start self.logger.info("Completed processing message (s):" + str(processing_time.microseconds * 1000)) - cb(recs) except: self.logger.exception( "An exception was thrown while processing a message, but this program will continue. The " - "message will not be deleted from the SQS queue. The message was: %s" % sqs_message.body) + "message will not be deleted from the SQS queue. The message was: %s" % sqs_message) From b2143aefb499ac3faff7d10b6b9f74b49ff2ae72 Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 7 May 2021 17:27:15 -0600 Subject: [PATCH 022/129] 1500-Due to changing SqsConsumer class constructor to take dictionary with extra parameters allowed so adjusted the tests and added more with more verification. 
--- .../tests/util/SqsConsumerTest.py | 175 ++++++++++++++++-- 1 file changed, 159 insertions(+), 16 deletions(-) diff --git a/onestop-python-client/tests/util/SqsConsumerTest.py b/onestop-python-client/tests/util/SqsConsumerTest.py index 4d6be77..87f9005 100644 --- a/onestop-python-client/tests/util/SqsConsumerTest.py +++ b/onestop-python-client/tests/util/SqsConsumerTest.py @@ -1,34 +1,177 @@ import unittest -import boto3 +import json + from moto import mock_sqs -from tests.utils import abspath_from_relative +from unittest.mock import ANY, patch, MagicMock, call +from onestop.util.S3Utils import S3Utils from onestop.util.SqsConsumer import SqsConsumer class SqsConsumerTest(unittest.TestCase): - sc = None + config_dict = { + 'access_key': 'test_access_key', + 'secret_key': 'test_secret_key', + 's3_region': 'us-east-2', + 's3_bucket': 'archive-testing-demo', + 'sqs_url': 'https://sqs.us-east-2.amazonaws.com/798276211865/cloud-archive-client-sqs', + 'type': 'COLLECTION', + 'file_id_prefix': 'gov.noaa.ncei.csb:', + 'collection_id': 'fdb56230-87f4-49f2-ab83-104cfd073177', + 'registry_base_url': 'http://localhost/onestop/api/registry', + 'registry_username': 'admin', + 'registry_password': 'whoknows', + 'onestop_base_url': 'http://localhost/onestop/api/search/search', + 'log_level': 'DEBUG' + } + + records = [{"eventVersion":"2.1"}] + message = json.dumps( + {"Type": "Notification", + "MessageId": "9d0691d2-ae9c-58f9-a9f4-c8dcf05d87be", + "TopicArn": "arn:aws:sns:us-east-1:798276211865:archive-testing-demo-backup-use-1", + "Subject": "Amazon S3 Notification", + "Message": json.dumps({"Records": records}), + "Timestamp": "2021-05-06T21:15:45.427Z", + "SignatureVersion": "1", + "Signature": "Ui5s4uVgcMr5fjGmePCMgmi14Dx9oS8hIpjXXiQo+xZPgsHkUayz7dEeGmMGGt45l8blmZTZEbxJG+HVGfIUmQGRqoimwiLm+mIAaNIN/BV76FVFcQUIkORX8gYN0a4RS3HU8/ElrKFK8Iz0zpxJdjwxa3xPCDwu+dTotiLTJxSouvg8MmkkDnq758a8vZ9WK2PaOlZiZ3m8Mv2ZvLrozZ/DAAz48HSad6Mymhit82RpGCUxy4SDwXVlP/nLB01AS11Gp2HowJR8NXyStrZYzzQEc+PebITaExyikgTMiVhRHkmb7JrtZPpgZu2daQsSooqpwyIzb6pvgwu9W54jkw==", + "SigningCertURL": "https://sns.us-east-1.amazonaws.com/SimpleNotificationService-010a507c1833636cd94bdb98bd93083a.pem", + "UnsubscribeURL": "https://sns.us-east-1.amazonaws.com/?Action=Unsubscribe&SubscriptionArn=arn:aws:sns:us-east-1:798276211865:archive-testing-demo-backup-use-1:e7a9a9f5-792e-48a6-9ec8-40f7f5a8f600" + }) + message_wo_records = json.dumps( + {"Type": "Notification", + "MessageId": "9d0691d2-ae9c-58f9-a9f4-c8dcf05d87be", + "TopicArn": "arn:aws:sns:us-east-1:798276211865:archive-testing-demo-backup-use-1", + "Subject": "Amazon S3 Notification", + "Message": "{}", + "Timestamp": "2021-05-06T21:15:45.427Z", + "SignatureVersion": "1", + "Signature": "Ui5s4uVgcMr5fjGmePCMgmi14Dx9oS8hIpjXXiQo+xZPgsHkUayz7dEeGmMGGt45l8blmZTZEbxJG+HVGfIUmQGRqoimwiLm+mIAaNIN/BV76FVFcQUIkORX8gYN0a4RS3HU8/ElrKFK8Iz0zpxJdjwxa3xPCDwu+dTotiLTJxSouvg8MmkkDnq758a8vZ9WK2PaOlZiZ3m8Mv2ZvLrozZ/DAAz48HSad6Mymhit82RpGCUxy4SDwXVlP/nLB01AS11Gp2HowJR8NXyStrZYzzQEc+PebITaExyikgTMiVhRHkmb7JrtZPpgZu2daQsSooqpwyIzb6pvgwu9W54jkw==", + "SigningCertURL": "https://sns.us-east-1.amazonaws.com/SimpleNotificationService-010a507c1833636cd94bdb98bd93083a.pem", + "UnsubscribeURL": "https://sns.us-east-1.amazonaws.com/?Action=Unsubscribe&SubscriptionArn=arn:aws:sns:us-east-1:798276211865:archive-testing-demo-backup-use-1:e7a9a9f5-792e-48a6-9ec8-40f7f5a8f600" + }) + + @mock_sqs def setUp(self): print("Set it up!") - self.sc = SqsConsumer(abspath_from_relative(__file__, 
"../../config/aws-util-config-dev.yml"), - abspath_from_relative(__file__, "../../config/credentials-template.yml")) + + self.s3_utils = S3Utils(**self.config_dict) + self.sqs_consumer = SqsConsumer(**self.config_dict) def tearDown(self): print("Tear it down!") - def test_parse_config(self): - self.assertFalse(self.sc.conf['sqs_url']==None) + @mock_sqs + def test_connect(self): + queue_name = 'test' + sqs_resource = self.s3_utils.connect_to_resource('sqs', self.config_dict['s3_region']) + expQueue = sqs_resource.create_queue(QueueName=queue_name) + queue = self.sqs_consumer.connect(sqs_resource, queue_name) + + self.assertEqual(expQueue.url, queue.url) + + # Kind of pointless since we catch every exception this doesn't fail when it should.... + @mock_sqs + def test_receive_messages_no_records(self): + mock_cb = MagicMock() + + # Create the mock queue beforehand and set SqsConsumer's 'sqs_url' to the mock's URL + queue_name = 'test_queue' + sqs_resource = self.s3_utils.connect_to_resource('sqs', self.config_dict['s3_region']) + sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url + + # Send a test message lacking Records field + sqs_client = self.s3_utils.connect('sqs', self.config_dict['s3_region']) + sqs_client.send_message( + QueueUrl=sqs_queue_url, + MessageBody= self.message_wo_records + ) + queue = sqs_resource.Queue(queue_name) + + self.sqs_consumer.receive_messages(queue, 1, mock_cb) + + # Verify callback function was called once with expected message attributes + mock_cb.assert_not_called() + + @mock_sqs + def test_receive_messages_fails_invalid_sqs_max_polls(self): + with self.assertRaises(ValueError): + self.sqs_consumer.receive_messages(MagicMock(), 0, MagicMock()) + + @mock_sqs + def test_receive_messages_polls_msgs_expected_times(self): + mock_cb = MagicMock() + queue = MagicMock() + + sqs_max_polls = 2 + self.sqs_consumer.receive_messages(queue, sqs_max_polls, mock_cb) + + # Verify polling called expected times + self.assertEqual(queue.receive_messages.call_count, sqs_max_polls) + + @mock_sqs + def test_receive_messages_callback_occurs(self): + mock_cb = MagicMock() + + # Create the mock queue beforehand and set SqsConsumer's 'sqs_url' to the mock's URL + queue_name = 'test_queue' + sqs_resource = self.s3_utils.connect_to_resource('sqs', self.config_dict['s3_region']) + sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url + + # Send a test message + sqs_client = self.s3_utils.connect('sqs', self.config_dict['s3_region']) + sqs_client.send_message( + QueueUrl=sqs_queue_url, + MessageBody= self.message + ) + queue = sqs_resource.Queue(queue_name) + + self.sqs_consumer.receive_messages(queue, 1, mock_cb) + + # Verify callback function was called once with expected message attributes + mock_cb.assert_called_with(self.records) + + @mock_sqs + def test_happy_path(self): + mock_cb = MagicMock() + + # Create the mock queue beforehand and set SqsConsumer's 'sqs_url' to the mock's URL + queue_name = 'test_queue' + sqs_resource = self.s3_utils.connect_to_resource('sqs', self.config_dict['s3_region']) + queue = self.sqs_consumer.connect(sqs_resource, queue_name) #sqs_resource.create_queue(QueueName=queue_name) + + # Send a test message + sqs_client = self.s3_utils.connect('sqs', self.config_dict['s3_region']) + sqs_client.send_message( + QueueUrl=queue.url, + MessageBody= self.message + ) + + self.sqs_consumer.receive_messages(queue, 1, mock_cb) + + # Verify callback function was called once with expected message attributes + 
mock_cb.assert_called_with(self.records) + # An example using external send/receive methods @mock_sqs - def test_poll_messages(self): - # Create the mock queue beforehand and set its mock URL as the 'sqs_url' config value for SqsConsumer - boto_session = boto3.Session(aws_access_key_id=self.sc.cred['sandbox']['access_key'], - aws_secret_access_key=self.sc.cred['sandbox']['secret_key']) - sqs_session = boto_session.resource('sqs', region_name=self.sc.conf['s3_region']) - res = sqs_session.create_queue(QueueName="test_queue") - self.sc.conf['sqs_url'] = res.url - queue = self.sc.connect() - self.sc.receive_messages(queue, self.sc.conf['sqs_max_polls'], lambda *args, **kwargs: None) + def test_write_message_valid(self): + "Test the write_message method with a valid message" + sqs_client = self.s3_utils.connect('sqs', self.config_dict['s3_region']) + sqs = self.s3_utils.connect_to_resource('sqs', self.config_dict['s3_region']) + queue = sqs.create_queue(QueueName='test-skype-sender') + self.sqs_consumer.sqs_url = queue.url + skype_message = 'Testing with a valid message' + channel = 'test' + expected_message = str({'msg':f'{skype_message}', 'channel':channel}) + message = str({'msg':f'{skype_message}', 'channel':channel}) + queue.send_message(MessageBody=(message)) + sqs_messages = queue.receive_messages() + print('Message: %s'%sqs_messages) + print('Message0: %s'%sqs_messages[0]) + assert sqs_messages[0].body == expected_message, 'Message in skype-sender does not match expected' + print(f'The message in skype-sender SQS matches what we sent') + assert len(sqs_messages) == 1, 'Expected exactly one message in SQS' + print(f'\nExactly one message in skype-sender SQS') if __name__ == '__main__': unittest.main() \ No newline at end of file From 11f8845ef8f111c7d3a2632cace3c208751e13b2 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 11 May 2021 08:00:06 -0600 Subject: [PATCH 023/129] 1500-fixed bug in tests/utils of message missing a carriage return. Just looks. --- onestop-python-client/tests/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/onestop-python-client/tests/utils.py b/onestop-python-client/tests/utils.py index 2f1e6d5..9cb7913 100644 --- a/onestop-python-client/tests/utils.py +++ b/onestop-python-client/tests/utils.py @@ -15,7 +15,8 @@ def create_delete_message(region, bucket, key): "Message": '''{ "Records": [{ "eventVersion": "2.1", "eventSource": "aws:s3", "awsRegion": "''' + region + '''", - "eventTime": "2020-12-14T20:56:08.725Z", "eventName": "ObjectRemoved:Delete", + "eventTime": "2020-12-14T20:56:08.725Z", + "eventName": "ObjectRemoved:Delete", "userIdentity": {"principalId": "AX8TWPQYA8JEM"}, "requestParameters": {"sourceIPAddress": "65.113.158.185"}, "responseElements": {"x-amz-request-id": "D8059E6A1D53597A", From 9048e5326d48cd85f059e487068cd07f464fb35e Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 11 May 2021 08:01:30 -0600 Subject: [PATCH 024/129] 1500-Added logging to SqsHandlers and log_level method parameter. Adjusted SqsConsumer callback parameters to pass along log_level. 
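Any custom callback handed to receive_messages now needs to accept the consumer's log level as a second argument; a minimal sketch of a conforming handler:

    from onestop.util.ClientLogger import ClientLogger

    def my_handler(records, log_level='INFO'):
        logger = ClientLogger.get_logger('my_handler', log_level, False)
        logger.info('Handling %d record(s)' % len(records))

    # sqs_consumer.receive_messages(sqs_queue, sqs_max_polls, my_handler)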
--- .../onestop/util/SqsConsumer.py | 3 ++- .../onestop/util/SqsHandlers.py | 21 +++++++++++++++++-- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/onestop-python-client/onestop/util/SqsConsumer.py b/onestop-python-client/onestop/util/SqsConsumer.py index 4f503d8..4d97c34 100644 --- a/onestop-python-client/onestop/util/SqsConsumer.py +++ b/onestop-python-client/onestop/util/SqsConsumer.py @@ -25,6 +25,7 @@ def __init__(self, log_level = 'INFO', **wildargs): log_level: str The log level to use for this class (Defaults to 'INFO') """ + self.log_level = log_level self.logger = ClientLogger.get_logger(self.__class__.__name__, log_level, False) self.logger.info("Initializing " + self.__class__.__name__) @@ -93,7 +94,7 @@ def receive_messages(self, sqs_queue, sqs_max_polls, cb): if 'Records' in message_content: recs = message_content['Records'] self.logger.debug('Message "Records": %s' % recs) - cb(recs) + cb(recs, self.log_level) else: self.logger.info("s3 event message without 'Records' content received.") diff --git a/onestop-python-client/onestop/util/SqsHandlers.py b/onestop-python-client/onestop/util/SqsHandlers.py index 57be8da..ce0f010 100644 --- a/onestop-python-client/onestop/util/SqsHandlers.py +++ b/onestop-python-client/onestop/util/SqsHandlers.py @@ -1,3 +1,5 @@ +from onestop.util.ClientLogger import ClientLogger + def create_delete_handler(web_publisher): """ Creates a delete function handler to be used with SqsConsumer.receive_messages. @@ -7,21 +9,36 @@ def create_delete_handler(web_publisher): :param: web_publisher: WebPublisher object """ - def delete(records): - if records is None: + def delete(records, log_level='INFO'): + + logger = ClientLogger.get_logger('SqsHandlers', log_level, False) + logger.info("In create_delete_handler.delete() handler") + logger.debug("Records: %s"%records) + + if not records or records is None: + logger.info("Ending handler, records empty, records=%s"%records) return + record = records[0] if record['eventName'] != 'ObjectRemoved:Delete': + logger.info("Ending handler, eventName=%s"%record['eventName']) return + bucket = record['s3']['bucket']['name'] s3_key = record['s3']['object']['key'] s3_url = "s3://" + bucket + "/" + s3_key payload = '{"queries":[{"type": "fieldQuery", "field": "links.linkUrl", "value": "' + s3_url + '"}] }' search_response = web_publisher.search_onestop('granule', payload) + logger.debug('OneStop search response=%s'%search_response) response_json = search_response.json() + logger.debug('OneStop search response json=%s'%response_json) + logger.debug('OneStop search response data=%s'%response_json['data']) if len(response_json['data']) != 0: granule_uuid = response_json['data'][0]['id'] response = web_publisher.delete_registry('granule', granule_uuid) + print('delete_registry response: %s'%response) return response + logger.warning("OneStop search response has no 'data' field. Response=%s"%response_json) + return delete From 5e0d3ba87eed10f2078c591ae8f2b3bc575de13a Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 11 May 2021 08:02:45 -0600 Subject: [PATCH 025/129] 1500-Added tests to SqsHandlersTest and removed config usage. 
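Outside of the tests, the delete handler is wired together roughly like this; a sketch only, where 'queue' is assumed to already exist and the config values are placeholders:

    from onestop.WebPublisher import WebPublisher
    from onestop.util.SqsConsumer import SqsConsumer
    from onestop.util.SqsHandlers import create_delete_handler

    config = {
        'registry_base_url': 'http://localhost/onestop/api/registry',
        'registry_username': 'admin',
        'registry_password': 'password',
        'onestop_base_url': 'http://localhost/onestop/api/search/search',
        'sqs_max_polls': 3,
        'log_level': 'INFO',
    }

    web_publisher = WebPublisher(**config)
    sqs_consumer = SqsConsumer(**config)
    delete_handler = create_delete_handler(web_publisher)

    # 'queue' is a boto3 SQS.Queue, e.g. from SqsConsumer.connect(sqs_resource, queue_name)
    sqs_consumer.receive_messages(queue, config['sqs_max_polls'], delete_handler)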
--- .../tests/SqsHandlersTest.py | 274 +++++++++++++----- 1 file changed, 194 insertions(+), 80 deletions(-) diff --git a/onestop-python-client/tests/SqsHandlersTest.py b/onestop-python-client/tests/SqsHandlersTest.py index bbe4210..3897169 100644 --- a/onestop-python-client/tests/SqsHandlersTest.py +++ b/onestop-python-client/tests/SqsHandlersTest.py @@ -1,8 +1,8 @@ import json import unittest -import boto3 -import yaml -from moto import mock_s3 + +from unittest import mock +from unittest.mock import patch from moto import mock_sqs from tests.utils import abspath_from_relative, create_delete_message from onestop.WebPublisher import WebPublisher @@ -13,95 +13,209 @@ class SqsHandlerTest(unittest.TestCase): - wp = None - su = None - s3ma = None - sqs = None - wp_config = abspath_from_relative(__file__, "../config/web-publisher-config-local.yml") - aws_config = abspath_from_relative(__file__, "../config/aws-util-config-dev.yml") - cred_config = abspath_from_relative(__file__, "../config/credentials-template.yml") - csb_config = abspath_from_relative(__file__, "../config/csb-data-stream-config.yml") - - collection_uuid = '5b58de08-afef-49fb-99a1-9c5d5c003bde' - payloadDict = { - "fileInformation": { - "name": "OR_ABI-L1b-RadF-M6C13_G16_s20192981730367_e20192981740087_c20192981740157.nc", - "size": 30551050, - "checksums": [{ - "algorithm": "SHA1", - "value": "bf4c5b58f8d5f9445f7b277f988e5861184f775a" - }], - "format": "NetCDF" - }, - "relationships": [{ - "type": "COLLECTION", - "id": collection_uuid - }], - "fileLocations": { - "s3://noaa-goes16/ABI-L1b-RadF/2019/298/17/OR_ABI-L1b-RadF-M6C13_G16_s20192981730367_e20192981740087_c20192981740157.nc": { - "uri": "s3://noaa-goes16/ABI-L1b-RadF/2019/298/17/OR_ABI-L1b-RadF-M6C13_G16_s20192981730367_e20192981740087_c20192981740157.nc", - "type": "ACCESS", - "deleted": "false", - "restricted": "false", - "asynchronous": "false", - "locality": "us-east-2", - "lastModified": 1572025823000, - "serviceType": "Amazon:AWS:S3", - "optionalAttributes": {} - } - } - } def setUp(self): print("Set it up!") - with open(abspath_from_relative(__file__, "../config/csb-data-stream-config-template.yml")) as f: - self.stream_conf = yaml.load(f, Loader=yaml.FullLoader) - with open(abspath_from_relative(__file__, "../config/aws-util-config-dev.yml")) as f: - self.cloud_conf = yaml.load(f, Loader=yaml.FullLoader) - with open(abspath_from_relative(__file__, "../config/credentials-template.yml")) as f: - self.cred = yaml.load(f, Loader=yaml.FullLoader) - - self.wp = WebPublisher(self.wp_config, self.cred_config) - self.su = S3Utils(self.cred['sandbox']['access_key'], - self.cred['sandbox']['secret_key'], - "DEBUG") - self.s3ma = S3MessageAdapter(self.stream_conf['access_bucket'], - self.stream_conf['type'], - self.stream_conf['file_identifier_prefix'], - self.stream_conf['collection_id']) + self.config_dict = { + 'access_key': 'test_access_key', + 'secret_key': 'test_secret_key', + 'access_bucket': 'https://archive-testing-demo.s3-us-east-2.amazonaws.com', + 'type': 'COLLECTION', + 'file_id_prefix': 'gov.noaa.ncei.csb:', + 'collection_id': 'fdb56230-87f4-49f2-ab83-104cfd073177', + 'registry_base_url': 'http://localhost/onestop/api/registry', + 'registry_username': 'admin', + 'registry_password': 'whoknows', + 'onestop_base_url': 'http://localhost/onestop/api/search/search', + 'log_level': 'DEBUG' + } + + self.wp = WebPublisher(**self.config_dict) + self.s3_utils = S3Utils(**self.config_dict) + self.s3ma = S3MessageAdapter(**self.config_dict) + self.sqs_consumer = 
SqsConsumer(**self.config_dict) + + self.sqs_max_polls = 3 + self.region = 'us-east-2' + self.bucket = 'archive-testing-demo' + self.key = 'ABI-L1b-RadF/2019/298/15/OR_ABI-L1b-RadF-M6C15_G16_s20192981500369_e20192981510082_c20192981510166.nc' def tearDown(self): print("Tear it down!") - @mock_s3 + def mocked_search_response_data(*args, **kwargs): + class MockResponse: + def __init__(self, json_data, status_code): + self.json_data = json_data + self.status_code = status_code + + def json(self): + return self.json_data + + print ("args: "+str(args)+" kwargs: "+str(kwargs)) + onestop_search_response = { + "data":[ + { + "attributes":{ + "serviceLinks":[ + + ], + "citeAsStatements":[ + + ], + "links":[ + { + "linkFunction":"download", + "linkUrl":"s3://archive-testing-demo-backup/public/NESDIS/CSB/csv/2019/12/01/20191201_08d5538c6f8dbefd7d82929623a34385_pointData.csv", + "linkName":"Amazon S3", + "linkProtocol":"Amazon:AWS:S3" + }, + { + "linkFunction":"download", + "linkUrl":"https://archive-testing-demo.s3-us-east-2.amazonaws.com/public/NESDIS/CSB/csv/2019/12/01/20191201_08d5538c6f8dbefd7d82929623a34385_pointData.csv", + "linkName":"Amazon S3", + "linkProtocol":"HTTPS" + } + ], + "internalParentIdentifier":"fdb56230-87f4-49f2-ab83-104cfd073177", + "filesize":63751, + "title":"20191201_08d5538c6f8dbefd7d82929623a34385_pointData.csv" + }, + "id":"77b11a1e-1b75-46e1-b7d6-99b5022ed113", + "type":"granule" + } + ], + "meta":{ + "took":1, + "total":6, + "exactCount":True + } + } + return MockResponse(onestop_search_response, 200) + + + def mocked_search_response_data_empty(*args, **kwargs): + class MockResponse: + def __init__(self, json_data, status_code): + self.json_data = json_data + self.status_code = status_code + + def json(self): + return self.json_data + + print ("args: "+str(args)+" kwargs: "+str(kwargs)) + onestop_search_response = { + "data":[], + "meta":{ + "took":1, + "total":6, + "exactCount":True + } + } + return MockResponse(onestop_search_response, 200) + @mock_sqs - def init_s3(self): - bucket = self.cloud_conf['s3_bucket'] - key = self.cloud_conf['s3_key'] - boto_client = self.su.connect("s3", None) - boto_client.create_bucket(Bucket=bucket) - boto_client.put_object(Bucket=bucket, Key=key, Body="foobar") - - sqs_client = boto3.client('sqs', region_name=self.cloud_conf['s3_region']) - sqs_queue = sqs_client.create_queue(QueueName=self.cloud_conf['sqs_name']) - self.sqs = SqsConsumer(self.aws_config, self.cred_config) - message = create_delete_message(self.cloud_conf['s3_region'], bucket, key) - sqs_client.send_message(QueueUrl=sqs_queue['QueueUrl'], MessageBody=json.dumps(message)) - sqs_queue['QueueUrl'] - - @mock_s3 + @mock.patch('requests.get', side_effect=mocked_search_response_data, autospec=True) + @patch('onestop.WebPublisher') + def test_delete_handler_happy(self, mock_wp, mock_response): + queue_name = 'test_queue' + sqs_resource = self.s3_utils.connect_to_resource('sqs', self.region) + sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url + sqs_queue = sqs_resource.Queue(queue_name) + + # Send a test message + sqs_client = self.s3_utils.connect('sqs', self.region) + message = create_delete_message(self.region, self.bucket, self.key) + sqs_client.send_message( + QueueUrl=sqs_queue_url, + MessageBody=json.dumps(message) + ) + + mock_wp.search_onestop.side_effect = mock_response + cb = create_delete_handler(mock_wp) + + self.sqs_consumer.receive_messages(sqs_queue, 1, cb) + + # Verify search and delete called once. 
+ mock_wp.search_onestop.assert_called_once() + mock_wp.delete_registry.assert_called_once() + @mock_sqs - def delete_handler_wrapper(self, recs): - handler = create_delete_handler(self.wp) - result = handler(recs) - self.assertTrue(result) + @mock.patch('requests.get', side_effect=mocked_search_response_data_empty, autospec=True) + @patch('onestop.WebPublisher') + def test_delete_handler_data_empty_ends_cb(self, mock_wp, mock_response): + queue_name = 'test_queue' + sqs_resource = self.s3_utils.connect_to_resource('sqs', self.region) + sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url + sqs_queue = sqs_resource.Queue(queue_name) + + # Send a test message + sqs_client = self.s3_utils.connect('sqs', self.region) + message = create_delete_message(self.region, self.bucket, self.key) + sqs_client.send_message( + QueueUrl=sqs_queue_url, + MessageBody=json.dumps(message) + ) + + mock_wp.search_onestop.side_effect = mock_response + cb = create_delete_handler(mock_wp) + + self.sqs_consumer.receive_messages(sqs_queue, 1, cb) + + # Verify search and delete called once. + mock_wp.search_onestop.assert_called_once() + mock_wp.delete_registry.assert_not_called() @mock_sqs - def test_delete_handler(self): - mock_queue_url = self.init_s3() - sqs_queue = boto3.resource('sqs', region_name=self.stream_conf['s3_region']).Queue(mock_queue_url) - self.sqs.receive_messages(sqs_queue, self.stream_conf['sqs_max_polls'], self.delete_handler_wrapper) + @mock.patch('requests.get', side_effect=mocked_search_response_data, autospec=True) + @patch('onestop.WebPublisher') + def test_delete_handler_no_records_ends_cb(self, mock_wp, mock_response): + queue_name = 'test_queue' + sqs_resource = self.s3_utils.connect_to_resource('sqs', self.region) + sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url + sqs_queue = sqs_resource.Queue(queue_name) + + # Send a test message + sqs_client = self.s3_utils.connect('sqs', self.region) + sqs_client.send_message( + QueueUrl=sqs_queue_url, + MessageBody=json.dumps({"Message":'''{"Records":[]}'''}) + ) + + mock_wp.search_onestop.side_effect = mock_response + cb = create_delete_handler(mock_wp) + + self.sqs_consumer.receive_messages(sqs_queue, 1, cb) + + # Verify search and delete called once. + mock_wp.search_onestop.assert_not_called() + mock_wp.delete_registry.assert_not_called() + + @mock_sqs + @mock.patch('requests.get', side_effect=mocked_search_response_data, autospec=True) + @patch('onestop.WebPublisher') + def test_delete_handler_eventName_not_delete_ends_cb(self, mock_wp, mock_response): + queue_name = 'test_queue' + sqs_resource = self.s3_utils.connect_to_resource('sqs', self.region) + sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url + sqs_queue = sqs_resource.Queue(queue_name) + + # Send a test message + sqs_client = self.s3_utils.connect('sqs', self.region) + sqs_client.send_message( + QueueUrl=sqs_queue_url, + MessageBody=json.dumps({"Message":'''{"Records":[{"eventName":"Unknown"}]}'''}) + ) + + mock_wp.search_onestop.side_effect = mock_response + cb = create_delete_handler(mock_wp) + + self.sqs_consumer.receive_messages(sqs_queue, 1, cb) + + # Verify search and delete called once. 
+ mock_wp.search_onestop.assert_not_called() + mock_wp.delete_registry.assert_not_called() if __name__ == '__main__': unittest.main() \ No newline at end of file From 918c378b053da7918f679cf1806a5f68440f190c Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 11 May 2021 08:17:33 -0600 Subject: [PATCH 026/129] 1500-Fixed SqsConsumerTest due to parameters into CB changing. skipped example test. --- onestop-python-client/tests/util/SqsConsumerTest.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/onestop-python-client/tests/util/SqsConsumerTest.py b/onestop-python-client/tests/util/SqsConsumerTest.py index 87f9005..7b5785f 100644 --- a/onestop-python-client/tests/util/SqsConsumerTest.py +++ b/onestop-python-client/tests/util/SqsConsumerTest.py @@ -2,7 +2,7 @@ import json from moto import mock_sqs -from unittest.mock import ANY, patch, MagicMock, call +from unittest.mock import MagicMock, ANY from onestop.util.S3Utils import S3Utils from onestop.util.SqsConsumer import SqsConsumer @@ -128,7 +128,7 @@ def test_receive_messages_callback_occurs(self): self.sqs_consumer.receive_messages(queue, 1, mock_cb) # Verify callback function was called once with expected message attributes - mock_cb.assert_called_with(self.records) + mock_cb.assert_called_with(self.records, ANY) @mock_sqs def test_happy_path(self): @@ -149,9 +149,10 @@ def test_happy_path(self): self.sqs_consumer.receive_messages(queue, 1, mock_cb) # Verify callback function was called once with expected message attributes - mock_cb.assert_called_with(self.records) + mock_cb.assert_called_with(self.records, ANY) # An example using external send/receive methods + @unittest.skip @mock_sqs def test_write_message_valid(self): "Test the write_message method with a valid message" From 3f39966f8fe4f42c3e385ad79e04ba014fdc1b17 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 11 May 2021 08:22:33 -0600 Subject: [PATCH 027/129] 1500-Removed unused conf variable. --- onestop-python-client/onestop/util/S3Utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/onestop-python-client/onestop/util/S3Utils.py b/onestop-python-client/onestop/util/S3Utils.py index e2f2e32..eebafe9 100644 --- a/onestop-python-client/onestop/util/S3Utils.py +++ b/onestop-python-client/onestop/util/S3Utils.py @@ -68,7 +68,6 @@ class S3Utils: retrieve_inventory_results(vault_name, boto_client, job_id) Retrieve the results of an Amazon Glacier inventory-retrieval job """ - conf = None def __init__(self, access_key, secret_key, log_level = 'INFO', **wildargs): self.access_key = access_key From 4cffc3884e7d41e074494ca6bd122f0ce4cde9eb Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 11 May 2021 09:21:19 -0600 Subject: [PATCH 028/129] 1500-Removed unused var conf from classes. 
--- onestop-python-client/onestop/WebPublisher.py | 1 - 1 file changed, 1 deletion(-) diff --git a/onestop-python-client/onestop/WebPublisher.py b/onestop-python-client/onestop/WebPublisher.py index 75ee99f..d04eacc 100644 --- a/onestop-python-client/onestop/WebPublisher.py +++ b/onestop-python-client/onestop/WebPublisher.py @@ -31,7 +31,6 @@ class WebPublisher: get_granules_onestop(self, uuid) Search for a granule in OneStop given its uuid """ - conf = None def __init__(self, registry_base_url, registry_username, registry_password, onestop_base_url, log_level="INFO", **kwargs): self.registry_base_url = registry_base_url From 8280a374901c62c68eea722ebd6a7c087a57ce1f Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 11 May 2021 09:21:58 -0600 Subject: [PATCH 029/129] 1500-Changed mock tests to not load configs but use mock data. --- .../tests/util/S3MessageAdapterTest.py | 32 +++++------- .../tests/util/S3UtilsTest.py | 52 +++++++++---------- 2 files changed, 38 insertions(+), 46 deletions(-) diff --git a/onestop-python-client/tests/util/S3MessageAdapterTest.py b/onestop-python-client/tests/util/S3MessageAdapterTest.py index a960737..671695a 100644 --- a/onestop-python-client/tests/util/S3MessageAdapterTest.py +++ b/onestop-python-client/tests/util/S3MessageAdapterTest.py @@ -1,8 +1,6 @@ import unittest -import yaml from moto import mock_s3 -from tests.utils import abspath_from_relative from onestop.util.S3Utils import S3Utils from onestop.util.S3MessageAdapter import S3MessageAdapter @@ -54,30 +52,24 @@ class S3MessageAdapterTest(unittest.TestCase): def setUp(self): print("Set it up!") - with open(abspath_from_relative(__file__, "../../config/csb-data-stream-config-template.yml")) as f: - self.stream_conf = yaml.load(f, Loader=yaml.FullLoader) - with open(abspath_from_relative(__file__, "../../config/aws-util-config-dev.yml")) as f: - self.cloud_conf = yaml.load(f, Loader=yaml.FullLoader) - with open(abspath_from_relative(__file__, "../../config/credentials-template.yml")) as f: - self.cred = yaml.load(f, Loader=yaml.FullLoader) + config_dict = { + 'access_key': 'test_access_key', + 'secret_key': 'test_secret_key', + 'access_bucket': 'https://archive-testing-demo.s3-us-east-2.amazonaws.com', + 'type': 'COLLECTION', + 'file_id_prefix': 'gov.noaa.ncei.csb:', + 'collection_id': 'fdb56230-87f4-49f2-ab83-104cfd073177', + 'log_level': 'DEBUG' + } - self.s3_utils = S3Utils(self.cred['sandbox']['access_key'], - self.cred['sandbox']['secret_key'], - "DEBUG") - self.s3ma = S3MessageAdapter(self.stream_conf['access_bucket'], - self.stream_conf['type'], - self.stream_conf['file_identifier_prefix'], - self.stream_conf['collection_id']) + self.s3_utils = S3Utils(**config_dict) + self.s3ma = S3MessageAdapter(**config_dict) - self.region = self.cloud_conf['s3_region'] - self.bucket = self.cloud_conf['s3_bucket'] + self.region = 'us-east-2' def tearDown(self): print("Tear it down!") - def test_parse_config(self): - self.assertFalse(self.stream_conf['collection_id'] == None) - @mock_s3 def test_transform(self): s3 = self.s3_utils.connect('s3', self.region) diff --git a/onestop-python-client/tests/util/S3UtilsTest.py b/onestop-python-client/tests/util/S3UtilsTest.py index acb0af4..47c8ade 100644 --- a/onestop-python-client/tests/util/S3UtilsTest.py +++ b/onestop-python-client/tests/util/S3UtilsTest.py @@ -1,7 +1,6 @@ import csv import unittest import uuid -import yaml from moto import mock_s3 from moto import mock_glacier @@ -13,20 +12,21 @@ class S3UtilsTest(unittest.TestCase): def setUp(self): print("Set it 
up!") - with open(abspath_from_relative(__file__, "../../config/csb-data-stream-config-template.yml")) as f: - self.stream_conf = yaml.load(f, Loader=yaml.FullLoader) - with open(abspath_from_relative(__file__, "../../config/aws-util-config-dev.yml")) as f: - self.cloud_conf = yaml.load(f, Loader=yaml.FullLoader) - with open(abspath_from_relative(__file__, "../../config/credentials-template.yml")) as f: - self.cred = yaml.load(f, Loader=yaml.FullLoader) + config_dict = { + 'access_key': 'test_access_key', + 'secret_key': 'test_secret_key', + 'access_bucket': 'https://archive-testing-demo.s3-us-east-2.amazonaws.com', + 'type': 'COLLECTION', + 'file_id_prefix': 'gov.noaa.ncei.csb:', + 'collection_id': 'fdb56230-87f4-49f2-ab83-104cfd073177', + 'log_level': 'DEBUG' + } - self.s3_utils = S3Utils(self.cred['sandbox']['access_key'], - self.cred['sandbox']['secret_key'], - "DEBUG") + self.s3_utils = S3Utils(**config_dict) - self.region = self.cloud_conf['s3_region'] - self.region2 = self.region - self.bucket = self.cloud_conf['s3_bucket'] + self.region = 'us-east-2' + self.region2 = 'eu-north-1' + self.bucket = 'archive-testing-demo' @mock_s3 def test_get_uuid_metadata(self): @@ -54,7 +54,7 @@ def test_add_uuid_metadata(self): @mock_s3 def test_add_file_s3(self): - boto_client = self.s3_utils.connect("s3", None) + boto_client = self.s3_utils.connect('client', 's3', None) local_file = abspath_from_relative(__file__, "../data/file4.csv") s3_key = "csv/file4.csv" location = {'LocationConstraint': self.region} @@ -65,8 +65,8 @@ def test_add_file_s3(self): @mock_s3 def test_get_csv_s3(self): - boto_session = self.s3_utils.connect("session", None) - s3 = self.s3_utils.connect('s3', self.cloud_conf['s3_region']) + boto_session = self.s3_utils.connect('session', None, None) + s3 = self.s3_utils.connect('client', 's3', self.region) location = {'LocationConstraint': self.region} s3_key = "csv/file1.csv" s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) @@ -81,7 +81,7 @@ def test_get_csv_s3(self): @mock_s3 def test_read_bytes_s3(self): - boto_client = self.s3_utils.connect("s3", None) + boto_client = self.s3_utils.connect('client', 's3', None) s3_key = "csv/file1.csv" boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration={'LocationConstraint': self.region}) boto_client.put_object(Bucket=self.bucket, Key=s3_key, Body="body") @@ -90,7 +90,7 @@ def test_read_bytes_s3(self): @mock_s3 def test_add_files(self): - boto_client = self.s3_utils.connect("s3", None) + boto_client = self.s3_utils.connect('client', 's3', None) local_files = ["file1_s3.csv", "file2.csv", "file3.csv"] location = {'LocationConstraint': self.region} boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) @@ -108,7 +108,7 @@ def test_s3_cross_region(self): key = "csv/file1.csv" # makes connection to low level s3 client - s3 = self.s3_utils.connect('s3', self.region) + s3 = self.s3_utils.connect('client', 's3', self.region) location = {'LocationConstraint': self.region} s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) s3.put_object(Bucket=self.bucket, Key=key, Body="body") @@ -117,8 +117,8 @@ def test_s3_cross_region(self): file_data = self.s3_utils.read_bytes_s3(s3, self.bucket, key) # Redirecting upload to vault in second region - glacier = self.s3_utils.connect("glacier", self.region2) - vault_name = self.cloud_conf['vault_name'] + glacier = self.s3_utils.connect('client', 'glacier', self.region2) + vault_name = 'archive-vault-new' 
glacier.create_vault(vaultName=vault_name) print('vault name: ' + str(vault_name)) print('region name: ' + str(self.region2)) @@ -140,7 +140,7 @@ def test_s3_to_glacier(self): key = "csv/file1_s3.csv" # Create boto3 low level api connection - s3 = self.s3_utils.connect('s3', self.region) + s3 = self.s3_utils.connect('client', 's3', self.region) location = {'LocationConstraint': self.region} s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) s3.put_object(Bucket=self.bucket, Key=key, Body="body") @@ -172,13 +172,13 @@ def test_s3_restore(self): @mock_glacier def test_retrieve_inventory(self): """ - Initiates job for archive retrieval. Takes 3-5 hours to complete + Initiates job for archive retrieval. Takes 3-5 hours to complete if not mocked. """ # Using glacier api initiates job and returns archive results # Connect to your glacier vault for retrieval - glacier = self.s3_utils.connect("glacier", self.region2) - vault_name = self.cloud_conf['vault_name'] + glacier = self.s3_utils.connect('client', 'glacier', self.region2) + vault_name = 'archive-vault-new' glacier.create_vault(vaultName=vault_name) @@ -193,7 +193,7 @@ def test_retrieve_inventory_results(self, jobid): """ # Connect to your glacier vault for retrieval - glacier = self.su.connect("glacier", self.su.conf['region']) + glacier = self.su.connect('client', 'glacier', self.su.conf['region']) vault_name = self.su.conf['vault_name'] # Retrieve the job results From c16302eaf845fbab4cfe1862163e97cab563182e Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 11 May 2021 09:25:02 -0600 Subject: [PATCH 030/129] 1500-refactored S3Utils connect to take in type parameter instead of us assuming if they say "glacier" they mean a client of service type "glacier". Little clearer to the user and in the code. This allows boto to catch the error of wrong service name specified and it gives a nice list of choices. Added else statement too for cases user specifies a type we don't expect, will add tests to. --- onestop-python-client/onestop/util/S3Utils.py | 61 +++++++++---------- .../tests/SqsHandlersTest.py | 16 ++--- .../tests/extractor/CsbExtractorTest.py | 4 +- .../tests/util/S3MessageAdapterTest.py | 2 +- .../tests/util/S3UtilsTest.py | 6 +- .../tests/util/SqsConsumerTest.py | 18 +++--- 6 files changed, 51 insertions(+), 56 deletions(-) diff --git a/onestop-python-client/onestop/util/S3Utils.py b/onestop-python-client/onestop/util/S3Utils.py index eebafe9..f1bb8e2 100644 --- a/onestop-python-client/onestop/util/S3Utils.py +++ b/onestop-python-client/onestop/util/S3Utils.py @@ -30,7 +30,7 @@ class S3Utils: Methods ------- connect(client_type, region) - connects to a boto3 client + connects to a boto3 service objectkey_exists(bucket, s3_key) checks to see if a s3 key path exists in a particular bucket @@ -78,46 +78,41 @@ def __init__(self, access_key, secret_key, log_level = 'INFO', **wildargs): if wildargs: self.logger.error("There were extra constructor arguments: " + str(wildargs)) - def connect(self, client_type, region): + def connect(self, type, service_name, region): """ - Connects to a boto3 client + Connects to a boto3 of specified type using the credentials provided in the constructor. - :param client_type: str - boto client type in which you want to access + :param type: str + boto object type to return, see return type. 
+ :param service_name: str + (Optional for session type) boto service name in which you want to access :param region: str - name of aws region you want to access + (Optional for session type) name of aws region you want to access - :return: boto3 client - dependent on the client_type parameter + :return: boto3 connection object + A boto3 connection object; Client, Session, or Resource. """ - - if client_type == "s3": - boto = boto3.client( - "s3", + type = type.lower() + if type == 'session': + return boto3.Session( aws_access_key_id=self.access_key, aws_secret_access_key=self.secret_key, - region_name=region) - - if client_type == "s3_resource": - boto = boto3.resource( - "s3", - region_name=region, + ) + elif type == 'client': + return boto3.client( + service_name, aws_access_key_id=self.access_key, - aws_secret_access_key=self.secret_key) - - if client_type == "glacier": - boto = boto3.client( - "glacier", + aws_secret_access_key=self.secret_key, + region_name=region) + elif type == 'resource': + return boto3.resource( + service_name, region_name=region, aws_access_key_id=self.access_key, - aws_secret_access_key=self.secret_key) - - if client_type == "session": - boto = boto3.Session( - aws_access_key_id=self.access_key, - aws_secret_access_key=self.secret_key, + aws_secret_access_key=self.secret_key ) - return boto + else: + raise Exception('Unknown boto3 type of %s'%type) def objectkey_exists(self, bucket, s3_key): """ @@ -235,11 +230,11 @@ def upload_s3(self, boto_client, local_file, bucket, s3_key, overwrite): self.logger.error("File to upload was not found. Path: "+local_file) return False - def get_csv_s3(self, boto_client, bucket, key): + def get_csv_s3(self, boto_session, bucket, key): """ gets a csv file from s3 bucket using smart open library - :param boto_client: session + :param boto_session: session utilizes boto session type :param bucket: str name of bucket @@ -249,7 +244,7 @@ def get_csv_s3(self, boto_client, bucket, key): :return: smart open file """ url = "s3://" + bucket + "/" + key - sm_open_file = sm_open(url, 'r', transport_params={'session': boto_client}) + sm_open_file = sm_open(url, 'r', transport_params={'session': boto_session}) return sm_open_file def read_bytes_s3(self, boto_client, bucket, key): diff --git a/onestop-python-client/tests/SqsHandlersTest.py b/onestop-python-client/tests/SqsHandlersTest.py index 3897169..4dd2c9e 100644 --- a/onestop-python-client/tests/SqsHandlersTest.py +++ b/onestop-python-client/tests/SqsHandlersTest.py @@ -120,12 +120,12 @@ def json(self): @patch('onestop.WebPublisher') def test_delete_handler_happy(self, mock_wp, mock_response): queue_name = 'test_queue' - sqs_resource = self.s3_utils.connect_to_resource('sqs', self.region) + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.region) sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url sqs_queue = sqs_resource.Queue(queue_name) # Send a test message - sqs_client = self.s3_utils.connect('sqs', self.region) + sqs_client = self.s3_utils.connect('client', 'sqs' , self.region) message = create_delete_message(self.region, self.bucket, self.key) sqs_client.send_message( QueueUrl=sqs_queue_url, @@ -146,12 +146,12 @@ def test_delete_handler_happy(self, mock_wp, mock_response): @patch('onestop.WebPublisher') def test_delete_handler_data_empty_ends_cb(self, mock_wp, mock_response): queue_name = 'test_queue' - sqs_resource = self.s3_utils.connect_to_resource('sqs', self.region) + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.region) 
sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url sqs_queue = sqs_resource.Queue(queue_name) # Send a test message - sqs_client = self.s3_utils.connect('sqs', self.region) + sqs_client = self.s3_utils.connect('client', 'sqs' , self.region) message = create_delete_message(self.region, self.bucket, self.key) sqs_client.send_message( QueueUrl=sqs_queue_url, @@ -172,12 +172,12 @@ def test_delete_handler_data_empty_ends_cb(self, mock_wp, mock_response): @patch('onestop.WebPublisher') def test_delete_handler_no_records_ends_cb(self, mock_wp, mock_response): queue_name = 'test_queue' - sqs_resource = self.s3_utils.connect_to_resource('sqs', self.region) + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.region) sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url sqs_queue = sqs_resource.Queue(queue_name) # Send a test message - sqs_client = self.s3_utils.connect('sqs', self.region) + sqs_client = self.s3_utils.connect('client', 'sqs' , self.region) sqs_client.send_message( QueueUrl=sqs_queue_url, MessageBody=json.dumps({"Message":'''{"Records":[]}'''}) @@ -197,12 +197,12 @@ def test_delete_handler_no_records_ends_cb(self, mock_wp, mock_response): @patch('onestop.WebPublisher') def test_delete_handler_eventName_not_delete_ends_cb(self, mock_wp, mock_response): queue_name = 'test_queue' - sqs_resource = self.s3_utils.connect_to_resource('sqs', self.region) + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.region) sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url sqs_queue = sqs_resource.Queue(queue_name) # Send a test message - sqs_client = self.s3_utils.connect('sqs', self.region) + sqs_client = self.s3_utils.connect('client', 'sqs' , self.region) sqs_client.send_message( QueueUrl=sqs_queue_url, MessageBody=json.dumps({"Message":'''{"Records":[{"eventName":"Unknown"}]}'''}) diff --git a/onestop-python-client/tests/extractor/CsbExtractorTest.py b/onestop-python-client/tests/extractor/CsbExtractorTest.py index 72bdbcc..2c3ff72 100644 --- a/onestop-python-client/tests/extractor/CsbExtractorTest.py +++ b/onestop-python-client/tests/extractor/CsbExtractorTest.py @@ -38,13 +38,13 @@ def test_is_not_csv(self): @mock_s3 def test_csb_SME_user_path(self): # Setup bucket and file to read - s3 = self.s3_utils.connect('s3', self.region) + s3 = self.s3_utils.connect('client', 's3', self.region) s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration={'LocationConstraint': self.region}) self.s3_utils.upload_s3(s3, self.root_proj_path + '/' + self.key, self.bucket, self.key, True) self.assertTrue(self.s3_utils.read_bytes_s3(s3, self.bucket, self.key)) # This is how we would expect an external user to get the file. 
- sm_open_file = self.s3_utils.get_csv_s3(self.s3_utils.connect("session", None), self.bucket, self.key) + sm_open_file = self.s3_utils.get_csv_s3(self.s3_utils.connect('session', None, None), self.bucket, self.key) bounds_dict = CsbExtractor.get_spatial_temporal_bounds(sm_open_file, 'LON', 'LAT', 'TIME') coords = bounds_dict["geospatial"] diff --git a/onestop-python-client/tests/util/S3MessageAdapterTest.py b/onestop-python-client/tests/util/S3MessageAdapterTest.py index 671695a..925be2e 100644 --- a/onestop-python-client/tests/util/S3MessageAdapterTest.py +++ b/onestop-python-client/tests/util/S3MessageAdapterTest.py @@ -72,7 +72,7 @@ def tearDown(self): @mock_s3 def test_transform(self): - s3 = self.s3_utils.connect('s3', self.region) + s3 = self.s3_utils.connect('client', 's3', self.region) location = {'LocationConstraint': self.region} bucket = 'nesdis-ncei-csb-dev' key = 'csv/file1.csv' diff --git a/onestop-python-client/tests/util/S3UtilsTest.py b/onestop-python-client/tests/util/S3UtilsTest.py index 47c8ade..83be8f2 100644 --- a/onestop-python-client/tests/util/S3UtilsTest.py +++ b/onestop-python-client/tests/util/S3UtilsTest.py @@ -30,7 +30,7 @@ def setUp(self): @mock_s3 def test_get_uuid_metadata(self): - boto_client = self.s3_utils.connect("s3_resource", None) + boto_client = self.s3_utils.connect('resource', 's3', None) s3_key = "csv/file1.csv" location = {'LocationConstraint': self.region} @@ -42,7 +42,7 @@ def test_get_uuid_metadata(self): @mock_s3 def test_add_uuid_metadata(self): - boto_client = self.s3_utils.connect("s3_resource", self.region) + boto_client = self.s3_utils.connect('resource', 's3', self.region) s3_key = "csv/file1.csv" @@ -162,7 +162,7 @@ def test_s3_restore(self): days = 3 # use high level api - s3 = self.s3_utils.connect('s3_resource', self.region2) + s3 = self.s3_utils.connect('resource', 's3' , self.region2) location = {'LocationConstraint': self.region2} s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) s3.Object(self.bucket, key).put(Bucket=self.bucket, Key=key, Body="body") diff --git a/onestop-python-client/tests/util/SqsConsumerTest.py b/onestop-python-client/tests/util/SqsConsumerTest.py index 7b5785f..ef50b20 100644 --- a/onestop-python-client/tests/util/SqsConsumerTest.py +++ b/onestop-python-client/tests/util/SqsConsumerTest.py @@ -63,7 +63,7 @@ def tearDown(self): @mock_sqs def test_connect(self): queue_name = 'test' - sqs_resource = self.s3_utils.connect_to_resource('sqs', self.config_dict['s3_region']) + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.config_dict['s3_region']) expQueue = sqs_resource.create_queue(QueueName=queue_name) queue = self.sqs_consumer.connect(sqs_resource, queue_name) @@ -76,11 +76,11 @@ def test_receive_messages_no_records(self): # Create the mock queue beforehand and set SqsConsumer's 'sqs_url' to the mock's URL queue_name = 'test_queue' - sqs_resource = self.s3_utils.connect_to_resource('sqs', self.config_dict['s3_region']) + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.config_dict['s3_region']) sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url # Send a test message lacking Records field - sqs_client = self.s3_utils.connect('sqs', self.config_dict['s3_region']) + sqs_client = self.s3_utils.connect('client', 'sqs' , self.config_dict['s3_region']) sqs_client.send_message( QueueUrl=sqs_queue_url, MessageBody= self.message_wo_records @@ -114,11 +114,11 @@ def test_receive_messages_callback_occurs(self): # Create the mock queue beforehand and set 
SqsConsumer's 'sqs_url' to the mock's URL queue_name = 'test_queue' - sqs_resource = self.s3_utils.connect_to_resource('sqs', self.config_dict['s3_region']) + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.config_dict['s3_region']) sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url # Send a test message - sqs_client = self.s3_utils.connect('sqs', self.config_dict['s3_region']) + sqs_client = self.s3_utils.connect('client', 'sqs' , self.config_dict['s3_region']) sqs_client.send_message( QueueUrl=sqs_queue_url, MessageBody= self.message @@ -136,11 +136,11 @@ def test_happy_path(self): # Create the mock queue beforehand and set SqsConsumer's 'sqs_url' to the mock's URL queue_name = 'test_queue' - sqs_resource = self.s3_utils.connect_to_resource('sqs', self.config_dict['s3_region']) + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.config_dict['s3_region']) queue = self.sqs_consumer.connect(sqs_resource, queue_name) #sqs_resource.create_queue(QueueName=queue_name) # Send a test message - sqs_client = self.s3_utils.connect('sqs', self.config_dict['s3_region']) + sqs_client = self.s3_utils.connect('client', 'sqs' , self.config_dict['s3_region']) sqs_client.send_message( QueueUrl=queue.url, MessageBody= self.message @@ -156,8 +156,8 @@ def test_happy_path(self): @mock_sqs def test_write_message_valid(self): "Test the write_message method with a valid message" - sqs_client = self.s3_utils.connect('sqs', self.config_dict['s3_region']) - sqs = self.s3_utils.connect_to_resource('sqs', self.config_dict['s3_region']) + sqs_client = self.s3_utils.connect('client', 'sqs' , self.config_dict['s3_region']) + sqs = self.s3_utils.connect('resource', 'sqs', self.config_dict['s3_region']) queue = sqs.create_queue(QueueName='test-skype-sender') self.sqs_consumer.sqs_url = queue.url skype_message = 'Testing with a valid message' From f8c5bd0fb922e63030bb63b34dead24dc23d65c1 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 11 May 2021 10:39:14 -0600 Subject: [PATCH 031/129] 1500-Changed moto dependency to moto[all] because of some issues with a moto version issue. 
https://github.com/spulec/moto/issues/3297 --- onestop-python-client/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onestop-python-client/requirements.txt b/onestop-python-client/requirements.txt index 735dad7..9783885 100644 --- a/onestop-python-client/requirements.txt +++ b/onestop-python-client/requirements.txt @@ -8,5 +8,5 @@ argparse~=1.4.0 boto3~=1.15.11 requests~=2.24.0 botocore~=1.18.11 -moto==1.3.16.dev122 +moto[all]==2.0.5 undictify From ecfec1e61e411b67300c7050b7eb701c67a8b454 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 11 May 2021 13:47:37 -0600 Subject: [PATCH 032/129] 1500-added tests for different connect types for S3Utils --- .../tests/util/S3UtilsTest.py | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/onestop-python-client/tests/util/S3UtilsTest.py b/onestop-python-client/tests/util/S3UtilsTest.py index 83be8f2..c002003 100644 --- a/onestop-python-client/tests/util/S3UtilsTest.py +++ b/onestop-python-client/tests/util/S3UtilsTest.py @@ -2,7 +2,7 @@ import unittest import uuid -from moto import mock_s3 +from moto import mock_s3, mock_sqs from moto import mock_glacier from tests.utils import abspath_from_relative from onestop.util.S3Utils import S3Utils @@ -28,6 +28,28 @@ def setUp(self): self.region2 = 'eu-north-1' self.bucket = 'archive-testing-demo' + @mock_sqs + def test_connect_session(self): + session = self.s3_utils.connect('Session', None, None) + + # No exception is called for unique method call + session.client('sqs') + session.resource('s3') + + @mock_sqs + def test_connect_client(self): + client = self.s3_utils.connect('Client', 'sqs', self.region) + + # No exception is called for unique method call + client.list_queues() + + @mock_sqs + def test_connect_resource(self): + resource = self.s3_utils.connect('Resource', 'sqs', self.region) + + # No exception is called for unique method call + resource.Queue(url='test') + @mock_s3 def test_get_uuid_metadata(self): boto_client = self.s3_utils.connect('resource', 's3', None) From e4c7fb46c6455e62bf9d8371ad35c7148c68904a Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 11 May 2021 13:55:23 -0600 Subject: [PATCH 033/129] 1500-Changed class constructors checking extra arguments and logging of an error to warning. 
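Note (rough usage sketch, not part of the diff below; names and the URL value are
illustrative only): the intent is that one shared config dict can be splatted into
any of these constructors, and keys a class does not recognize now only produce a
warning-level log entry instead of an error.

    from onestop.util.S3Utils import S3Utils

    config = {
        'access_key': 'test_access_key',
        'secret_key': 'test_secret_key',
        'log_level': 'INFO',
        'registry_base_url': 'http://localhost/onestop/api/registry',  # not an S3Utils argument
    }
    s3_utils = S3Utils(**config)  # the extra key lands in **wildargs and is only warned about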
--- onestop-python-client/onestop/KafkaConsumer.py | 2 +- onestop-python-client/onestop/WebPublisher.py | 2 +- onestop-python-client/onestop/util/S3MessageAdapter.py | 2 +- onestop-python-client/onestop/util/S3Utils.py | 2 +- onestop-python-client/onestop/util/SqsConsumer.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/onestop-python-client/onestop/KafkaConsumer.py b/onestop-python-client/onestop/KafkaConsumer.py index 76078cc..747b0e4 100644 --- a/onestop-python-client/onestop/KafkaConsumer.py +++ b/onestop-python-client/onestop/KafkaConsumer.py @@ -101,7 +101,7 @@ def __init__(self, metadata_type, brokers, group_id, auto_offset_reset, schema_r self.logger.info("Initializing " + self.__class__.__name__) if wildargs: - self.logger.error("There were extra constructor arguments: " + str(wildargs)) + self.logger.warning("There were extra constructor arguments: " + str(wildargs)) def register_client(self): """ diff --git a/onestop-python-client/onestop/WebPublisher.py b/onestop-python-client/onestop/WebPublisher.py index d04eacc..7b1c6bd 100644 --- a/onestop-python-client/onestop/WebPublisher.py +++ b/onestop-python-client/onestop/WebPublisher.py @@ -42,7 +42,7 @@ def __init__(self, registry_base_url, registry_username, registry_password, ones self.logger.info("Initializing " + self.__class__.__name__) if kwargs: - self.logger.info("There were extra constructor arguments: " + str(kwargs)) + self.logger.warning("There were extra constructor arguments: " + str(kwargs)) def publish_registry(self, metadata_type, uuid, payload, method): """ diff --git a/onestop-python-client/onestop/util/S3MessageAdapter.py b/onestop-python-client/onestop/util/S3MessageAdapter.py index 1dda78c..6bd832d 100644 --- a/onestop-python-client/onestop/util/S3MessageAdapter.py +++ b/onestop-python-client/onestop/util/S3MessageAdapter.py @@ -59,7 +59,7 @@ def __init__(self, access_bucket, type, file_id_prefix, collection_id, log_leve self.logger.info("Initializing " + self.__class__.__name__) if wildargs: - self.logger.error("There were extra constructor arguments: " + str(wildargs)) + self.logger.warning("There were extra constructor arguments: " + str(wildargs)) def transform(self, recs): """ diff --git a/onestop-python-client/onestop/util/S3Utils.py b/onestop-python-client/onestop/util/S3Utils.py index f1bb8e2..0f86e2b 100644 --- a/onestop-python-client/onestop/util/S3Utils.py +++ b/onestop-python-client/onestop/util/S3Utils.py @@ -76,7 +76,7 @@ def __init__(self, access_key, secret_key, log_level = 'INFO', **wildargs): self.logger.info("Initializing " + self.__class__.__name__) if wildargs: - self.logger.error("There were extra constructor arguments: " + str(wildargs)) + self.logger.warning("There were extra constructor arguments: " + str(wildargs)) def connect(self, type, service_name, region): """ diff --git a/onestop-python-client/onestop/util/SqsConsumer.py b/onestop-python-client/onestop/util/SqsConsumer.py index 4d97c34..39356da 100644 --- a/onestop-python-client/onestop/util/SqsConsumer.py +++ b/onestop-python-client/onestop/util/SqsConsumer.py @@ -30,7 +30,7 @@ def __init__(self, log_level = 'INFO', **wildargs): self.logger.info("Initializing " + self.__class__.__name__) if wildargs: - self.logger.error("There were extra constructor arguments: " + str(wildargs)) + self.logger.warning("There were extra constructor arguments: " + str(wildargs)) def connect(self, sqs_resource, sqs_queue_name): """ From f5370eacd32992af4fc3b8a59ef15b1975a0cc77 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 11 
May 2021 15:23:38 -0600 Subject: [PATCH 034/129] 1500-Moved unit tests to tests/unit and integration tests to tests/integration. Change circle ci config to run all onestop-python-client tests. --- .circleci/config.yml | 10 ++++++++-- onestop-python-client/{tests => test}/__init__.py | 0 onestop-python-client/{tests => test}/data/file1.csv | 0 .../{tests => test}/data/file1_s3.csv | 0 onestop-python-client/{tests => test}/data/file2.csv | 0 onestop-python-client/{tests => test}/data/file3.csv | 0 onestop-python-client/{tests => test}/data/file4.csv | 0 onestop-python-client/test/integration/__init__.py | 0 .../{tests => test/integration}/test_WebPublisher.py | 4 ++-- onestop-python-client/test/unit/__init__.py | 0 onestop-python-client/test/unit/extractor/__init__.py | 0 .../unit/extractor/test_CsbExtractor.py} | 9 +++++---- .../unit/test_KafkaConsumer.py} | 2 +- .../unit/test_KafkaPublisher.py} | 2 +- .../unit/test_SqsHandlers.py} | 6 ++---- .../unit/test_WebPublisher.py} | 2 +- onestop-python-client/test/unit/util/__init__.py | 0 .../unit/util/test_S3MessageAdapter.py} | 0 .../S3UtilsTest.py => test/unit/util/test_S3Utils.py} | 6 +++--- .../unit/util/test_SqsConsumer.py} | 0 onestop-python-client/{tests => test}/utils.py | 0 onestop-python-client/tests/util/IntegrationTest.py | 1 - 22 files changed, 23 insertions(+), 19 deletions(-) rename onestop-python-client/{tests => test}/__init__.py (100%) rename onestop-python-client/{tests => test}/data/file1.csv (100%) rename onestop-python-client/{tests => test}/data/file1_s3.csv (100%) rename onestop-python-client/{tests => test}/data/file2.csv (100%) rename onestop-python-client/{tests => test}/data/file3.csv (100%) rename onestop-python-client/{tests => test}/data/file4.csv (100%) create mode 100644 onestop-python-client/test/integration/__init__.py rename onestop-python-client/{tests => test/integration}/test_WebPublisher.py (98%) create mode 100644 onestop-python-client/test/unit/__init__.py create mode 100644 onestop-python-client/test/unit/extractor/__init__.py rename onestop-python-client/{tests/extractor/CsbExtractorTest.py => test/unit/extractor/test_CsbExtractor.py} (92%) rename onestop-python-client/{tests/KafkaConsumerTest.py => test/unit/test_KafkaConsumer.py} (99%) rename onestop-python-client/{tests/KafkaPublisherTest.py => test/unit/test_KafkaPublisher.py} (99%) rename onestop-python-client/{tests/SqsHandlersTest.py => test/unit/test_SqsHandlers.py} (98%) rename onestop-python-client/{tests/test_WebPublisher_unit.py => test/unit/test_WebPublisher.py} (99%) create mode 100644 onestop-python-client/test/unit/util/__init__.py rename onestop-python-client/{tests/util/S3MessageAdapterTest.py => test/unit/util/test_S3MessageAdapter.py} (100%) rename onestop-python-client/{tests/util/S3UtilsTest.py => test/unit/util/test_S3Utils.py} (97%) rename onestop-python-client/{tests/util/SqsConsumerTest.py => test/unit/util/test_SqsConsumer.py} (100%) rename onestop-python-client/{tests => test}/utils.py (100%) delete mode 100644 onestop-python-client/tests/util/IntegrationTest.py diff --git a/.circleci/config.yml b/.circleci/config.yml index 99f7692..dbaddb4 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -107,10 +107,16 @@ jobs: app-dir: ./onestop-python-client pkg-manager: pip - run: - name: "Run util tests" + name: "Run unit tests" command: > cd onestop-python-client/; - python -m unittest tests/util/*.py + python -m unittest discover -s test/unit + + - run: + name: "Run integration tests" + command: > + cd 
onestop-python-client/; + python -m unittest discover -s test/integration orbs: slack: circleci/slack@3.4.2 diff --git a/onestop-python-client/tests/__init__.py b/onestop-python-client/test/__init__.py similarity index 100% rename from onestop-python-client/tests/__init__.py rename to onestop-python-client/test/__init__.py diff --git a/onestop-python-client/tests/data/file1.csv b/onestop-python-client/test/data/file1.csv similarity index 100% rename from onestop-python-client/tests/data/file1.csv rename to onestop-python-client/test/data/file1.csv diff --git a/onestop-python-client/tests/data/file1_s3.csv b/onestop-python-client/test/data/file1_s3.csv similarity index 100% rename from onestop-python-client/tests/data/file1_s3.csv rename to onestop-python-client/test/data/file1_s3.csv diff --git a/onestop-python-client/tests/data/file2.csv b/onestop-python-client/test/data/file2.csv similarity index 100% rename from onestop-python-client/tests/data/file2.csv rename to onestop-python-client/test/data/file2.csv diff --git a/onestop-python-client/tests/data/file3.csv b/onestop-python-client/test/data/file3.csv similarity index 100% rename from onestop-python-client/tests/data/file3.csv rename to onestop-python-client/test/data/file3.csv diff --git a/onestop-python-client/tests/data/file4.csv b/onestop-python-client/test/data/file4.csv similarity index 100% rename from onestop-python-client/tests/data/file4.csv rename to onestop-python-client/test/data/file4.csv diff --git a/onestop-python-client/test/integration/__init__.py b/onestop-python-client/test/integration/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/onestop-python-client/tests/test_WebPublisher.py b/onestop-python-client/test/integration/test_WebPublisher.py similarity index 98% rename from onestop-python-client/tests/test_WebPublisher.py rename to onestop-python-client/test/integration/test_WebPublisher.py index c81a7de..9263938 100644 --- a/onestop-python-client/tests/test_WebPublisher.py +++ b/onestop-python-client/test/integration/test_WebPublisher.py @@ -56,8 +56,8 @@ class WebPublisherTest(unittest.TestCase): def setUpClass(cls): print("Set it up!") - cred_loc = "../config/credentials.yml" - conf_loc = "../config/csb-data-stream-config-template.yml" + cred_loc = "config/credentials.yml" + conf_loc = "config/csb-data-stream-config-template.yml" with open(cred_loc) as f: creds = yaml.load(f, Loader=yaml.FullLoader) diff --git a/onestop-python-client/test/unit/__init__.py b/onestop-python-client/test/unit/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/onestop-python-client/test/unit/extractor/__init__.py b/onestop-python-client/test/unit/extractor/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/onestop-python-client/tests/extractor/CsbExtractorTest.py b/onestop-python-client/test/unit/extractor/test_CsbExtractor.py similarity index 92% rename from onestop-python-client/tests/extractor/CsbExtractorTest.py rename to onestop-python-client/test/unit/extractor/test_CsbExtractor.py index 2c3ff72..415bb26 100644 --- a/onestop-python-client/tests/extractor/CsbExtractorTest.py +++ b/onestop-python-client/test/unit/extractor/test_CsbExtractor.py @@ -11,9 +11,10 @@ def setUp(self): print("Set it up!") self.root_proj_path = os.getcwd() self.assertIsNotNone(self.root_proj_path) - self.key = "tests/data/file4.csv" - # Use open instead of our methodfor simplicity and reliability, plus not testing our code here. 
- self.file_obj = open(self.root_proj_path + '/' + self.key) + self.data_file_path = os.getcwd() + '/test/data/file4.csv' + self.key = "file4.csv" + # Use open instead of our method because we aren't testing our code here. + self.file_obj = open(self.data_file_path) config_dict = { "access_key": "test_access_key", @@ -40,7 +41,7 @@ def test_csb_SME_user_path(self): # Setup bucket and file to read s3 = self.s3_utils.connect('client', 's3', self.region) s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration={'LocationConstraint': self.region}) - self.s3_utils.upload_s3(s3, self.root_proj_path + '/' + self.key, self.bucket, self.key, True) + self.s3_utils.upload_s3(s3, self.data_file_path, self.bucket, self.key, True) self.assertTrue(self.s3_utils.read_bytes_s3(s3, self.bucket, self.key)) # This is how we would expect an external user to get the file. diff --git a/onestop-python-client/tests/KafkaConsumerTest.py b/onestop-python-client/test/unit/test_KafkaConsumer.py similarity index 99% rename from onestop-python-client/tests/KafkaConsumerTest.py rename to onestop-python-client/test/unit/test_KafkaConsumer.py index 1246789..b119e9a 100644 --- a/onestop-python-client/tests/KafkaConsumerTest.py +++ b/onestop-python-client/test/unit/test_KafkaConsumer.py @@ -4,7 +4,7 @@ from onestop.KafkaConsumer import KafkaConsumer from confluent_kafka.schema_registry import SchemaRegistryClient -class KafkaConsumerTest(unittest.TestCase): +class test_KafkaConsumer(unittest.TestCase): kp = None conf_w_security = None conf_wo_security = None diff --git a/onestop-python-client/tests/KafkaPublisherTest.py b/onestop-python-client/test/unit/test_KafkaPublisher.py similarity index 99% rename from onestop-python-client/tests/KafkaPublisherTest.py rename to onestop-python-client/test/unit/test_KafkaPublisher.py index 643d4f5..1c9497b 100644 --- a/onestop-python-client/tests/KafkaPublisherTest.py +++ b/onestop-python-client/test/unit/test_KafkaPublisher.py @@ -5,7 +5,7 @@ from unittest.mock import ANY, patch, MagicMock from confluent_kafka.schema_registry import SchemaRegistryClient -class KafkaPublisherTest(unittest.TestCase): +class test_KafkaPublisher(unittest.TestCase): kp = None conf_w_security = None conf_wo_security = None diff --git a/onestop-python-client/tests/SqsHandlersTest.py b/onestop-python-client/test/unit/test_SqsHandlers.py similarity index 98% rename from onestop-python-client/tests/SqsHandlersTest.py rename to onestop-python-client/test/unit/test_SqsHandlers.py index 4dd2c9e..b881fc9 100644 --- a/onestop-python-client/tests/SqsHandlersTest.py +++ b/onestop-python-client/test/unit/test_SqsHandlers.py @@ -4,15 +4,14 @@ from unittest import mock from unittest.mock import patch from moto import mock_sqs -from tests.utils import abspath_from_relative, create_delete_message +from test.utils import abspath_from_relative, create_delete_message from onestop.WebPublisher import WebPublisher from onestop.util.S3Utils import S3Utils from onestop.util.S3MessageAdapter import S3MessageAdapter from onestop.util.SqsConsumer import SqsConsumer from onestop.util.SqsHandlers import create_delete_handler - -class SqsHandlerTest(unittest.TestCase): +class test_SqsHandler(unittest.TestCase): def setUp(self): print("Set it up!") @@ -94,7 +93,6 @@ def json(self): } return MockResponse(onestop_search_response, 200) - def mocked_search_response_data_empty(*args, **kwargs): class MockResponse: def __init__(self, json_data, status_code): diff --git a/onestop-python-client/tests/test_WebPublisher_unit.py 
b/onestop-python-client/test/unit/test_WebPublisher.py similarity index 99% rename from onestop-python-client/tests/test_WebPublisher_unit.py rename to onestop-python-client/test/unit/test_WebPublisher.py index 4a97f80..af0802f 100644 --- a/onestop-python-client/tests/test_WebPublisher_unit.py +++ b/onestop-python-client/test/unit/test_WebPublisher.py @@ -6,7 +6,7 @@ from moto import mock_s3 from onestop.WebPublisher import WebPublisher -class WebPublisherTest(unittest.TestCase): +class test_WebPublisher(unittest.TestCase): username="admin" password="a_password" uuid = "9f0a5ff2-fcc0-5bcb-a225-024b669c9bba" diff --git a/onestop-python-client/test/unit/util/__init__.py b/onestop-python-client/test/unit/util/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/onestop-python-client/tests/util/S3MessageAdapterTest.py b/onestop-python-client/test/unit/util/test_S3MessageAdapter.py similarity index 100% rename from onestop-python-client/tests/util/S3MessageAdapterTest.py rename to onestop-python-client/test/unit/util/test_S3MessageAdapter.py diff --git a/onestop-python-client/tests/util/S3UtilsTest.py b/onestop-python-client/test/unit/util/test_S3Utils.py similarity index 97% rename from onestop-python-client/tests/util/S3UtilsTest.py rename to onestop-python-client/test/unit/util/test_S3Utils.py index c002003..70f3385 100644 --- a/onestop-python-client/tests/util/S3UtilsTest.py +++ b/onestop-python-client/test/unit/util/test_S3Utils.py @@ -4,7 +4,7 @@ from moto import mock_s3, mock_sqs from moto import mock_glacier -from tests.utils import abspath_from_relative +from test.utils import abspath_from_relative from onestop.util.S3Utils import S3Utils class S3UtilsTest(unittest.TestCase): @@ -77,7 +77,7 @@ def test_add_uuid_metadata(self): @mock_s3 def test_add_file_s3(self): boto_client = self.s3_utils.connect('client', 's3', None) - local_file = abspath_from_relative(__file__, "../data/file4.csv") + local_file = abspath_from_relative(__file__, "../../data/file4.csv") s3_key = "csv/file4.csv" location = {'LocationConstraint': self.region} boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) @@ -119,7 +119,7 @@ def test_add_files(self): overwrite = True for file in local_files: - local_file = abspath_from_relative(__file__, "../data/" + file) + local_file = abspath_from_relative(__file__, "../../data/" + file) s3_file = "csv/" + file self.assertTrue(self.s3_utils.upload_s3(boto_client, local_file, self.bucket, s3_file, overwrite)) diff --git a/onestop-python-client/tests/util/SqsConsumerTest.py b/onestop-python-client/test/unit/util/test_SqsConsumer.py similarity index 100% rename from onestop-python-client/tests/util/SqsConsumerTest.py rename to onestop-python-client/test/unit/util/test_SqsConsumer.py diff --git a/onestop-python-client/tests/utils.py b/onestop-python-client/test/utils.py similarity index 100% rename from onestop-python-client/tests/utils.py rename to onestop-python-client/test/utils.py diff --git a/onestop-python-client/tests/util/IntegrationTest.py b/onestop-python-client/tests/util/IntegrationTest.py deleted file mode 100644 index 381e4d7..0000000 --- a/onestop-python-client/tests/util/IntegrationTest.py +++ /dev/null @@ -1 +0,0 @@ -#TBD \ No newline at end of file From c93bab294aca7990a6927335b5e69550a1a20cb6 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 12 May 2021 11:56:23 -0600 Subject: [PATCH 035/129] 1500-Fixed one of the test_S3Utils tests that was commented out. Removed some blank lines from S3Utils. 
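Note: the previously commented-out inventory-results test now runs against a mocked
Glacier client. A condensed sketch of the mocking approach used in the diff below
(the payload value is just an example):

    # Wrap a JSON payload in a StreamingBody so it looks like Glacier job output.
    import json
    from io import StringIO
    from botocore.response import StreamingBody

    payload = json.dumps({'Body': [{'test': 'value'}]})
    fake_output = {'body': StreamingBody(StringIO(payload), len(payload))}
    # glacier.get_job_output is then mocked to return fake_output.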
--- onestop-python-client/onestop/util/S3Utils.py | 1 - .../test/unit/util/test_S3Utils.py | 40 ++++++++++++++----- 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/onestop-python-client/onestop/util/S3Utils.py b/onestop-python-client/onestop/util/S3Utils.py index 0f86e2b..d5de564 100644 --- a/onestop-python-client/onestop/util/S3Utils.py +++ b/onestop-python-client/onestop/util/S3Utils.py @@ -390,7 +390,6 @@ def s3_restore(self, boto_client, bucket_name, key, days): # returns status of object retrieval return obj.restore - def retrieve_inventory(self, boto_client, vault_name): """ Initiate an Amazon Glacier inventory-retrieval job diff --git a/onestop-python-client/test/unit/util/test_S3Utils.py b/onestop-python-client/test/unit/util/test_S3Utils.py index 70f3385..6b3321f 100644 --- a/onestop-python-client/test/unit/util/test_S3Utils.py +++ b/onestop-python-client/test/unit/util/test_S3Utils.py @@ -1,11 +1,16 @@ import csv import unittest import uuid +import json +from unittest import mock from moto import mock_s3, mock_sqs from moto import mock_glacier from test.utils import abspath_from_relative from onestop.util.S3Utils import S3Utils +from boto.glacier.layer1 import Layer1 +from botocore.response import StreamingBody +from io import StringIO class S3UtilsTest(unittest.TestCase): @@ -203,26 +208,39 @@ def test_retrieve_inventory(self): vault_name = 'archive-vault-new' glacier.create_vault(vaultName=vault_name) - response = self.s3_utils.retrieve_inventory(glacier, vault_name) - self.assertTrue(response['jobId']!= None) + print('jobid %s'%response['jobId']) + self.assertTrue(response['jobId'] != None) - ''' - Excluding for now because it's an asynchronous test - def test_retrieve_inventory_results(self, jobid): + @mock_glacier + @mock_s3 + def test_retrieve_inventory_results(self): """ Once the job has been completed, use the job id to retrieve archive results """ # Connect to your glacier vault for retrieval - glacier = self.su.connect('client', 'glacier', self.su.conf['region']) - vault_name = self.su.conf['vault_name'] + glacier = mock.Mock(spec=Layer1)#self.s3_utils.connect('client', 'glacier', self.region) + vault_name = 'archive-vault-new' + glacier.create_vault(vaultName=vault_name) + + body_json = {'Body': [{'test':'value'}]} + body_encoded = json.dumps(body_json)#.encode("utf-16") - # Retrieve the job results - inventory = self.su.retrieve_inventory_results(vault_name, glacier, jobid) + body = StreamingBody( + StringIO(str(body_encoded)), + len(str(body_encoded)) + ) + + mocked_response = { + 'body': body + } + glacier.get_job_output.return_value = mocked_response + with mock.patch('boto.glacier.job.tree_hash_from_str') as t: + t.return_value = 'tree_hash' + inventory = self.s3_utils.retrieve_inventory_results(vault_name, glacier, 'ASDF78') - self.assertTrue(inventory != None) - ''' + self.assertEqual(body_json, inventory) @mock_s3 def test_extra_parameters_constructor(self): From 32a300a94d27d5fe0582b42da01f9d912c717eab Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 12 May 2021 13:46:01 -0600 Subject: [PATCH 036/129] 1500-Updated python-client requirements boto3. Seems to be using an old one. Trying to figure out how to force it to a newer one. 
--- onestop-python-client/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onestop-python-client/requirements.txt b/onestop-python-client/requirements.txt index 9783885..06a3f5b 100644 --- a/onestop-python-client/requirements.txt +++ b/onestop-python-client/requirements.txt @@ -5,7 +5,7 @@ smart-open PyYAML~=5.3.1 setuptools~=49.2.0 argparse~=1.4.0 -boto3~=1.15.11 +boto3~=1.17.71 requests~=2.24.0 botocore~=1.18.11 moto[all]==2.0.5 From a3f6e96795dc167a2ad6906acd329f28ce8fcf6f Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 12 May 2021 13:59:36 -0600 Subject: [PATCH 037/129] 1500-Updated python-client requirements botocore to 1.20.71 due to conflict between botocore 1.18.11 and moto 2.0.5 --- onestop-python-client/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onestop-python-client/requirements.txt b/onestop-python-client/requirements.txt index 06a3f5b..9a38faa 100644 --- a/onestop-python-client/requirements.txt +++ b/onestop-python-client/requirements.txt @@ -7,6 +7,6 @@ setuptools~=49.2.0 argparse~=1.4.0 boto3~=1.17.71 requests~=2.24.0 -botocore~=1.18.11 +botocore~=1.20.71 moto[all]==2.0.5 undictify From 8fca7a9e55452e5238f1906b568c5729b9fe7e0f Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 12 May 2021 14:06:52 -0600 Subject: [PATCH 038/129] 1500-Changed circleci config for python client to try and update boto --- .circleci/config.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index dbaddb4..c86d021 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -106,12 +106,16 @@ jobs: - python/install-packages: app-dir: ./onestop-python-client pkg-manager: pip + # This is to update boto + - run: pip -V + - run: pip list boto3 + - run: pip install --upgrade --user boto3 + - run: pip3 install boto - run: name: "Run unit tests" command: > cd onestop-python-client/; python -m unittest discover -s test/unit - - run: name: "Run integration tests" command: > From bd38748a45206b34da3b2da62e4920e6e7606ca7 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 12 May 2021 14:15:53 -0600 Subject: [PATCH 039/129] 1500-added region_name to S3Utils connect for session. Suspect it was using my local aws config when region was not specified. 
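Note (hedged sketch of why this matters; credential values are the test placeholders):
a boto3 Session created without a region falls back to whatever is configured
locally (e.g. ~/.aws/config), which made the mocked tests behave differently
between machines. With the region passed through, the session is explicit:

    from onestop.util.S3Utils import S3Utils

    s3_utils = S3Utils('test_access_key', 'test_secret_key', 'DEBUG')
    session = s3_utils.connect('session', None, 'us-east-2')
    csv_file = s3_utils.get_csv_s3(session, 'archive-testing-demo', 'csv/file1.csv')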
--- onestop-python-client/onestop/util/S3Utils.py | 4 +++- .../test/unit/extractor/test_CsbExtractor.py | 2 +- onestop-python-client/test/unit/util/test_S3Utils.py | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/onestop-python-client/onestop/util/S3Utils.py b/onestop-python-client/onestop/util/S3Utils.py index d5de564..cbc8f24 100644 --- a/onestop-python-client/onestop/util/S3Utils.py +++ b/onestop-python-client/onestop/util/S3Utils.py @@ -97,13 +97,15 @@ def connect(self, type, service_name, region): return boto3.Session( aws_access_key_id=self.access_key, aws_secret_access_key=self.secret_key, + region_name=region ) elif type == 'client': return boto3.client( service_name, aws_access_key_id=self.access_key, aws_secret_access_key=self.secret_key, - region_name=region) + region_name=region + ) elif type == 'resource': return boto3.resource( service_name, diff --git a/onestop-python-client/test/unit/extractor/test_CsbExtractor.py b/onestop-python-client/test/unit/extractor/test_CsbExtractor.py index 415bb26..cba1bf7 100644 --- a/onestop-python-client/test/unit/extractor/test_CsbExtractor.py +++ b/onestop-python-client/test/unit/extractor/test_CsbExtractor.py @@ -45,7 +45,7 @@ def test_csb_SME_user_path(self): self.assertTrue(self.s3_utils.read_bytes_s3(s3, self.bucket, self.key)) # This is how we would expect an external user to get the file. - sm_open_file = self.s3_utils.get_csv_s3(self.s3_utils.connect('session', None, None), self.bucket, self.key) + sm_open_file = self.s3_utils.get_csv_s3(self.s3_utils.connect('session', None, self.region), self.bucket, self.key) bounds_dict = CsbExtractor.get_spatial_temporal_bounds(sm_open_file, 'LON', 'LAT', 'TIME') coords = bounds_dict["geospatial"] diff --git a/onestop-python-client/test/unit/util/test_S3Utils.py b/onestop-python-client/test/unit/util/test_S3Utils.py index 6b3321f..f6bdd91 100644 --- a/onestop-python-client/test/unit/util/test_S3Utils.py +++ b/onestop-python-client/test/unit/util/test_S3Utils.py @@ -35,7 +35,7 @@ def setUp(self): @mock_sqs def test_connect_session(self): - session = self.s3_utils.connect('Session', None, None) + session = self.s3_utils.connect('Session', None, self.region) # No exception is called for unique method call session.client('sqs') @@ -92,7 +92,7 @@ def test_add_file_s3(self): @mock_s3 def test_get_csv_s3(self): - boto_session = self.s3_utils.connect('session', None, None) + boto_session = self.s3_utils.connect('session', None, self.region) s3 = self.s3_utils.connect('client', 's3', self.region) location = {'LocationConstraint': self.region} s3_key = "csv/file1.csv" From 6a101f8188e37b4a557f359d2a9d2801f2741da4 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 12 May 2021 14:19:19 -0600 Subject: [PATCH 040/129] 1500-Changing python-client circleci config to see if need to tell it to install boto for pip3 every time. 
--- .circleci/config.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index c86d021..a8ad73f 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -106,11 +106,6 @@ jobs: - python/install-packages: app-dir: ./onestop-python-client pkg-manager: pip - # This is to update boto - - run: pip -V - - run: pip list boto3 - - run: pip install --upgrade --user boto3 - - run: pip3 install boto - run: name: "Run unit tests" command: > From 927fb7e00bdf80b88ec2a0959f1a61c1a6526874 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 12 May 2021 14:27:46 -0600 Subject: [PATCH 041/129] 1500-updated python-client requirements to install boto --- onestop-python-client/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/onestop-python-client/requirements.txt b/onestop-python-client/requirements.txt index 9a38faa..036e217 100644 --- a/onestop-python-client/requirements.txt +++ b/onestop-python-client/requirements.txt @@ -5,6 +5,7 @@ smart-open PyYAML~=5.3.1 setuptools~=49.2.0 argparse~=1.4.0 +boto~=2.49.0 boto3~=1.17.71 requests~=2.24.0 botocore~=1.20.71 From 12374a046e48c43ce5c5d14b8b08ef8757675d44 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 12 May 2021 17:59:20 -0600 Subject: [PATCH 042/129] 1500-Changed python-client integration test(s) to use environment variables if credentials yml doesn't exist. Commented out integration task in circleCI config. Since cannot reach registry on cedardevs. --- .circleci/config.yml | 11 ++++---- .../test/integration/test_WebPublisher.py | 25 +++++++++++++------ 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index a8ad73f..d475399 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -111,11 +111,12 @@ jobs: command: > cd onestop-python-client/; python -m unittest discover -s test/unit - - run: - name: "Run integration tests" - command: > - cd onestop-python-client/; - python -m unittest discover -s test/integration +# This is commented out only because the OneStop we have running on cedardevs doesn't have its registry exposed. You can only reach it via sshing to another machine. 
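Note: for CI, or any environment without config/credentials.yml, the integration
test now reads credentials from the environment. A minimal sketch of supplying
them (values are the placeholders from the credentials template and tests, not
real credentials):

    import os

    os.environ['REGISTRY_USERNAME'] = 'rw_user'
    os.environ['REGISTRY_PASSWORD'] = 'rw_user_pwd'
    os.environ['ACCESS_KEY'] = 'test_access_key'
    os.environ['SECRET_KEY'] = 'test_secret_key'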
+# - run: +# name: "Run integration tests" +# command: > +# cd onestop-python-client/; +# python -m unittest discover -s test/integration orbs: slack: circleci/slack@3.4.2 diff --git a/onestop-python-client/test/integration/test_WebPublisher.py b/onestop-python-client/test/integration/test_WebPublisher.py index 9263938..04211dc 100644 --- a/onestop-python-client/test/integration/test_WebPublisher.py +++ b/onestop-python-client/test/integration/test_WebPublisher.py @@ -2,8 +2,10 @@ import json import unittest import time +import os.path from onestop.WebPublisher import WebPublisher +from os import path class WebPublisherTest(unittest.TestCase): wp = None @@ -59,13 +61,22 @@ def setUpClass(cls): cred_loc = "config/credentials.yml" conf_loc = "config/csb-data-stream-config-template.yml" - with open(cred_loc) as f: - creds = yaml.load(f, Loader=yaml.FullLoader) - - registry_username = creds['registry']['username'] - registry_password = creds['registry']['password'] - access_key = creds['sandbox']['access_key'] - access_secret = creds['sandbox']['secret_key'] + if path.exists(cred_loc): + with open(cred_loc) as f: + creds = yaml.load(f, Loader=yaml.FullLoader) + + registry_username = creds['registry']['username'] + registry_password = creds['registry']['password'] + access_key = creds['sandbox']['access_key'] + access_secret = creds['sandbox']['secret_key'] + else: + print("Credentials file doesn't exist at '%s', using environment variables."%cred_loc) + registry_username = os.environ.get('REGISTRY_USERNAME') + registry_password = os.environ.get("REGISTRY_PASSWORD") + access_key = os.environ.get("ACCESS_KEY") + access_secret = os.environ.get("SECRET_KEY") + if registry_username == None: + raise Exception("REGISTRY_USERNAME not defined as env variable. Credentials file at '%s' doesn't exist." % cred_loc) with open(conf_loc) as f: conf = yaml.load(f, Loader=yaml.FullLoader) From eb0646d103d933ef30b0ba6fb2b98ca5fb8edb41 Mon Sep 17 00:00:00 2001 From: Erin Date: Thu, 13 May 2021 11:58:43 -0600 Subject: [PATCH 043/129] 1500-Removed redundant log_level fields in all the configs. Put into credentials template. 
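Note: since log_level now lives only in the credentials file, callers are expected
to pass it through from there. A minimal sketch, assuming the template values and
hypothetical OneStop URLs (the URLs are illustrative, not from this change):

    import yaml
    from onestop.WebPublisher import WebPublisher

    with open('onestop-python-client/config/credentials-template.yml') as f:
        cred = yaml.load(f, Loader=yaml.FullLoader)

    wp = WebPublisher(
        registry_base_url='http://localhost/onestop/api/registry',   # hypothetical
        registry_username=cred['registry']['username'],
        registry_password=cred['registry']['password'],
        onestop_base_url='http://localhost/onestop/api/search',      # hypothetical
        log_level=cred['log_level'])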
--- onestop-python-client/config/aws-util-config-dev.yml | 1 - onestop-python-client/config/credentials-template.yml | 3 +-- .../config/csb-data-stream-config-template.yml | 1 - scripts/config/aws-util-config-dev.yml | 1 - scripts/config/aws-util-config-test.yml | 1 - scripts/config/csb-data-stream-config.yml | 1 - scripts/config/kafka-publisher-config-dev.yml | 1 - scripts/config/web-publisher-config-dev.yml | 1 - scripts/config/web-publisher-config-local.yml | 1 - 9 files changed, 1 insertion(+), 10 deletions(-) diff --git a/onestop-python-client/config/aws-util-config-dev.yml b/onestop-python-client/config/aws-util-config-dev.yml index c30683e..2fdb5c1 100644 --- a/onestop-python-client/config/aws-util-config-dev.yml +++ b/onestop-python-client/config/aws-util-config-dev.yml @@ -1,5 +1,4 @@ # Example config values for osim client -log_level: INFO # AWS config values sqs_url: https://sqs.us-east-2.amazonaws.com/798276211865/cloud-archive-client-sqs diff --git a/onestop-python-client/config/credentials-template.yml b/onestop-python-client/config/credentials-template.yml index 006e175..f94c70b 100644 --- a/onestop-python-client/config/credentials-template.yml +++ b/onestop-python-client/config/credentials-template.yml @@ -9,5 +9,4 @@ registry: username: rw_user password: rw_user_pwd - - +log_level: INFO \ No newline at end of file diff --git a/onestop-python-client/config/csb-data-stream-config-template.yml b/onestop-python-client/config/csb-data-stream-config-template.yml index 56bad99..8c2d4de 100644 --- a/onestop-python-client/config/csb-data-stream-config-template.yml +++ b/onestop-python-client/config/csb-data-stream-config-template.yml @@ -1,4 +1,3 @@ -log_level: INFO format: csv headers: UNIQUE_ID,FILE_UUID,LON,LAT,DEPTH,TIME,PLATFORM_NAME,PROVIDER type: COLLECTION diff --git a/scripts/config/aws-util-config-dev.yml b/scripts/config/aws-util-config-dev.yml index e054f49..9102be0 100644 --- a/scripts/config/aws-util-config-dev.yml +++ b/scripts/config/aws-util-config-dev.yml @@ -1,5 +1,4 @@ # Example config values for osim client -log_level: INFO # AWS config values sqs_url: https://sqs.us-east-2.amazonaws.com/798276211865/cloud-archive-client-sqs diff --git a/scripts/config/aws-util-config-test.yml b/scripts/config/aws-util-config-test.yml index 6aac07a..9de4618 100644 --- a/scripts/config/aws-util-config-test.yml +++ b/scripts/config/aws-util-config-test.yml @@ -1,5 +1,4 @@ # Example config values for osim client -log_level: DEBUG # AWS config values sqs_url: 'test-queue' diff --git a/scripts/config/csb-data-stream-config.yml b/scripts/config/csb-data-stream-config.yml index 24a7cf6..06a45b6 100644 --- a/scripts/config/csb-data-stream-config.yml +++ b/scripts/config/csb-data-stream-config.yml @@ -1,4 +1,3 @@ -log_level: INFO format: csv headers: UNIQUE_ID,FILE_UUID,LON,LAT,DEPTH,TIME,PLATFORM_NAME,PROVIDER type: COLLECTION diff --git a/scripts/config/kafka-publisher-config-dev.yml b/scripts/config/kafka-publisher-config-dev.yml index 85a66f3..bd5af58 100644 --- a/scripts/config/kafka-publisher-config-dev.yml +++ b/scripts/config/kafka-publisher-config-dev.yml @@ -1,5 +1,4 @@ # Example config values for osim client -log_level: DEBUG # COLLECTION or GRANULE metadata_type: GRANULE diff --git a/scripts/config/web-publisher-config-dev.yml b/scripts/config/web-publisher-config-dev.yml index 9b08391..387d252 100644 --- a/scripts/config/web-publisher-config-dev.yml +++ b/scripts/config/web-publisher-config-dev.yml @@ -1,5 +1,4 @@ # Example config values for osim client -log_level: INFO # 
COLLECTION or GRANULE metadata_type: granule diff --git a/scripts/config/web-publisher-config-local.yml b/scripts/config/web-publisher-config-local.yml index 32db955..3ce7d88 100644 --- a/scripts/config/web-publisher-config-local.yml +++ b/scripts/config/web-publisher-config-local.yml @@ -1,5 +1,4 @@ # Example config values for osim client -log_level: INFO # COLLECTION or GRANULE metadata_type: granule From aa0b9a9ce25f2d27928c83b9278e0597e1c1172a Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 14 May 2021 09:55:39 -0600 Subject: [PATCH 044/129] 1500-Changed the kafka config in the scripts for collection and granule _topic_produce to _topic_publish, as it is in the constructor for KafkaPublisher and KafkaConsumer. --- scripts/config/kafka-publisher-config-dev.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/config/kafka-publisher-config-dev.yml b/scripts/config/kafka-publisher-config-dev.yml index bd5af58..8a94bf3 100644 --- a/scripts/config/kafka-publisher-config-dev.yml +++ b/scripts/config/kafka-publisher-config-dev.yml @@ -6,8 +6,8 @@ metadata_type: GRANULE # Kafka config values brokers: onestop-dev-cp-kafka:9092 schema_registry: http://onestop-dev-cp-schema-registry:8081 -collection_topic_produce: psi-granules-by-collection -granule_topic_produce: psi-granule-parsed +collection_topic_publish: psi-granules-by-collection +granule_topic_publish: psi-granule-parsed collection_topic_consume: psi-collection-input-unknown granule_topic_consume: psi-granule-input-unknown group_id: sme-test From 3b14757e303159f62ea07cff5c9590c990d9033f Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 18 May 2021 13:23:23 -0600 Subject: [PATCH 045/129] 1500-Changed exception message to first be a string then passed into exception. Otherwise wasn't evaluating the variable within message. --- onestop-python-client/test/integration/test_WebPublisher.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/onestop-python-client/test/integration/test_WebPublisher.py b/onestop-python-client/test/integration/test_WebPublisher.py index 04211dc..5c7935a 100644 --- a/onestop-python-client/test/integration/test_WebPublisher.py +++ b/onestop-python-client/test/integration/test_WebPublisher.py @@ -76,7 +76,8 @@ def setUpClass(cls): access_key = os.environ.get("ACCESS_KEY") access_secret = os.environ.get("SECRET_KEY") if registry_username == None: - raise Exception("REGISTRY_USERNAME not defined as env variable. Credentials file at '%s' doesn't exist." % cred_loc) + msg = "REGISTRY_USERNAME not defined as env variable. Credentials file at '" + cred_loc + "' doesn't exist." + raise Exception(msg) with open(conf_loc) as f: conf = yaml.load(f, Loader=yaml.FullLoader) From ebf71ee681da93ea00aad68d9e229c0d132f738a Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 18 May 2021 13:40:41 -0600 Subject: [PATCH 046/129] 1500-Adjusted exception thrown in S3Utils.connect for invalid type, wasn't printing value of variable. Added test for that negative case. 
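The negative test added below asserts only that some Exception is raised; since the fix interpolates the offending type into the message, the message itself can also be checked. A self-contained sketch of that pattern, where the connect stub stands in for S3Utils.connect and is not the library code:

    import unittest

    def connect(conn_type):
        # Stand-in: reject anything other than the two supported boto3 connection types.
        if conn_type not in ('client', 'resource'):
            raise Exception('Unknown boto3 type of "%s"' % conn_type)

    class ConnectTypeTest(unittest.TestCase):
        def test_unknown_type_names_the_bad_value(self):
            # assertRaisesRegex verifies the offending value shows up in the message.
            with self.assertRaisesRegex(Exception, 'junk'):
                connect('junk')

    if __name__ == '__main__':
        unittest.main()
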
--- onestop-python-client/onestop/util/S3Utils.py | 2 +- onestop-python-client/test/unit/util/test_S3Utils.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/onestop-python-client/onestop/util/S3Utils.py b/onestop-python-client/onestop/util/S3Utils.py index cbc8f24..d63e654 100644 --- a/onestop-python-client/onestop/util/S3Utils.py +++ b/onestop-python-client/onestop/util/S3Utils.py @@ -114,7 +114,7 @@ def connect(self, type, service_name, region): aws_secret_access_key=self.secret_key ) else: - raise Exception('Unknown boto3 type of %s'%type) + raise Exception('Unknown boto3 type of "%s"'%(type)) def objectkey_exists(self, bucket, s3_key): """ diff --git a/onestop-python-client/test/unit/util/test_S3Utils.py b/onestop-python-client/test/unit/util/test_S3Utils.py index f6bdd91..91b90a3 100644 --- a/onestop-python-client/test/unit/util/test_S3Utils.py +++ b/onestop-python-client/test/unit/util/test_S3Utils.py @@ -55,6 +55,11 @@ def test_connect_resource(self): # No exception is called for unique method call resource.Queue(url='test') + @mock_sqs + def test_connect_exception_for_invalid_connection_type(self): + with self.assertRaises(Exception): + self.s3_utils.connect('junk', 'sqs', self.region) + @mock_s3 def test_get_uuid_metadata(self): boto_client = self.s3_utils.connect('resource', 's3', None) From 053df0599094d127715449a8031a872aaa9d9049 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 18 May 2021 14:22:37 -0600 Subject: [PATCH 047/129] 1500-Fixed log but in SqsConsumer of microseconds process time being multiplied instead of divided to get seconds. --- onestop-python-client/onestop/util/SqsConsumer.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/onestop-python-client/onestop/util/SqsConsumer.py b/onestop-python-client/onestop/util/SqsConsumer.py index 39356da..1972cc6 100644 --- a/onestop-python-client/onestop/util/SqsConsumer.py +++ b/onestop-python-client/onestop/util/SqsConsumer.py @@ -98,15 +98,12 @@ def receive_messages(self, sqs_queue, sqs_max_polls, cb): else: self.logger.info("s3 event message without 'Records' content received.") - sqs_message.delete() - - self.logger.info("The SQS message has been deleted.") - dt_end = datetime.now(tz=timezone.utc) processing_time = dt_end - dt_start + self.logger.info("Completed processing the message in %s seconds."%(processing_time.microseconds / 1000000)) - self.logger.info("Completed processing message (s):" + str(processing_time.microseconds * 1000)) - + sqs_message.delete() + self.logger.info("The SQS message has been deleted.") except: self.logger.exception( "An exception was thrown while processing a message, but this program will continue. The " From 5c66efa79d9ec4ee79a97d178a515a912ee7c896 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 18 May 2021 15:11:25 -0600 Subject: [PATCH 048/129] 2500-Added SqsHandlers create_upload_handler back with tests. Didn't realize was used, looked obsolete. 
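A sketch of how the restored upload handler is expected to be wired into the SQS consumer, following the same pattern the delete-handler script and the tests below use; config_dict and its keys are assumed to come from the usual parsed config plus credentials, not from this patch:

    from onestop.WebPublisher import WebPublisher
    from onestop.util.S3Utils import S3Utils
    from onestop.util.S3MessageAdapter import S3MessageAdapter
    from onestop.util.SqsConsumer import SqsConsumer
    from onestop.util.SqsHandlers import create_upload_handler

    # config_dict: dict of config values (sqs_name, s3_region, sqs_max_polls, credentials, ...)
    web_publisher = WebPublisher(**config_dict)
    s3_utils = S3Utils(**config_dict)
    s3_message_adapter = S3MessageAdapter(**config_dict)
    sqs_consumer = SqsConsumer(**config_dict)

    upload_handler = create_upload_handler(web_publisher, s3_utils, s3_message_adapter)

    sqs_resource = s3_utils.connect('resource', 'sqs', config_dict['s3_region'])
    queue = sqs_consumer.connect(sqs_resource, config_dict['sqs_name'])
    sqs_consumer.receive_messages(queue, config_dict['sqs_max_polls'], upload_handler)
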
--- .../onestop/util/SqsHandlers.py | 57 ++++++++- .../test/unit/test_SqsHandlers.py | 111 +++++++++++++++++- 2 files changed, 165 insertions(+), 3 deletions(-) diff --git a/onestop-python-client/onestop/util/SqsHandlers.py b/onestop-python-client/onestop/util/SqsHandlers.py index ce0f010..894f8b5 100644 --- a/onestop-python-client/onestop/util/SqsHandlers.py +++ b/onestop-python-client/onestop/util/SqsHandlers.py @@ -11,7 +11,7 @@ def create_delete_handler(web_publisher): """ def delete(records, log_level='INFO'): - logger = ClientLogger.get_logger('SqsHandlers', log_level, False) + logger = ClientLogger.get_logger('SqsHandlers.create_delete_handler.delete', log_level, False) logger.info("In create_delete_handler.delete() handler") logger.debug("Records: %s"%records) @@ -36,9 +36,62 @@ def delete(records, log_level='INFO'): if len(response_json['data']) != 0: granule_uuid = response_json['data'][0]['id'] response = web_publisher.delete_registry('granule', granule_uuid) - print('delete_registry response: %s'%response) + logger.debug('web_publisher.delete_registry response: %s'%response) return response logger.warning("OneStop search response has no 'data' field. Response=%s"%response_json) return delete + +def create_upload_handler(web_publisher, s3_utils, s3_message_adapter): + """ + Creates a upload function handler to be used with SqsConsumer.receive_messages. + + The upload handler function checks the object for a UUID and if one is not found, it will create one for it. + + :param: web_publisher: WebPublisher object + :param: s3_utils: S3Utils object + :param: s3ma: S3MessageAdapter object + + """ + def upload(records, log_level='INFO'): + logger = ClientLogger.get_logger('SqsHandlers.create_upload_handler.upload', log_level, False) + logger.info("In create_upload_handler.upload() handler") + logger.debug("Records: %s"%records) + + rec = records[0] + s3_key = rec['s3']['object']['key'] + logger.info("Received message for " + s3_key) + logger.info("Event type: " + rec['eventName']) + bucket = rec['s3']['bucket']['name'] + logger.info("BUCKET: %s"%bucket) + s3_resource = s3_utils.connect("s3_resource", None) + + # Fetch the object to get the uuid + object_uuid = s3_utils.get_uuid_metadata(s3_resource, bucket, s3_key) + if object_uuid is not None: + logger.info("Retrieved object-uuid: %s"%object_uuid) + else: + logger.info("Adding uuid") + # Can't add uuid to glacier and should be copied over + if "backup" not in bucket: + object_uuid = s3_utils.add_uuid_metadata(s3_resource, bucket, s3_key) + + # Convert s3 message to IM message + json_payload = s3_message_adapter.transform(records) + logger.debug('transformed message, json_payload: %s'%json_payload) + + # Send the message to registry + payload = json_payload.serialize() + method = 'PATCH' # Backup location should be patched if not backup within bucket name + if "backup" not in bucket: + method = 'POST' + + logger.debug('web_publisher.publish_registry method using "%s" with payload %s'%(method,payload)) + registry_response = web_publisher.publish_registry("granule", object_uuid, payload, method) + logger.debug('web_publisher.publish_registry response=%s'%registry_response) + logger.debug('web_publisher.publish_registry response json=%s'%registry_response.json()) + + return registry_response + + return upload \ No newline at end of file diff --git a/onestop-python-client/test/unit/test_SqsHandlers.py b/onestop-python-client/test/unit/test_SqsHandlers.py index b881fc9..c17b972 100644 --- 
a/onestop-python-client/test/unit/test_SqsHandlers.py +++ b/onestop-python-client/test/unit/test_SqsHandlers.py @@ -10,6 +10,7 @@ from onestop.util.S3MessageAdapter import S3MessageAdapter from onestop.util.SqsConsumer import SqsConsumer from onestop.util.SqsHandlers import create_delete_handler +from onestop.util.SqsHandlers import create_upload_handler class test_SqsHandler(unittest.TestCase): @@ -32,7 +33,7 @@ def setUp(self): self.wp = WebPublisher(**self.config_dict) self.s3_utils = S3Utils(**self.config_dict) - self.s3ma = S3MessageAdapter(**self.config_dict) + self.s3_message_adapter = S3MessageAdapter(**self.config_dict) self.sqs_consumer = SqsConsumer(**self.config_dict) self.sqs_max_polls = 3 @@ -215,5 +216,113 @@ def test_delete_handler_eventName_not_delete_ends_cb(self, mock_wp, mock_respons mock_wp.search_onestop.assert_not_called() mock_wp.delete_registry.assert_not_called() + @mock_sqs + @patch('onestop.WebPublisher') + @patch('onestop.util.S3Utils') + @patch('onestop.util.S3MessageAdapter') + def test_upload_handler_happy(self, mock_s3_utils, mock_s3_msg_adapter, mock_wp): + bucket = self.bucket + key = self.key + queue_name = 'test_queue' + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.region) + sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url + sqs_queue = sqs_resource.Queue(queue_name) + + # Send a test message + sqs_client = self.s3_utils.connect('client', 'sqs' , self.region) + message = create_delete_message(self.region, bucket, key) + sqs_client.send_message( + QueueUrl=sqs_queue_url, + MessageBody=json.dumps(message) + ) + + records = json.loads(message['Message'])['Records'] + records_transformed = mock_s3_msg_adapter.transform(records) + cb = create_upload_handler(mock_wp, mock_s3_utils, mock_s3_msg_adapter) + self.sqs_consumer.receive_messages(sqs_queue, 1, cb) + + # Verify get uuid called + mock_s3_utils.get_uuid_metadata.assert_called_with( + mock_s3_utils.connect('s3_resource', None), + bucket, + key) + # Verify uuid not added + mock_s3_utils.add_uuid_metadata.assert_not_called() + # Verify transform called + mock_s3_msg_adapter.transform.assert_called_with(records) + # Verify publish called + mock_wp.publish_registry.assert_called_with( + 'granule', + mock_s3_utils.get_uuid_metadata(mock_s3_utils.connect('s3_resource', None), bucket, key), + records_transformed.serialize(), + 'POST' + ) + + @mock_sqs + @patch('onestop.WebPublisher') + @patch('onestop.util.S3Utils') + @patch('onestop.util.S3MessageAdapter') + def test_upload_handler_adds_uuid(self, mock_s3_utils, mock_s3_msg_adapter, mock_wp): + bucket = self.bucket + key = self.key + queue_name = 'test_queue' + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.region) + sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url + sqs_queue = sqs_resource.Queue(queue_name) + + # Send a test message + sqs_client = self.s3_utils.connect('client', 'sqs' , self.region) + message = create_delete_message(self.region, bucket, key) + sqs_client.send_message( + QueueUrl=sqs_queue_url, + MessageBody=json.dumps(message) + ) + + mock_s3_utils.get_uuid_metadata.return_value = None + cb = create_upload_handler(mock_wp, mock_s3_utils, mock_s3_msg_adapter) + + self.sqs_consumer.receive_messages(sqs_queue, 1, cb) + + # Verify add uuid called + mock_s3_utils.add_uuid_metadata.assert_called_with( + mock_s3_utils.connect('s3_resource', None), + bucket, + key) + + @mock_sqs + @patch('onestop.WebPublisher') + @patch('onestop.util.S3Utils') + 
@patch('onestop.util.S3MessageAdapter') + def test_upload_handler_bucket_as_backup_PATCH(self, mock_s3_utils, mock_s3_msg_adapter, mock_wp): + bucket = "testing_backup_bucket" + key = self.key + queue_name = 'test_queue' + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.region) + sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url + sqs_queue = sqs_resource.Queue(queue_name) + + # Send a test message + sqs_client = self.s3_utils.connect('client', 'sqs' , self.region) + message = create_delete_message(self.region, bucket, key) + sqs_client.send_message( + QueueUrl=sqs_queue_url, + MessageBody=json.dumps(message) + ) + + mock_s3_utils.get_uuid_metadata.return_value = None + records = json.loads(message['Message'])['Records'] + records_transformed = mock_s3_msg_adapter.transform(records) + cb = create_upload_handler(mock_wp, mock_s3_utils, mock_s3_msg_adapter) + + self.sqs_consumer.receive_messages(sqs_queue, 1, cb) + + # Verify publish called + mock_wp.publish_registry.assert_called_with( + 'granule', + mock_s3_utils.get_uuid_metadata(mock_s3_utils.connect('s3_resource', None), bucket, key), + records_transformed.serialize(), + 'PATCH' + ) + if __name__ == '__main__': unittest.main() \ No newline at end of file From d4b2013c3f84125e0941be6605cdfb03c95944d2 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 18 May 2021 20:49:13 -0600 Subject: [PATCH 049/129] 1500-Changed references to psi_registry_url to registry_base_url --- kubernetes/pyconsumer-pod.yaml | 2 +- scripts/config/csb-data-stream-config.yml | 2 +- serverless/conf.py | 2 +- serverless/lambda_function.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/kubernetes/pyconsumer-pod.yaml b/kubernetes/pyconsumer-pod.yaml index fed2258..6943403 100644 --- a/kubernetes/pyconsumer-pod.yaml +++ b/kubernetes/pyconsumer-pod.yaml @@ -72,7 +72,7 @@ data: headers: UNIQUE_ID,FILE_UUID,LON,LAT,DEPTH,TIME,PLATFORM_NAME,PROVIDER type: COLLECTION collection_id: fdb56230-87f4-49f2-ab83-104cfd073177 - psi_registry_url: https://cedardevs.org/ + registry_base_url: https://cedardevs.org/ access_bucket: https://archive-testing-demo.s3-us-east-2.amazonaws.com #access_bucket: https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com file_identifier_prefix: "gov.noaa.ncei.csb:" diff --git a/scripts/config/csb-data-stream-config.yml b/scripts/config/csb-data-stream-config.yml index 06a45b6..2d25328 100644 --- a/scripts/config/csb-data-stream-config.yml +++ b/scripts/config/csb-data-stream-config.yml @@ -2,7 +2,7 @@ format: csv headers: UNIQUE_ID,FILE_UUID,LON,LAT,DEPTH,TIME,PLATFORM_NAME,PROVIDER type: COLLECTION collection_id: fdb56230-87f4-49f2-ab83-104cfd073177 -psi_registry_url: https://internal-a683c98a66fb011eaa4230e0d5e5657f-369075387.us-east-1.elb.amazonaws.com +registry_base_url: https://internal-a683c98a66fb011eaa4230e0d5e5657f-369075387.us-east-1.elb.amazonaws.com access_bucket: https://archive-testing-demo.s3-us-east-2.amazonaws.com #access_bucket: https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com file_id_prefix: "gov.noaa.ncei.csb:" diff --git a/serverless/conf.py b/serverless/conf.py index b41eb0b..26ef3cd 100644 --- a/serverless/conf.py +++ b/serverless/conf.py @@ -3,6 +3,6 @@ HEADERS = 'UNIQUE_ID,FILE_UUID,LON,LAT,DEPTH,TIME,PLATFORM_NAME,PROVIDER' TYPE = 'COLLECTION' COLLECTION_ID = 'fdb56230-87f4-49f2-ab83-104cfd073177' -PSI_REGISTRY_URL = 'http://internal-a683c98a66fb011eaa4230e0d5e5657f-369075387.us-east-1.elb.amazonaws.com' +REGISTRY_BASE_URL = 
'http://internal-a683c98a66fb011eaa4230e0d5e5657f-369075387.us-east-1.elb.amazonaws.com' ACCESS_BUCKET = 'https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com' FILE_IDENTIFIER_PREFIX = 'gov.noaa.ncei.csb:' diff --git a/serverless/lambda_function.py b/serverless/lambda_function.py index abe8fb7..3b6cd97 100644 --- a/serverless/lambda_function.py +++ b/serverless/lambda_function.py @@ -9,7 +9,7 @@ def lambda_handler(event, context): - registry_url = conf.PSI_REGISTRY_URL + "/metadata/granule" + registry_url = conf.REGISTRY_BASE_URL + "/metadata/granule" for rec in event['Records']: From a0711f2276e6854dad6a6502db3619f677631d74 Mon Sep 17 00:00:00 2001 From: Erin Date: Thu, 20 May 2021 15:34:35 -0600 Subject: [PATCH 050/129] 1507-Added sqs_name to helm values. --- helm/onestop-sqs-consumer/values.yaml | 1 + helm/sme-chart/values.yaml | 1 + 2 files changed, 2 insertions(+) diff --git a/helm/onestop-sqs-consumer/values.yaml b/helm/onestop-sqs-consumer/values.yaml index 20557a0..afbc414 100644 --- a/helm/onestop-sqs-consumer/values.yaml +++ b/helm/onestop-sqs-consumer/values.yaml @@ -58,6 +58,7 @@ config: |- # AWS config values sqs_url: https://sqs.us-east-2.amazonaws.com/798276211865/cloud-archive-client-sqs + sqs_name: 'test-queue' sqs_max_polls: 100 s3_region: us-east-2 s3_bucket: archive-testing-demo diff --git a/helm/sme-chart/values.yaml b/helm/sme-chart/values.yaml index 924f62f..0c68925 100644 --- a/helm/sme-chart/values.yaml +++ b/helm/sme-chart/values.yaml @@ -15,6 +15,7 @@ config: |- # AWS config values sqs_url: https://sqs.us-east-2.amazonaws.com/798276211865/cloud-archive-client-sqs + sqs_name: 'test-queue' sqs_max_polls: 100 s3_region: us-east-2 s3_bucket: archive-testing-demo From ef2785c2952617d17eed3b3536cf828089623ab1 Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 21 May 2021 12:26:31 -0600 Subject: [PATCH 051/129] 1508-Added unit tests for parsed_record file. 
--- .../schemas/avro/psi/test_ParsedRecord.py | 144 ++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/test_ParsedRecord.py diff --git a/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/test_ParsedRecord.py b/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/test_ParsedRecord.py new file mode 100644 index 0000000..3f2d865 --- /dev/null +++ b/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/test_ParsedRecord.py @@ -0,0 +1,144 @@ +import unittest + +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.parsed_record import ParsedRecord + +class test_ParsedRecord(unittest.TestCase): + + def test_type(self): + content_dict = { + "fileInformation":{ + "checksums":[ + { + "value":"4809084627a18d54db59659819f8a4b5d2c76367", + "algorithm":"SHA1" + } + ], + "headers":"NetCDF file reader", + "size":22876986, + "name":"OR_ABI-L1b-RadF-M6C10_G16_s20193030950389_e20193031000109_c20193031000158.nc", + "format":"NetCDF", + "optionalAttributes":{ + } + } + } + + parsed_record = ParsedRecord().from_dict(content_dict) + + self.assertIsNotNone(parsed_record) + + def test_discovery(self): + content_dict = { + "discovery":{ + "fileIdentifier":"1034194888", + "temporalBounding":{ + "beginDate":"2019-10-30T05:50:39Z", + "endDate":"2019-10-30T06:00:11Z" + }, + "parentIdentifier":"5b58de08-afef-49fb-99a1-9c5d5c003bde", + "links":[ + { + "linkFunction":"download", + "linkUrl":"s3://noaa-goes16/ABI-L1b-RadF/2019/303/09/OR_ABI-L1b-RadF-M6C10_G16_s20193030950389_e20193031000109_c20193031000158.nc", + "linkName":"Amazon S3", + "linkProtocol":"HTTPS" + } + ], + "spatialBounding":{ + "coordinates":[ + [ + [ + -156.2995, + -81.3282 + ], + [ + 6.2995, + -81.3282 + ], + [ + 6.2995, + 81.3282 + ], + [ + -156.2995, + 81.3282 + ], + [ + -156.2995, + -81.3282 + ] + ] + ], + "type":"Polygon" + }, + "title":"OR_ABI-L1b-RadF-M6C10_G16_s20193030950389_e20193031000109_c20193031000158.nc" + } + } + + parsed_record = ParsedRecord().from_dict(content_dict) + + self.assertIsNotNone(parsed_record) + +# TODO +# def test_analysis(self): + + def test_fileInformation(self): + content_dict = { + "fileInformation":{ + "checksums":[ + { + "value":"4809084627a18d54db59659819f8a4b5d2c76367", + "algorithm":"SHA1" + } + ], + "headers":"NetCDF file reader", + "size":22876986, + "name":"OR_ABI-L1b-RadF-M6C10_G16_s20193030950389_e20193031000109_c20193031000158.nc", + "format":"NetCDF", + "optionalAttributes":{ + } + } + } + + parsed_record = ParsedRecord().from_dict(content_dict) + + self.assertIsNotNone(parsed_record) + + def test_fileLocations(self): + content_dict = { + "fileLocations":{ + "s3://noaa-goes16/ABI-L1b-RadF/2019/303/09/OR_ABI-L1b-RadF-M6C10_G16_s20193030950389_e20193031000109_c20193031000158.nc":{ + "serviceType":"Amazon:AWS:S3", + "deleted":False, + "restricted":False, + "asynchronous":False, + "locality":"us-east-1", + "lastModified":1572430074000, + "type":"ACCESS", + "optionalAttributes":{ + }, + "uri":"s3://noaa-goes16/ABI-L1b-RadF/2019/303/09/OR_ABI-L1b-RadF-M6C10_G16_s20193030950389_e20193031000109_c20193031000158.nc" + } + } + } + + parsed_record = ParsedRecord().from_dict(content_dict) + print("parsed_record:%s"%parsed_record) + self.assertIsNotNone(parsed_record) + +# TODO +# def test_publishing(self): + + def test_relationships(self): + content_dict = { + "relationships":[ + { + 
"id":"5b58de08-afef-49fb-99a1-9c5d5c003bde", + "type":"COLLECTION" + } + ] + } + + # Seems to want: ‘relationships': [{"type": {"type":"COLLECTION"}, "id":'5b58de08-afef-49fb-99a1-9c5d5c003bde'}] + parsed_record = ParsedRecord().from_dict(content_dict) + + self.assertIsNotNone(parsed_record) From 77190e87d182dce09046cf5caf28f1553d072226 Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 21 May 2021 15:53:07 -0600 Subject: [PATCH 052/129] 1507-fixed bug in S3Utils upload_s3 passing in wrong parameter type to --- onestop-python-client/onestop/util/S3Utils.py | 2 +- .../test/unit/util/test_S3Utils.py | 18 +++++++++++++----- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/onestop-python-client/onestop/util/S3Utils.py b/onestop-python-client/onestop/util/S3Utils.py index d63e654..24a81c3 100644 --- a/onestop-python-client/onestop/util/S3Utils.py +++ b/onestop-python-client/onestop/util/S3Utils.py @@ -220,7 +220,7 @@ def upload_s3(self, boto_client, local_file, bucket, s3_key, overwrite): obj_uuid = str(uuid.uuid4()) if not overwrite: - key_exists = self.objectkey_exists(boto_client, bucket, s3_key) + key_exists = self.objectkey_exists(bucket, s3_key) if (not key_exists) or (key_exists and overwrite): try: diff --git a/onestop-python-client/test/unit/util/test_S3Utils.py b/onestop-python-client/test/unit/util/test_S3Utils.py index 91b90a3..6508837 100644 --- a/onestop-python-client/test/unit/util/test_S3Utils.py +++ b/onestop-python-client/test/unit/util/test_S3Utils.py @@ -85,15 +85,24 @@ def test_add_uuid_metadata(self): self.assertTrue(self.s3_utils.add_uuid_metadata(boto_client, self.bucket, s3_key)) @mock_s3 - def test_add_file_s3(self): + def test_add_file_s3_overwrite(self): boto_client = self.s3_utils.connect('client', 's3', None) local_file = abspath_from_relative(__file__, "../../data/file4.csv") s3_key = "csv/file4.csv" location = {'LocationConstraint': self.region} boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) - overwrite = True - self.assertTrue(self.s3_utils.upload_s3(boto_client, local_file, self.bucket, s3_key, overwrite)) + self.assertTrue(self.s3_utils.upload_s3(boto_client, local_file, self.bucket, s3_key, True)) + + @mock_s3 + def test_add_file_s3_nooverwrite(self): + boto_client = self.s3_utils.connect('client', 's3', None) + local_file = abspath_from_relative(__file__, "../../data/file4.csv") + s3_key = "csv/file4.csv" + location = {'LocationConstraint': self.region} + boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) + + self.assertTrue(self.s3_utils.upload_s3(boto_client, local_file, self.bucket, s3_key, False)) @mock_s3 def test_get_csv_s3(self): @@ -126,12 +135,11 @@ def test_add_files(self): local_files = ["file1_s3.csv", "file2.csv", "file3.csv"] location = {'LocationConstraint': self.region} boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) - overwrite = True for file in local_files: local_file = abspath_from_relative(__file__, "../../data/" + file) s3_file = "csv/" + file - self.assertTrue(self.s3_utils.upload_s3(boto_client, local_file, self.bucket, s3_file, overwrite)) + self.assertTrue(self.s3_utils.upload_s3(boto_client, local_file, self.bucket, s3_file, True)) @mock_s3 @mock_glacier From c073a2f0419ceb768b7d9db43a287de637629ff9 Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 28 May 2021 13:09:22 -0600 Subject: [PATCH 053/129] 1507-Updated circleCI config to run/publish docker image of onestop-python-client based on branch name. 
Removed unnecessary comments. --- .circleci/config.yml | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index d475399..9df7485 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -55,7 +55,6 @@ jobs: # - slack/status: # fail_only: false -# cli build cli-build: executor: docker/docker steps: @@ -75,8 +74,7 @@ jobs: - slack/status: fail_only: false -# clients build - client-build: + python-client-build: executor: docker/docker steps: - setup_remote_docker @@ -89,14 +87,11 @@ jobs: - run: name: "What branch am I on now?" command: echo $CIRCLE_BRANCH -#no need to push this image yet - docker/push: image: cedardevs/onestop-python-client tag: ${CIRCLE_BRANCH}-SNAPSHOT - slack/status: fail_only: false - # Base test configuration for Go library tests Each distinct version should - # inherit this base, and override (at least) the container image used. python-client-test: &python-client-test executor: python/default @@ -127,9 +122,8 @@ version: 2.1 workflows: main: jobs: -# - "latest" # - cli-test # - cli-build -# - client-build + - python-client-build - python-client-test From 5d3344c33a8269864d68fb1dfe0dfda343368ea5 Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 28 May 2021 14:09:11 -0600 Subject: [PATCH 054/129] 1507-Copied root Dockerfile to onestop-python-client to get docker publishing images. --- onestop-python-client/Dockerfile | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 onestop-python-client/Dockerfile diff --git a/onestop-python-client/Dockerfile b/onestop-python-client/Dockerfile new file mode 100644 index 0000000..e5ec186 --- /dev/null +++ b/onestop-python-client/Dockerfile @@ -0,0 +1,10 @@ +FROM python:3.8 +COPY ./onestop-python-client /onestop-python-client +COPY ./scripts /scripts +RUN apt-get update +RUN pip install --upgrade pip +RUN pip install ./onestop-python-client +RUN pip install -r ./onestop-python-client/requirements.txt + +#Base image stays up for dev access +CMD tail -f /dev/null From 46f7fff4b17b4c88f71c54e14faf93181f00c573 Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 28 May 2021 14:26:51 -0600 Subject: [PATCH 055/129] 1507-Removed from Dockerfile copy step of module in, had trouble doing to and from examples hoping unnecessary. --- onestop-python-client/Dockerfile | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/onestop-python-client/Dockerfile b/onestop-python-client/Dockerfile index e5ec186..28bdc61 100644 --- a/onestop-python-client/Dockerfile +++ b/onestop-python-client/Dockerfile @@ -1,10 +1,4 @@ FROM python:3.8 -COPY ./onestop-python-client /onestop-python-client -COPY ./scripts /scripts RUN apt-get update RUN pip install --upgrade pip -RUN pip install ./onestop-python-client -RUN pip install -r ./onestop-python-client/requirements.txt - -#Base image stays up for dev access -CMD tail -f /dev/null +RUN pip install -r requirements.txt From 0166d4fa2a787f55d97e88cf3fae9ad45f13a2be Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 28 May 2021 16:31:03 -0600 Subject: [PATCH 056/129] 1507-Changed onestop-python-client Dockerfile to copy onestop-python-client code to working directory then install requirements.txt. 
--- onestop-python-client/Dockerfile | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/onestop-python-client/Dockerfile b/onestop-python-client/Dockerfile index 28bdc61..4da5281 100644 --- a/onestop-python-client/Dockerfile +++ b/onestop-python-client/Dockerfile @@ -1,4 +1,12 @@ FROM python:3.8 + +WORKDIR /app + +# Copy requirements.txt into workspace and execute it, so installed in workspace. +COPY requirements.txt requirements.txt +RUN pip3 install -r requirements.txt RUN apt-get update -RUN pip install --upgrade pip -RUN pip install -r requirements.txt +RUN pip3 install --upgrade pip + +# Copy source code into workspace +COPY . . \ No newline at end of file From f3088da5d7fb37191eb0683e47d16b13dadef519 Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 28 May 2021 16:49:51 -0600 Subject: [PATCH 057/129] 1507-Add to circleCI building and publishing images of onestop-s3-handler and onestop-sme onestop-python-client --- .circleci/config.yml | 37 +++++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 9df7485..47b7dba 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -74,7 +74,39 @@ jobs: - slack/status: fail_only: false - python-client-build: + onestop-s3-handler-build: + executor: docker/docker + steps: + - setup_remote_docker + - checkout + - docker/check + - docker/build: + path: scripts/sqs-to-registry + image: cedardevs/onestop-s3-handler + tag: ${CIRCLE_BRANCH}-SNAPSHOT + - docker/push: + image: cedardevs/onestop-s3-handler + tag: ${CIRCLE_BRANCH}-SNAPSHOT + - slack/status: + fail_only: false + + onestop-sme-build: + executor: docker/docker + steps: + - setup_remote_docker + - checkout + - docker/check + - docker/build: + path: onestop-python-client + image: cedardevs/onestop-sme + tag: ${CIRCLE_BRANCH}-SNAPSHOT + - docker/push: + image: cedardevs/onestop-sme + tag: ${CIRCLE_BRANCH}-SNAPSHOT + - slack/status: + fail_only: false + + onestop-python-client-build: executor: docker/docker steps: - setup_remote_docker @@ -84,9 +116,6 @@ jobs: path: onestop-python-client image: cedardevs/onestop-python-client tag: ${CIRCLE_BRANCH}-SNAPSHOT - - run: - name: "What branch am I on now?" - command: echo $CIRCLE_BRANCH - docker/push: image: cedardevs/onestop-python-client tag: ${CIRCLE_BRANCH}-SNAPSHOT From eaf145422a24609220d0ef973e79c705d21e3a01 Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 28 May 2021 16:51:55 -0600 Subject: [PATCH 058/129] 1507-Fixed booboo in circleCI of renaming build and not changing name in the jobs section and added the sme and s3 builds to jobs. --- .circleci/config.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 47b7dba..27ffef8 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -122,7 +122,7 @@ jobs: - slack/status: fail_only: false - python-client-test: &python-client-test + onestop-python-client-test: &python-client-test executor: python/default steps: &steps - checkout @@ -153,6 +153,8 @@ workflows: jobs: # - cli-test # - cli-build - - python-client-build - - python-client-test + - onestop-sme-build + - onestop-s3-handler-build + - onestop-python-client-build + - onestop-python-client-test From ed3a93e6d502d8350c14fa8ba4471af8d4ba5a31 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 1 Jun 2021 13:26:11 -0600 Subject: [PATCH 059/129] 1507-Changed circleCI image tag to 'latest'. Need to revisit. 
--- .circleci/config.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 27ffef8..2fdc0b0 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -83,10 +83,10 @@ jobs: - docker/build: path: scripts/sqs-to-registry image: cedardevs/onestop-s3-handler - tag: ${CIRCLE_BRANCH}-SNAPSHOT + tag: latest - docker/push: image: cedardevs/onestop-s3-handler - tag: ${CIRCLE_BRANCH}-SNAPSHOT + tag: latest - slack/status: fail_only: false @@ -99,10 +99,10 @@ jobs: - docker/build: path: onestop-python-client image: cedardevs/onestop-sme - tag: ${CIRCLE_BRANCH}-SNAPSHOT + tag: latest - docker/push: image: cedardevs/onestop-sme - tag: ${CIRCLE_BRANCH}-SNAPSHOT + tag: latest - slack/status: fail_only: false @@ -115,10 +115,10 @@ jobs: - docker/build: path: onestop-python-client image: cedardevs/onestop-python-client - tag: ${CIRCLE_BRANCH}-SNAPSHOT + tag: latest - docker/push: image: cedardevs/onestop-python-client - tag: ${CIRCLE_BRANCH}-SNAPSHOT + tag: latest - slack/status: fail_only: false From 095d5fafe01764061211cd8f4a3ac7b786e61699 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 2 Jun 2021 10:26:09 -0600 Subject: [PATCH 060/129] 1507-Changed dockerfiles back (had changed it due to so problems with circleci) and changed circleci config. --- .circleci/config.yml | 2 +- Dockerfile | 10 ++++++---- onestop-python-client/Dockerfile | 12 ------------ scripts/sme/Dockerfile | 4 +++- scripts/sqs-to-registry/Dockerfile | 4 +--- 5 files changed, 11 insertions(+), 21 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 2fdc0b0..1f308ea 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -97,7 +97,7 @@ jobs: - checkout - docker/check - docker/build: - path: onestop-python-client + path: scripts/sme/ image: cedardevs/onestop-sme tag: latest - docker/push: diff --git a/Dockerfile b/Dockerfile index e5ec186..21df663 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,10 +1,12 @@ FROM python:3.8 -COPY ./onestop-python-client /onestop-python-client -COPY ./scripts /scripts + +COPY onestop-python-client /onestop-python-client +COPY scripts /scripts + RUN apt-get update RUN pip install --upgrade pip -RUN pip install ./onestop-python-client -RUN pip install -r ./onestop-python-client/requirements.txt +RUN pip install /onestop-python-client +RUN pip install -r /onestop-python-client/requirements.txt #Base image stays up for dev access CMD tail -f /dev/null diff --git a/onestop-python-client/Dockerfile b/onestop-python-client/Dockerfile index 4da5281..e69de29 100644 --- a/onestop-python-client/Dockerfile +++ b/onestop-python-client/Dockerfile @@ -1,12 +0,0 @@ -FROM python:3.8 - -WORKDIR /app - -# Copy requirements.txt into workspace and execute it, so installed in workspace. -COPY requirements.txt requirements.txt -RUN pip3 install -r requirements.txt -RUN apt-get update -RUN pip3 install --upgrade pip - -# Copy source code into workspace -COPY . . \ No newline at end of file diff --git a/scripts/sme/Dockerfile b/scripts/sme/Dockerfile index d4b48fa..c91b8f4 100644 --- a/scripts/sme/Dockerfile +++ b/scripts/sme/Dockerfile @@ -1,6 +1,8 @@ FROM cedardevs/onestop-python-client:latest -COPY . . 
+ RUN pip install argparse RUN pip install psycopg2 +RUN pip install ./onestop-python-client + #ENTRYPOINT [ "python" ,"scripts/sme/sme.py", "-cmd consume", "-b localhost:9092", "-s http://localhost:8081", "-t psi-collection-extractor-to" , "-g sme-test", "-o earliest" ] CMD tail -f /dev/null diff --git a/scripts/sqs-to-registry/Dockerfile b/scripts/sqs-to-registry/Dockerfile index 9db0598..4f59b4e 100644 --- a/scripts/sqs-to-registry/Dockerfile +++ b/scripts/sqs-to-registry/Dockerfile @@ -1,10 +1,8 @@ FROM cedardevs/onestop-python-client:latest -COPY . . + #required by the sme script, not our library RUN pip install argparse -#I should not have to do this, since it is done in the base image -#RUN pip install -r ./onestop-python-client/requirements.txt ENTRYPOINT [ "python" ] CMD [ "s3_notification_handler.py" ] #CMD tail -f /dev/null \ No newline at end of file From 9d4eaba6c91082d3c3008046ea0e16dad76c749f Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 2 Jun 2021 12:35:26 -0600 Subject: [PATCH 061/129] 1507-Removed onestop-python-client dockerfile, unecessary. Therefore changed circleci path for onestop-python-client build to indicate use project root as path. Changed paths in root dockerfile back to ./ instead of / so clearer. --- .circleci/config.yml | 2 +- Dockerfile | 8 ++++---- onestop-python-client/Dockerfile | 0 3 files changed, 5 insertions(+), 5 deletions(-) delete mode 100644 onestop-python-client/Dockerfile diff --git a/.circleci/config.yml b/.circleci/config.yml index 1f308ea..c54ed47 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -113,7 +113,7 @@ jobs: - checkout - docker/check - docker/build: - path: onestop-python-client + path: ./ image: cedardevs/onestop-python-client tag: latest - docker/push: diff --git a/Dockerfile b/Dockerfile index 21df663..d73fa34 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,12 +1,12 @@ FROM python:3.8 -COPY onestop-python-client /onestop-python-client -COPY scripts /scripts +COPY ./onestop-python-client /onestop-python-client +COPY ./scripts /scripts RUN apt-get update RUN pip install --upgrade pip -RUN pip install /onestop-python-client -RUN pip install -r /onestop-python-client/requirements.txt +RUN pip install ./onestop-python-client +RUN pip install -r ./onestop-python-client/requirements.txt #Base image stays up for dev access CMD tail -f /dev/null diff --git a/onestop-python-client/Dockerfile b/onestop-python-client/Dockerfile deleted file mode 100644 index e69de29..0000000 From bca1ce3bc1a8f409fc2c610fbdd1e6205d1e36da Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 2 Jun 2021 12:38:47 -0600 Subject: [PATCH 062/129] 1507-added comment to sqs-to-registry dockerfile about how things get copied over. --- scripts/sqs-to-registry/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/sqs-to-registry/Dockerfile b/scripts/sqs-to-registry/Dockerfile index 4f59b4e..985421d 100644 --- a/scripts/sqs-to-registry/Dockerfile +++ b/scripts/sqs-to-registry/Dockerfile @@ -1,3 +1,4 @@ +# Expect this to copy the scripts directory over and install onestop-python-client. FROM cedardevs/onestop-python-client:latest #required by the sme script, not our library From 4c62de8506add5e5563abd3773f0b904a75978f3 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 2 Jun 2021 12:44:00 -0600 Subject: [PATCH 063/129] 1507-Changed scripts/sme dockerfile to not install onestop-python-client, path is one up which is out of context for this dockerfile (hint root dockerfile does that step). Added comment about in root dockerfile. 
--- Dockerfile | 5 ++++- scripts/sme/Dockerfile | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index d73fa34..a906511 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,8 +5,11 @@ COPY ./scripts /scripts RUN apt-get update RUN pip install --upgrade pip -RUN pip install ./onestop-python-client RUN pip install -r ./onestop-python-client/requirements.txt +# Needed for scripts - do here since directory out of scope when in scripts/* dockerfiles. +# Unsure if possible this isn't latest build, like doing pip install before this is built. +RUN pip install ./onestop-python-client + #Base image stays up for dev access CMD tail -f /dev/null diff --git a/scripts/sme/Dockerfile b/scripts/sme/Dockerfile index c91b8f4..19051c3 100644 --- a/scripts/sme/Dockerfile +++ b/scripts/sme/Dockerfile @@ -1,8 +1,8 @@ +# Expect this to copy the scripts directory over and install onestop-python-client. FROM cedardevs/onestop-python-client:latest +# Install additional python libraries needed by scripts RUN pip install argparse RUN pip install psycopg2 -RUN pip install ./onestop-python-client -#ENTRYPOINT [ "python" ,"scripts/sme/sme.py", "-cmd consume", "-b localhost:9092", "-s http://localhost:8081", "-t psi-collection-extractor-to" , "-g sme-test", "-o earliest" ] CMD tail -f /dev/null From 34ed5975624d44357b174afa75f1c55e58158065 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 2 Jun 2021 21:08:15 -0600 Subject: [PATCH 064/129] 1507-In helm values files changed image pullPolicy to Always because from what can tell it only pulls if the image name changes, such as if you have a version number. We don't do this ATM. --- helm/onestop-sqs-consumer/values.yaml | 2 +- helm/sme-chart/values.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/helm/onestop-sqs-consumer/values.yaml b/helm/onestop-sqs-consumer/values.yaml index afbc414..351cbfa 100644 --- a/helm/onestop-sqs-consumer/values.yaml +++ b/helm/onestop-sqs-consumer/values.yaml @@ -7,7 +7,7 @@ replicaCount: 1 image: repository: cedardevs/onestop-sme tag: latest - pullPolicy: IfNotPresent + pullPolicy: Always imagePullSecrets: [] nameOverride: "" diff --git a/helm/sme-chart/values.yaml b/helm/sme-chart/values.yaml index 0c68925..eb19445 100644 --- a/helm/sme-chart/values.yaml +++ b/helm/sme-chart/values.yaml @@ -1,7 +1,7 @@ image: repository: cedardevs/onestop-e2e-demo tag: latest - pullPolicy: IfNotPresent + pullPolicy: Always secret: registry_username: From d09ad1c7980f33382f5de5da5ca9ce5fa4deb324 Mon Sep 17 00:00:00 2001 From: Erin Date: Thu, 3 Jun 2021 21:47:32 -0600 Subject: [PATCH 065/129] 1507-To helm/*/values.yaml removed unused sqs_url, added cert locations w comment and set security to false (cedar-devs is false), adjusted s3_bucket2 to a real bucket, fixed bug of granule type needing to be GRANULE, and added kafka information. 
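The security block added to the values below carries caLoc/keyLoc/certLoc alongside the enabled flag. A sketch of how such settings are commonly fed into a librdkafka-based client configuration when security is turned on; the mapping onto the ssl.* options is an assumption about the consuming code, not something this patch shows:

    import yaml

    # Path matches the scripts' default -conf location; values come from the helm config below.
    with open('/etc/config/config.yml') as f:
        config = yaml.load(f, Loader=yaml.FullLoader)

    conf = {'bootstrap.servers': config['brokers']}
    if config['security']['enabled']:
        # Assumed mapping of the YAML keys onto librdkafka SSL settings.
        conf.update({
            'security.protocol': 'SSL',
            'ssl.ca.location': config['security']['caLoc'],
            'ssl.key.location': config['security']['keyLoc'],
            'ssl.certificate.location': config['security']['certLoc'],
        })
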
--- helm/onestop-sqs-consumer/values.yaml | 31 +++++++++++++++++++-------- helm/sme-chart/values.yaml | 31 +++++++++++++++++++++------ 2 files changed, 47 insertions(+), 15 deletions(-) diff --git a/helm/onestop-sqs-consumer/values.yaml b/helm/onestop-sqs-consumer/values.yaml index 351cbfa..3af3396 100644 --- a/helm/onestop-sqs-consumer/values.yaml +++ b/helm/onestop-sqs-consumer/values.yaml @@ -57,16 +57,15 @@ config: |- log_level: INFO # AWS config values - sqs_url: https://sqs.us-east-2.amazonaws.com/798276211865/cloud-archive-client-sqs - sqs_name: 'test-queue' - sqs_max_polls: 100 + sqs_name: cloud-archive-client-sqs s3_region: us-east-2 s3_bucket: archive-testing-demo + sqs_max_polls: 100 #AWS config values for 2nd vault in different region vault_name: archive-vault-new s3_region2: us-east-2 - s3_bucket2: noaa-nccf-dev-archive + s3_bucket2: archive-testing-testing-test #CSB stream config format: csv @@ -78,15 +77,29 @@ config: |- file_identifier_prefix: "gov.noaa.ncei.csb:" # COLLECTION or GRANULE - metadata_type: granule + metadata_type: GRANULE registry_base_url: http://onestop-registry:80 onestop_base_url: http://onestop-search:8080 - security: - enabled: True - prefixMap: NESDIS/CSB: 'fdb56230-87f4-49f2-ab83-104cfd073177' NESDIS/H8: '0fad03df-0805-434a-86a6-7dc42d68480e' NESDIS/GOES: '11111111-1111-1111-1111-111111111111' - NESDIS/SAB: '98e03b47-069a-4f2c-8071-649e8c4254d6' \ No newline at end of file + NESDIS/SAB: '98e03b47-069a-4f2c-8071-649e8c4254d6' + + # Kafka config values + brokers: onestop-dev-cp-kafka:9092 + schema_registry: http://onestop-dev-cp-schema-registry:8081 + collection_topic_publish: psi-granules-by-collection + granule_topic_publish: psi-granule-parsed + collection_topic_consume: psi-collection-input-unknown + granule_topic_consume: psi-granule-input-unknown + group_id: sme-test + auto_offset_reset: earliest + security: + # True/False + enabled: False + # If security is enabled then need these: + caLoc: /etc/pki/tls/cert.pem + keyLoc: /etc/pki/tls/private/kafka-user.key + certLoc: /etc/pki/tls/certs/kafka-user.crt \ No newline at end of file diff --git a/helm/sme-chart/values.yaml b/helm/sme-chart/values.yaml index eb19445..795a388 100644 --- a/helm/sme-chart/values.yaml +++ b/helm/sme-chart/values.yaml @@ -14,16 +14,15 @@ config: |- log_level: INFO # AWS config values - sqs_url: https://sqs.us-east-2.amazonaws.com/798276211865/cloud-archive-client-sqs - sqs_name: 'test-queue' - sqs_max_polls: 100 + sqs_name: cloud-archive-client-sqs s3_region: us-east-2 s3_bucket: archive-testing-demo + sqs_max_polls: 100 #AWS config values for 2nd vault in different region vault_name: archive-vault-new s3_region2: us-east-2 - s3_bucket2: noaa-nccf-dev-archive + s3_bucket2: archive-testing-testing-test #CSB stream config format: csv @@ -35,9 +34,29 @@ config: |- file_identifier_prefix: "gov.noaa.ncei.csb:" # COLLECTION or GRANULE - metadata_type: granule + metadata_type: GRANULE registry_base_url: http://onestop-registry:80 onestop_base_url: http://onestop-search:8080 + prefixMap: + NESDIS/CSB: 'fdb56230-87f4-49f2-ab83-104cfd073177' + NESDIS/H8: '0fad03df-0805-434a-86a6-7dc42d68480e' + NESDIS/GOES: '11111111-1111-1111-1111-111111111111' + NESDIS/SAB: '98e03b47-069a-4f2c-8071-649e8c4254d6' + + # Kafka config values + brokers: onestop-dev-cp-kafka:9092 + schema_registry: http://onestop-dev-cp-schema-registry:8081 + collection_topic_publish: psi-granules-by-collection + granule_topic_publish: psi-granule-parsed + collection_topic_consume: psi-collection-input-unknown + 
granule_topic_consume: psi-granule-input-unknown + group_id: sme-test + auto_offset_reset: earliest security: - enabled: True \ No newline at end of file + # True/False + enabled: False + # If security is enabled then need these: + caLoc: /etc/pki/tls/cert.pem + keyLoc: /etc/pki/tls/private/kafka-user.key + certLoc: /etc/pki/tls/certs/kafka-user.crt \ No newline at end of file From 37a65a299ef89615725f79dfd9e0667e83e927fa Mon Sep 17 00:00:00 2001 From: Erin Date: Thu, 3 Jun 2021 22:13:22 -0600 Subject: [PATCH 066/129] 1507-Set metadata_type to uppercase and adjusted the ValueError raised if it metadata_type wasn't GRANULE or COLLECTION. --- onestop-python-client/onestop/KafkaPublisher.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onestop-python-client/onestop/KafkaPublisher.py b/onestop-python-client/onestop/KafkaPublisher.py index 047783c..2be275e 100644 --- a/onestop-python-client/onestop/KafkaPublisher.py +++ b/onestop-python-client/onestop/KafkaPublisher.py @@ -77,7 +77,7 @@ def __init__(self, metadata_type, brokers, schema_registry, security, collection granule_topic: str granule topic you want to produce to """ - self.metadata_type = metadata_type + self.metadata_type = metadata_type.upper() self.brokers = brokers self.schema_registry = schema_registry self.security_enabled = security['enabled'] @@ -91,7 +91,7 @@ def __init__(self, metadata_type, brokers, schema_registry, security, collection self.granule_topic = granule_topic_publish if self.metadata_type not in ['COLLECTION', 'GRANULE']: - raise ValueError("metadata_type must be 'COLLECTION' or 'GRANULE'") + raise ValueError("metadata_type of '%s' must be 'COLLECTION' or 'GRANULE'"%(self.metadata_type)) self.logger = ClientLogger.get_logger(self.__class__.__name__, log_level, False) self.logger.info("Initializing " + self.__class__.__name__) From c3f37611ce8bbfd29aac3a3dc3d2546af5e544ad Mon Sep 17 00:00:00 2001 From: Erin Date: Thu, 3 Jun 2021 22:13:37 -0600 Subject: [PATCH 067/129] 1507-Set metadata_type to uppercase and adjusted the ValueError raised if it metadata_type wasn't GRANULE or COLLECTION. Changed try/raise in consume method to try/finally with same closing of the consumer as before. This way exceptions do fail script, because before was unclear why it wasn't working. 
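The consume() change below replaces the broad except/break with try/finally, so a failing handler now surfaces its exception to the caller while the consumer is still closed. A minimal, generic illustration of that control flow; the names here are generic, not the library's API:

    def consume(consumer, handler):
        try:
            while True:
                msg = consumer.poll(10)
                if msg is None:
                    continue
                handler(msg)      # an exception raised here now propagates to the caller
        finally:
            consumer.close()      # ...but the consumer is always closed before it does
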
--- onestop-python-client/onestop/KafkaConsumer.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/onestop-python-client/onestop/KafkaConsumer.py b/onestop-python-client/onestop/KafkaConsumer.py index 747b0e4..0481af9 100644 --- a/onestop-python-client/onestop/KafkaConsumer.py +++ b/onestop-python-client/onestop/KafkaConsumer.py @@ -79,7 +79,7 @@ def __init__(self, metadata_type, brokers, group_id, auto_offset_reset, schema_r What log level to use for this class """ - self.metadata_type = metadata_type + self.metadata_type = metadata_type.upper() self.brokers = brokers self.group_id = group_id self.auto_offset_reset = auto_offset_reset @@ -95,7 +95,7 @@ def __init__(self, metadata_type, brokers, group_id, auto_offset_reset, schema_r self.granule_topic = granule_topic_consume if self.metadata_type not in ['COLLECTION', 'GRANULE']: - raise ValueError("metadata_type must be 'COLLECTION' or 'GRANULE'") + raise ValueError("metadata_type of '%s' must be 'COLLECTION' or 'GRANULE'"%(self.metadata_type)) self.logger = ClientLogger.get_logger(self.__class__.__name__, log_level, False) self.logger.info("Initializing " + self.__class__.__name__) @@ -153,7 +153,8 @@ def create_consumer(self, registry_client): metadata_schema = latest_schema.schema.schema_str self.logger.debug("metadata_schema: "+metadata_schema) - metadata_deserializer = AvroDeserializer(metadata_schema, registry_client) + + metadata_deserializer = AvroDeserializer(schema_str=metadata_schema, schema_registry_client=registry_client) conf = { 'bootstrap.servers': self.brokers, 'key.deserializer': StringDeserializer('utf-8'), @@ -199,12 +200,7 @@ def consume(self, metadata_consumer, handler): key = msg.key() value = msg.value() - except KafkaError: - raise - try: handler(key, value) - except Exception as e: - self.logger.error("Message handler failed: {}".format(e)) - break - self.logger.debug("Closing metadata_consumer") - metadata_consumer.close() + finally: + self.logger.debug("Closing metadata_consumer") + metadata_consumer.close() From 2cfdcf3a03e21370fe529401c08520176c1da4be Mon Sep 17 00:00:00 2001 From: Erin Date: Thu, 3 Jun 2021 22:14:57 -0600 Subject: [PATCH 068/129] 1507-Added additional log statement in delete handler if record eventName was a Delete type. --- onestop-python-client/onestop/util/SqsHandlers.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/onestop-python-client/onestop/util/SqsHandlers.py b/onestop-python-client/onestop/util/SqsHandlers.py index 894f8b5..fa3503a 100644 --- a/onestop-python-client/onestop/util/SqsHandlers.py +++ b/onestop-python-client/onestop/util/SqsHandlers.py @@ -24,6 +24,8 @@ def delete(records, log_level='INFO'): logger.info("Ending handler, eventName=%s"%record['eventName']) return + logger.info('Attempting to delete record %s'%record) + bucket = record['s3']['bucket']['name'] s3_key = record['s3']['object']['key'] s3_url = "s3://" + bucket + "/" + s3_key From be034ea6585216b4dc1ad30e33d9271c00b8c966 Mon Sep 17 00:00:00 2001 From: Erin Date: Thu, 3 Jun 2021 22:23:15 -0600 Subject: [PATCH 069/129] 1507-Updated KafkaConsumer unit test to reflect change neglected to mention in last commit that the AvroDeserializer constructor needed more specific parameter names (think it got updated and our order of params was wrong). 
--- onestop-python-client/test/unit/test_KafkaConsumer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onestop-python-client/test/unit/test_KafkaConsumer.py b/onestop-python-client/test/unit/test_KafkaConsumer.py index b119e9a..5404c31 100644 --- a/onestop-python-client/test/unit/test_KafkaConsumer.py +++ b/onestop-python-client/test/unit/test_KafkaConsumer.py @@ -126,7 +126,7 @@ def test_create_consumer_calls_AvroDeserializer(self, mock_deserializing_consume deser_consumer = consumer.create_consumer(reg_client) # Verify AvroDeserializer called with expected registry client - mock_avro_deserializer.assert_called_with(ANY, reg_client) + mock_avro_deserializer.assert_called_with(schema_str=ANY, schema_registry_client=reg_client) self.assertIsNotNone(deser_consumer) From 528fa83ad858a423e24521bf707897926ef3870f Mon Sep 17 00:00:00 2001 From: Erin Date: Thu, 3 Jun 2021 22:25:51 -0600 Subject: [PATCH 070/129] 1507-Updated scripts to work with new class constructors that take a dict instead of config location. Removed mocking for launch_delete_handler script since have unit tests that cover this now. --- scripts/launch_delete_handler.py | 106 +++++++++++++------------------ 1 file changed, 43 insertions(+), 63 deletions(-) diff --git a/scripts/launch_delete_handler.py b/scripts/launch_delete_handler.py index 7bb3983..6d000d4 100644 --- a/scripts/launch_delete_handler.py +++ b/scripts/launch_delete_handler.py @@ -1,79 +1,59 @@ -import json -import boto3 import argparse -from moto import mock_s3 -from moto import mock_sqs -from tests.utils import create_delete_message +import os +import yaml + from onestop.WebPublisher import WebPublisher from onestop.util.S3Utils import S3Utils from onestop.util.SqsConsumer import SqsConsumer from onestop.util.SqsHandlers import create_delete_handler - -def mock_init_s3(s3u): - """ Sets up bucket, object, SQS queue, and delete message. 
- - Assumes there are additional keys passed in via config - - :param s3u: S3Utils object - :return: URL of the mock queue created in SQS - """ - boto_client = s3u.connect("s3", None) - bucket = s3u.conf['s3_bucket'] - region = s3u.conf['s3_region'] - key = s3u.conf['s3_key'] - boto_client.create_bucket(Bucket=bucket) - boto_client.put_object(Bucket=bucket, Key=key, Body="foobar") - - sqs_client = boto3.client('sqs', region_name=region) - sqs_queue = sqs_client.create_queue(QueueName=s3u.conf['sqs_name']) - message = create_delete_message(region, bucket, key) - sqs_client.send_message(QueueUrl=sqs_queue['QueueUrl'], MessageBody=json.dumps(message)) - return sqs_queue['QueueUrl'] - +config_dict = {} if __name__ == '__main__': # All command-line arguments have defaults that use test data, with AWS mocking set to true parser = argparse.ArgumentParser(description="Launches SQS delete test") - parser.add_argument('--aws-conf', dest="aws_conf", required=False, default="config/aws-util-config-test.yml", + # Set default config location to the Helm mounted pod configuration location + parser.add_argument('-conf', dest="conf", required=False, default='/etc/config/config.yml', help="AWS config filepath") - parser.add_argument('--osim-conf', dest="osim_conf", required=False, default="config/web-publisher-config-local.yml", - help="OSIM config filepath") - parser.add_argument('-mock', dest="mock", required=False, default=True, help="Use mock AWS or real values") - - parser.add_argument('-cred', dest="cred", required=False, default="config/credentials-template.yml", + parser.add_argument('-cred', dest="cred", required=True, help="Credentials filepath") args = vars(parser.parse_args()) - wp_config = args.pop('osim_conf') - aws_config = args.pop('aws_conf') - cred_config = args.pop('cred') - use_mocks = args.pop('mock') - - web_publisher = WebPublisher(wp_config, cred_config) - s3_utils = S3Utils(aws_config, cred_config) - sqs_consumer = SqsConsumer(aws_config, cred_config) - - if use_mocks is True: - mock_1 = mock_s3() - mock_2 = mock_sqs() - mock_1.start() - mock_2.start() - mock_queue_url = mock_init_s3(s3_utils) - # Need to override the config value here so that sqs_consumer.connect will use the correct url for the queue - sqs_consumer.conf['sqs_url'] = mock_queue_url - - sqs_max_polls = s3_utils.conf['sqs_max_polls'] + # Generate configuration dictionary + conf_loc = args.pop('conf') + with open(conf_loc) as f: + config_dict.update(yaml.load(f, Loader=yaml.FullLoader)) + + # Get credentials from passed in fully qualified path or ENV. 
+ cred_loc = args.pop('cred') + if cred_loc is not None: + with open(cred_loc) as f: + creds = yaml.load(f, Loader=yaml.FullLoader) + registry_username = creds['registry']['username'] + registry_password = creds['registry']['password'] + access_key = creds['sandbox']['access_key'] + access_secret = creds['sandbox']['secret_key'] + else: + print("Using env variables for config parameters") + registry_username = os.environ.get("REGISTRY_USERNAME") + registry_password = os.environ.get("REGISTRY_PASSWORD") + access_key = os.environ.get("ACCESS_KEY") + access_secret = os.environ.get("SECRET_KEY") + + config_dict.update({ + 'registry_username' : registry_username, + 'registry_password' : registry_password, + 'access_key' : access_key, + 'secret_key' : access_secret + }) + + web_publisher = WebPublisher(**config_dict) + s3_utils = S3Utils(**config_dict) + sqs_consumer = SqsConsumer(**config_dict) + + sqs_max_polls = config_dict['sqs_max_polls'] delete_handler = create_delete_handler(web_publisher) + s3_resource = s3_utils.connect('resource', 'sqs', config_dict['s3_region']) + queue = sqs_consumer.connect(s3_resource, config_dict['sqs_name']) - queue = sqs_consumer.connect() - try: - sqs_consumer.receive_messages(queue, sqs_max_polls, delete_handler) - if use_mocks is True: - mock_1.stop() - mock_2.stop() - except Exception as e: - print("Message queue consumption failed: {}".format(e)) - if use_mocks is True: - mock_1.stop() - mock_2.stop() + sqs_consumer.receive_messages(queue, sqs_max_polls, delete_handler) From d43d0673db226a5b57713b9fd052e98c855b82e7 Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 4 Jun 2021 15:55:17 -0600 Subject: [PATCH 071/129] 1507-Updated AvroSerializer call to specify parameter name, since order of params seem to have changed. --- onestop-python-client/onestop/KafkaPublisher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onestop-python-client/onestop/KafkaPublisher.py b/onestop-python-client/onestop/KafkaPublisher.py index 2be275e..a95081d 100644 --- a/onestop-python-client/onestop/KafkaPublisher.py +++ b/onestop-python-client/onestop/KafkaPublisher.py @@ -146,7 +146,7 @@ def create_producer(self, registry_client): topic = self.granule_topic metadata_schema = registry_client.get_latest_version(topic + '-value').schema.schema_str - metadata_serializer = AvroSerializer(metadata_schema, registry_client) + metadata_serializer = AvroSerializer(schema_str=metadata_schema, schema_registry_client=registry_client) conf = {'bootstrap.servers': self.brokers} if self.security_enabled: From 27e7d1be5a2025f88f8b4d0c45a734516edfb77e Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 4 Jun 2021 15:57:44 -0600 Subject: [PATCH 072/129] 1507-Updated AvroSerializer call to specify parameter name, since order of params seem to have changed. 
(forgot intellij's multi select for commits doesn't work) --- onestop-python-client/test/unit/test_KafkaPublisher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onestop-python-client/test/unit/test_KafkaPublisher.py b/onestop-python-client/test/unit/test_KafkaPublisher.py index 1c9497b..f43d3f6 100644 --- a/onestop-python-client/test/unit/test_KafkaPublisher.py +++ b/onestop-python-client/test/unit/test_KafkaPublisher.py @@ -123,7 +123,7 @@ def test_create_producer_calls_AvroSerializer(self, mock_serializing_publisher, publisher.create_producer(reg_client) # Verify AvroSerializer called with expected registry client - mock_avro_serializer.assert_called_with(ANY, reg_client) + mock_avro_serializer.assert_called_with(schema_str=ANY, schema_registry_client=reg_client) @patch('onestop.KafkaPublisher.AvroSerializer') @patch('onestop.KafkaPublisher.SerializingProducer') From 3993106d76a434cb76fbc1e241bd4779fe717e29 Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 4 Jun 2021 16:05:50 -0600 Subject: [PATCH 073/129] 1507-Removed try/catch around code in KafkaPublisher publish_granule so as to get better error reporting. --- onestop-python-client/onestop/KafkaPublisher.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/onestop-python-client/onestop/KafkaPublisher.py b/onestop-python-client/onestop/KafkaPublisher.py index a95081d..a0c66ce 100644 --- a/onestop-python-client/onestop/KafkaPublisher.py +++ b/onestop-python-client/onestop/KafkaPublisher.py @@ -274,13 +274,11 @@ def publish_granule(self, granule_producer, collection_uuid, content_dict): 'discovery': content_dict['discovery'] } - try: - self.logger.debug('Publishing granule with topic='+self.granule_topic+' key='+key+' value='+str(value_dict)) - granule_producer.produce( - topic=self.granule_topic, - value=value_dict, - key=key, - on_delivery=self.delivery_report) - except KafkaError: - raise + self.logger.debug('Publishing granule with topic='+self.granule_topic+' key='+key+' value='+str(value_dict)) + granule_producer.produce( + topic=self.granule_topic, + value=value_dict, + key=key, + on_delivery=self.delivery_report) + granule_producer.poll() From 2ade49bf8dda72244457e422a425d0afd52d5be1 Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 4 Jun 2021 21:16:16 -0600 Subject: [PATCH 074/129] 1507-Removed try/catch around code in KafkaPublisher publish_collection so as to get better error reporting. 
--- onestop-python-client/onestop/KafkaPublisher.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/onestop-python-client/onestop/KafkaPublisher.py b/onestop-python-client/onestop/KafkaPublisher.py index a0c66ce..0ca40d0 100644 --- a/onestop-python-client/onestop/KafkaPublisher.py +++ b/onestop-python-client/onestop/KafkaPublisher.py @@ -214,15 +214,12 @@ def publish_collection(self, collection_producer, collection_uuid, content_dict, 'method': method, 'source': 'unknown', } - try: - self.logger.debug('Publishing collection with topic='+self.collection_topic+' key='+key+' value='+str(value_dict)) - collection_producer.produce( - topic=self.collection_topic, - value=value_dict, - key=key, - on_delivery=self.delivery_report) - except KafkaError: - raise + self.logger.debug('Publishing collection with topic='+self.collection_topic+' key='+key+' value='+str(value_dict)) + collection_producer.produce( + topic=self.collection_topic, + value=value_dict, + key=key, + on_delivery=self.delivery_report) collection_producer.poll() def publish_granule(self, granule_producer, collection_uuid, content_dict): From 9b6dc1b270ae1c77c716f3f042575fe7c197b9ac Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 4 Jun 2021 21:49:12 -0600 Subject: [PATCH 075/129] 1507-Updated helm/*/values.yml for Kafka brokers and schema_registry to what they are on cedar-devs. --- helm/onestop-sqs-consumer/values.yaml | 4 ++-- helm/sme-chart/values.yaml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/helm/onestop-sqs-consumer/values.yaml b/helm/onestop-sqs-consumer/values.yaml index 3af3396..6d0ac4e 100644 --- a/helm/onestop-sqs-consumer/values.yaml +++ b/helm/onestop-sqs-consumer/values.yaml @@ -88,8 +88,8 @@ config: |- NESDIS/SAB: '98e03b47-069a-4f2c-8071-649e8c4254d6' # Kafka config values - brokers: onestop-dev-cp-kafka:9092 - schema_registry: http://onestop-dev-cp-schema-registry:8081 + brokers: cp-cp-kafka:9092 + schema_registry: http://cp-cp-schema-registry:8081 collection_topic_publish: psi-granules-by-collection granule_topic_publish: psi-granule-parsed collection_topic_consume: psi-collection-input-unknown diff --git a/helm/sme-chart/values.yaml b/helm/sme-chart/values.yaml index 795a388..3c72d66 100644 --- a/helm/sme-chart/values.yaml +++ b/helm/sme-chart/values.yaml @@ -45,8 +45,8 @@ config: |- NESDIS/SAB: '98e03b47-069a-4f2c-8071-649e8c4254d6' # Kafka config values - brokers: onestop-dev-cp-kafka:9092 - schema_registry: http://onestop-dev-cp-schema-registry:8081 + brokers: cp-cp-kafka:9092 + schema_registry: http://cp-cp-schema-registry:8081 collection_topic_publish: psi-granules-by-collection granule_topic_publish: psi-granule-parsed collection_topic_consume: psi-collection-input-unknown From 35ed0dc6c199716adeef4d93dda9cf8b0932eafa Mon Sep 17 00:00:00 2001 From: Erin Date: Mon, 7 Jun 2021 10:48:39 -0600 Subject: [PATCH 076/129] 1507-Changed helm/*/values file_identifier_prefix to file_id_prefix (was changed a while ago in the code). 
--- helm/onestop-sqs-consumer/values.yaml | 2 +- helm/sme-chart/values.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/helm/onestop-sqs-consumer/values.yaml b/helm/onestop-sqs-consumer/values.yaml index 6d0ac4e..0baaf1e 100644 --- a/helm/onestop-sqs-consumer/values.yaml +++ b/helm/onestop-sqs-consumer/values.yaml @@ -74,7 +74,7 @@ config: |- collection_id: fdb56230-87f4-49f2-ab83-104cfd073177 access_bucket: https://archive-testing-demo.s3-us-east-2.amazonaws.com #access_bucket: https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com - file_identifier_prefix: "gov.noaa.ncei.csb:" + file_id_prefix: "gov.noaa.ncei.csb:" # COLLECTION or GRANULE metadata_type: GRANULE diff --git a/helm/sme-chart/values.yaml b/helm/sme-chart/values.yaml index 3c72d66..58678a3 100644 --- a/helm/sme-chart/values.yaml +++ b/helm/sme-chart/values.yaml @@ -31,7 +31,7 @@ config: |- collection_id: fdb56230-87f4-49f2-ab83-104cfd073177 access_bucket: https://archive-testing-demo.s3-us-east-2.amazonaws.com #access_bucket: https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com - file_identifier_prefix: "gov.noaa.ncei.csb:" + file_id_prefix: "gov.noaa.ncei.csb:" # COLLECTION or GRANULE metadata_type: GRANULE From 1e7669635a959863186b2ed92fa33b1d23c6cb95 Mon Sep 17 00:00:00 2001 From: Erin Date: Mon, 7 Jun 2021 14:28:07 -0600 Subject: [PATCH 077/129] 1507-Found bug of not updating the s3Utils connect call to new way, updated test to reflect/catch. --- onestop-python-client/onestop/util/SqsHandlers.py | 2 +- onestop-python-client/test/unit/test_SqsHandlers.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/onestop-python-client/onestop/util/SqsHandlers.py b/onestop-python-client/onestop/util/SqsHandlers.py index fa3503a..08f1d05 100644 --- a/onestop-python-client/onestop/util/SqsHandlers.py +++ b/onestop-python-client/onestop/util/SqsHandlers.py @@ -67,9 +67,9 @@ def upload(records, log_level='INFO'): logger.info("Event type: " + rec['eventName']) bucket = rec['s3']['bucket']['name'] logger.info("BUCKET: %s"%bucket) - s3_resource = s3_utils.connect("s3_resource", None) # Fetch the object to get the uuid + s3_resource = s3_utils.connect('resource', 's3', None) object_uuid = s3_utils.get_uuid_metadata(s3_resource, bucket, s3_key) if object_uuid is not None: logger.info("Retrieved object-uuid: %s"%object_uuid) diff --git a/onestop-python-client/test/unit/test_SqsHandlers.py b/onestop-python-client/test/unit/test_SqsHandlers.py index c17b972..cd6a3ad 100644 --- a/onestop-python-client/test/unit/test_SqsHandlers.py +++ b/onestop-python-client/test/unit/test_SqsHandlers.py @@ -242,8 +242,9 @@ def test_upload_handler_happy(self, mock_s3_utils, mock_s3_msg_adapter, mock_wp) self.sqs_consumer.receive_messages(sqs_queue, 1, cb) # Verify get uuid called + mock_s3_utils.connect.assert_called_with('resource', 's3', None) mock_s3_utils.get_uuid_metadata.assert_called_with( - mock_s3_utils.connect('s3_resource', None), + mock_s3_utils.connect(), bucket, key) # Verify uuid not added @@ -285,7 +286,7 @@ def test_upload_handler_adds_uuid(self, mock_s3_utils, mock_s3_msg_adapter, mock # Verify add uuid called mock_s3_utils.add_uuid_metadata.assert_called_with( - mock_s3_utils.connect('s3_resource', None), + mock_s3_utils.connect(), bucket, key) @@ -319,7 +320,7 @@ def test_upload_handler_bucket_as_backup_PATCH(self, mock_s3_utils, mock_s3_msg_ # Verify publish called mock_wp.publish_registry.assert_called_with( 'granule', - 
mock_s3_utils.get_uuid_metadata(mock_s3_utils.connect('s3_resource', None), bucket, key), + mock_s3_utils.get_uuid_metadata(), records_transformed.serialize(), 'PATCH' ) From ede2e556d1ed98e4d8e3fa9c7b6b51489b25566f Mon Sep 17 00:00:00 2001 From: Erin Date: Mon, 7 Jun 2021 15:33:51 -0600 Subject: [PATCH 078/129] 1507-Changed bulid order in CircleCI config, seeing sme/sqs script builds with old code and hoping is because it was being built before the onestop-python-client-build. --- .circleci/config.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index c54ed47..c15d729 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -153,8 +153,8 @@ workflows: jobs: # - cli-test # - cli-build - - onestop-sme-build - - onestop-s3-handler-build - onestop-python-client-build - onestop-python-client-test + - onestop-sme-build + - onestop-s3-handler-build From 6e843f0ed792597395d5059a709329c0483b2dff Mon Sep 17 00:00:00 2001 From: Erin Date: Mon, 7 Jun 2021 16:08:54 -0600 Subject: [PATCH 079/129] 1507-Changed circleCI config to have the onestop-python-client-build require onestop-python-client-test to run and the two script builds to require onestop-python-client-build to run. --- .circleci/config.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index c15d729..a5f0dde 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -153,8 +153,13 @@ workflows: jobs: # - cli-test # - cli-build - - onestop-python-client-build - onestop-python-client-test + - onestop-python-client-build + requires: + - onestop-python-client-test - onestop-sme-build + requires: + - onestop-python-client-build - onestop-s3-handler-build - + requires: + - onestop-python-client-build From 1beb0bc6f9ef339352c73eb6f209208673a09b50 Mon Sep 17 00:00:00 2001 From: Erin Date: Mon, 7 Jun 2021 16:22:22 -0600 Subject: [PATCH 080/129] 1507-Changed circleci config - added colons at end of jobs that had requires field. --- .circleci/config.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index a5f0dde..c8ea89b 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -154,12 +154,12 @@ workflows: # - cli-test # - cli-build - onestop-python-client-test - - onestop-python-client-build + - onestop-python-client-build: requires: - onestop-python-client-test - - onestop-sme-build + - onestop-sme-build: requires: - onestop-python-client-build - - onestop-s3-handler-build + - onestop-s3-handler-build: requires: - onestop-python-client-build From 19dc282ce9aa313bc46431533c5eb583e17f453e Mon Sep 17 00:00:00 2001 From: Erin Date: Mon, 7 Jun 2021 16:34:04 -0600 Subject: [PATCH 081/129] 1507-Making a visual change to onestop-python-client code to test new build requires configuration. --- onestop-python-client/onestop/util/SqsHandlers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onestop-python-client/onestop/util/SqsHandlers.py b/onestop-python-client/onestop/util/SqsHandlers.py index 08f1d05..ce3ca60 100644 --- a/onestop-python-client/onestop/util/SqsHandlers.py +++ b/onestop-python-client/onestop/util/SqsHandlers.py @@ -68,7 +68,7 @@ def upload(records, log_level='INFO'): bucket = rec['s3']['bucket']['name'] logger.info("BUCKET: %s"%bucket) - # Fetch the object to get the uuid + # Fetch the object's uuid from cloud object, if exists. 
s3_resource = s3_utils.connect('resource', 's3', None) object_uuid = s3_utils.get_uuid_metadata(s3_resource, bucket, s3_key) if object_uuid is not None: From 91ba8f3b53b2eea74b80f22fe82d22684154bc7b Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 8 Jun 2021 11:45:34 -0600 Subject: [PATCH 082/129] 1507-Changed SqsHandlers to not serialize the json payload but instead do a json dumps. Was complaining ParsedRecord doesn't have method serialize. Fixed tests, also added size and versionid to test util message since S3MessageAdapter.transform required. --- .../onestop/util/SqsHandlers.py | 13 +++++---- .../test/unit/test_SqsHandlers.py | 28 +++++++------------ onestop-python-client/test/utils.py | 6 +++- 3 files changed, 23 insertions(+), 24 deletions(-) diff --git a/onestop-python-client/onestop/util/SqsHandlers.py b/onestop-python-client/onestop/util/SqsHandlers.py index ce3ca60..9170f8d 100644 --- a/onestop-python-client/onestop/util/SqsHandlers.py +++ b/onestop-python-client/onestop/util/SqsHandlers.py @@ -1,4 +1,7 @@ +import json + from onestop.util.ClientLogger import ClientLogger +from onestop.schemas.util.jsonEncoder import EnumEncoder def create_delete_handler(web_publisher): """ @@ -56,7 +59,7 @@ def create_upload_handler(web_publisher, s3_utils, s3_message_adapter): :param: s3ma: S3MessageAdapter object """ - def upload(records, log_level='INFO'): + def upload(records, log_level='DEBUG'): logger = ClientLogger.get_logger('SqsHandlers.create_upload_handler.upload', log_level, False) logger.info("In create_upload_handler.upload() handler") logger.debug("Records: %s"%records) @@ -80,17 +83,17 @@ def upload(records, log_level='INFO'): object_uuid = s3_utils.add_uuid_metadata(s3_resource, bucket, s3_key) # Convert s3 message to IM message - json_payload = s3_message_adapter.transform(records) + im_message = s3_message_adapter.transform(records) + json_payload = json.dumps(im_message.to_dict(), cls=EnumEncoder) logger.debug('transformed message, json_payload: %s'%json_payload) # Send the message to registry - payload = json_payload.serialize() method = 'PATCH' # Backup location should be patched if not backup within bucket name if "backup" not in bucket: method = 'POST' - logger.debug('web_publisher.publish_registry method using "%s" with payload %s'%(method,payload)) - registry_response = web_publisher.publish_registry("granule", object_uuid, payload, method) + logger.debug('web_publisher.publish_registry method using "%s" with payload %s'%(method,json_payload)) + registry_response = web_publisher.publish_registry("granule", object_uuid, json_payload, method) logger.debug('web_publisher.publish_registry response=%s'%registry_response) logger.debug('web_publisher.publish_registry response json=%s'%registry_response.json()) diff --git a/onestop-python-client/test/unit/test_SqsHandlers.py b/onestop-python-client/test/unit/test_SqsHandlers.py index cd6a3ad..b9e2894 100644 --- a/onestop-python-client/test/unit/test_SqsHandlers.py +++ b/onestop-python-client/test/unit/test_SqsHandlers.py @@ -11,6 +11,7 @@ from onestop.util.SqsConsumer import SqsConsumer from onestop.util.SqsHandlers import create_delete_handler from onestop.util.SqsHandlers import create_upload_handler +from onestop.schemas.util.jsonEncoder import EnumEncoder class test_SqsHandler(unittest.TestCase): @@ -219,8 +220,7 @@ def test_delete_handler_eventName_not_delete_ends_cb(self, mock_wp, mock_respons @mock_sqs @patch('onestop.WebPublisher') @patch('onestop.util.S3Utils') - @patch('onestop.util.S3MessageAdapter') - def 
test_upload_handler_happy(self, mock_s3_utils, mock_s3_msg_adapter, mock_wp): + def test_upload_handler_happy(self, mock_s3_utils, mock_wp): bucket = self.bucket key = self.key queue_name = 'test_queue' @@ -236,9 +236,7 @@ def test_upload_handler_happy(self, mock_s3_utils, mock_s3_msg_adapter, mock_wp) MessageBody=json.dumps(message) ) - records = json.loads(message['Message'])['Records'] - records_transformed = mock_s3_msg_adapter.transform(records) - cb = create_upload_handler(mock_wp, mock_s3_utils, mock_s3_msg_adapter) + cb = create_upload_handler(mock_wp, mock_s3_utils, self.s3_message_adapter) self.sqs_consumer.receive_messages(sqs_queue, 1, cb) # Verify get uuid called @@ -249,13 +247,11 @@ def test_upload_handler_happy(self, mock_s3_utils, mock_s3_msg_adapter, mock_wp) key) # Verify uuid not added mock_s3_utils.add_uuid_metadata.assert_not_called() - # Verify transform called - mock_s3_msg_adapter.transform.assert_called_with(records) - # Verify publish called + # Verify publish called & transform called mock_wp.publish_registry.assert_called_with( 'granule', - mock_s3_utils.get_uuid_metadata(mock_s3_utils.connect('s3_resource', None), bucket, key), - records_transformed.serialize(), + mock_s3_utils.get_uuid_metadata(), + json.dumps(self.s3_message_adapter.transform(json.loads(message['Message'])['Records']).to_dict(), cls=EnumEncoder), 'POST' ) @@ -293,9 +289,8 @@ def test_upload_handler_adds_uuid(self, mock_s3_utils, mock_s3_msg_adapter, mock @mock_sqs @patch('onestop.WebPublisher') @patch('onestop.util.S3Utils') - @patch('onestop.util.S3MessageAdapter') - def test_upload_handler_bucket_as_backup_PATCH(self, mock_s3_utils, mock_s3_msg_adapter, mock_wp): - bucket = "testing_backup_bucket" + def test_upload_handler_bucket_as_backup_PATCH(self, mock_s3_utils, mock_wp): + bucket = "testing_backup_bucket" # backup in bucket means a PATCH should happen. 
key = self.key queue_name = 'test_queue' sqs_resource = self.s3_utils.connect('resource', 'sqs', self.region) @@ -310,10 +305,7 @@ def test_upload_handler_bucket_as_backup_PATCH(self, mock_s3_utils, mock_s3_msg_ MessageBody=json.dumps(message) ) - mock_s3_utils.get_uuid_metadata.return_value = None - records = json.loads(message['Message'])['Records'] - records_transformed = mock_s3_msg_adapter.transform(records) - cb = create_upload_handler(mock_wp, mock_s3_utils, mock_s3_msg_adapter) + cb = create_upload_handler(mock_wp, mock_s3_utils, self.s3_message_adapter) self.sqs_consumer.receive_messages(sqs_queue, 1, cb) @@ -321,7 +313,7 @@ def test_upload_handler_bucket_as_backup_PATCH(self, mock_s3_utils, mock_s3_msg_ mock_wp.publish_registry.assert_called_with( 'granule', mock_s3_utils.get_uuid_metadata(), - records_transformed.serialize(), + json.dumps(self.s3_message_adapter.transform(json.loads(message['Message'])['Records']).to_dict(), cls=EnumEncoder), 'PATCH' ) diff --git a/onestop-python-client/test/utils.py b/onestop-python-client/test/utils.py index 9cb7913..fc124fb 100644 --- a/onestop-python-client/test/utils.py +++ b/onestop-python-client/test/utils.py @@ -26,7 +26,11 @@ def create_delete_message(region, bucket, key): "bucket": {"name": "''' + bucket + '''", "ownerIdentity": {"principalId": "AX8TWPQYA8JEM"}, "arn": "arn:aws:s3:::''' + bucket + '''"}, - "object": {"key": "''' + key + '''", "sequencer": "005FD7D1765F04D8BE"} + "object": {"key": "''' + key + '''", + "sequencer": "005FD7D1765F04D8BE", + "eTag": "44d2452e8bc2c8013e9c673086fbab7a", + "size": 1385, + "versionId": "q6ls_7mhqUbfMsoYiQSiADnHBZQ3Fbzf"} } }] }''', From d29e7a57045827e490040e5da5452eba7ba8c32d Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 8 Jun 2021 13:27:35 -0600 Subject: [PATCH 083/129] 1507-Changed helm*/values.yml onestop and registry urls from onestop* to os*, is url on cedardevs. --- helm/onestop-sqs-consumer/values.yaml | 4 ++-- helm/sme-chart/values.yaml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/helm/onestop-sqs-consumer/values.yaml b/helm/onestop-sqs-consumer/values.yaml index 0baaf1e..bc0f8fb 100644 --- a/helm/onestop-sqs-consumer/values.yaml +++ b/helm/onestop-sqs-consumer/values.yaml @@ -78,8 +78,8 @@ config: |- # COLLECTION or GRANULE metadata_type: GRANULE - registry_base_url: http://onestop-registry:80 - onestop_base_url: http://onestop-search:8080 + registry_base_url: http://os-registry:80 + onestop_base_url: http://os-search:8080 prefixMap: NESDIS/CSB: 'fdb56230-87f4-49f2-ab83-104cfd073177' diff --git a/helm/sme-chart/values.yaml b/helm/sme-chart/values.yaml index 58678a3..3fc6922 100644 --- a/helm/sme-chart/values.yaml +++ b/helm/sme-chart/values.yaml @@ -35,8 +35,8 @@ config: |- # COLLECTION or GRANULE metadata_type: GRANULE - registry_base_url: http://onestop-registry:80 - onestop_base_url: http://onestop-search:8080 + registry_base_url: http://os-registry:80 + onestop_base_url: http://os-search:8080 prefixMap: NESDIS/CSB: 'fdb56230-87f4-49f2-ab83-104cfd073177' From 3beefc398ae2820c13ab2548e53b77323c63b5d0 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 8 Jun 2021 16:11:35 -0600 Subject: [PATCH 084/129] 1507-Added passing in log level to callback method in KafkaConsumer. Adjusted some log statements. 
--- onestop-python-client/onestop/KafkaConsumer.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/onestop-python-client/onestop/KafkaConsumer.py b/onestop-python-client/onestop/KafkaConsumer.py index 0481af9..5359a29 100644 --- a/onestop-python-client/onestop/KafkaConsumer.py +++ b/onestop-python-client/onestop/KafkaConsumer.py @@ -97,6 +97,7 @@ def __init__(self, metadata_type, brokers, group_id, auto_offset_reset, schema_r if self.metadata_type not in ['COLLECTION', 'GRANULE']: raise ValueError("metadata_type of '%s' must be 'COLLECTION' or 'GRANULE'"%(self.metadata_type)) + self.log_level = log_level self.logger = ClientLogger.get_logger(self.__class__.__name__, log_level, False) self.logger.info("Initializing " + self.__class__.__name__) @@ -196,11 +197,12 @@ def consume(self, metadata_consumer, handler): self.logger.info('No Messages') continue - self.logger.debug("Message key="+str(msg.key())+" value="+str(msg.value())) key = msg.key() value = msg.value() + self.logger.debug('Message key=%s'%key) + self.logger.debug('Message value=%s'%value) - handler(key, value) + handler(key, value, self.log_level) finally: self.logger.debug("Closing metadata_consumer") metadata_consumer.close() From ffaf33fef5c7d01527e6499986c17b0cb02d650f Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 8 Jun 2021 20:05:55 -0600 Subject: [PATCH 085/129] 1507-Removed try/finally from the KafkaConsumer.consume() since our code shouldn't be able to throw any exceptions, the ones that could happen you want to bubble up (like connection problems). Moved the closing of connection after while polling section, since was closing the connection. --- .../onestop/KafkaConsumer.py | 33 +++++++++---------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/onestop-python-client/onestop/KafkaConsumer.py b/onestop-python-client/onestop/KafkaConsumer.py index 5359a29..18a84cf 100644 --- a/onestop-python-client/onestop/KafkaConsumer.py +++ b/onestop-python-client/onestop/KafkaConsumer.py @@ -189,20 +189,19 @@ def consume(self, metadata_consumer, handler): """ self.logger.info('Consuming from topic') while True: - try: - msg = metadata_consumer.poll(10) - self.logger.debug("Message received: "+str(msg)) - - if msg is None: - self.logger.info('No Messages') - continue - - key = msg.key() - value = msg.value() - self.logger.debug('Message key=%s'%key) - self.logger.debug('Message value=%s'%value) - - handler(key, value, self.log_level) - finally: - self.logger.debug("Closing metadata_consumer") - metadata_consumer.close() + msg = metadata_consumer.poll(10) + self.logger.debug("Message received: "+str(msg)) + + if msg is None: + self.logger.info('No Messages') + continue + + key = msg.key() + value = msg.value() + self.logger.debug('Message key=%s'%key) + self.logger.debug('Message value=%s'%value) + + handler(key, value, self.log_level) + + self.logger.debug("Closing metadata_consumer") + metadata_consumer.close() From 880b8588f3d6cc4b06abe3536072e4e0b6875172 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 8 Jun 2021 20:08:51 -0600 Subject: [PATCH 086/129] 1507-Narrowed down try/except code in the SqsConsumer.consume() so if there's a problem with the callback it gets thrown - also the sqs_message.delete is outside try so if a connection problem that ends everything. 
--- .../onestop/util/SqsConsumer.py | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/onestop-python-client/onestop/util/SqsConsumer.py b/onestop-python-client/onestop/util/SqsConsumer.py index 1972cc6..d784734 100644 --- a/onestop-python-client/onestop/util/SqsConsumer.py +++ b/onestop-python-client/onestop/util/SqsConsumer.py @@ -80,31 +80,31 @@ def receive_messages(self, sqs_queue, sqs_max_polls, cb): self.logger.debug("Messages: %s" % sqs_messages) for sqs_message in sqs_messages: - try: - # Log start time - dt_start = datetime.now(tz=timezone.utc) - self.logger.info("Starting processing message") - self.logger.debug("Message: %s" % sqs_message) - self.logger.debug("Message body: %s" % sqs_message.body) + # Log start time + dt_start = datetime.now(tz=timezone.utc) + self.logger.info("Starting message processing") + self.logger.debug("Message: %s" % sqs_message) + self.logger.debug("Message body: %s" % sqs_message.body) + try: message_body = json.loads(sqs_message.body) self.logger.debug("Message body message: %s" % message_body['Message']) message_content = json.loads(message_body['Message']) - - if 'Records' in message_content: - recs = message_content['Records'] - self.logger.debug('Message "Records": %s' % recs) - cb(recs, self.log_level) - else: - self.logger.info("s3 event message without 'Records' content received.") - - dt_end = datetime.now(tz=timezone.utc) - processing_time = dt_end - dt_start - self.logger.info("Completed processing the message in %s seconds."%(processing_time.microseconds / 1000000)) - - sqs_message.delete() - self.logger.info("The SQS message has been deleted.") except: self.logger.exception( "An exception was thrown while processing a message, but this program will continue. The " "message will not be deleted from the SQS queue. The message was: %s" % sqs_message) + + if 'Records' in message_content: + recs = message_content['Records'] + self.logger.debug('Message "Records": %s' % recs) + cb(recs, self.log_level) + else: + self.logger.info("s3 event message without 'Records' content received.") + + dt_end = datetime.now(tz=timezone.utc) + processing_time = dt_end - dt_start + self.logger.info("Completed processing the message in %s seconds."%(processing_time.microseconds / 1000000)) + + sqs_message.delete() + self.logger.info("The SQS message has been deleted.") From dd1d5f42fc95b7e55d6fc3523ec64e72ff68bd2f Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 8 Jun 2021 20:39:56 -0600 Subject: [PATCH 087/129] 1507-Fixed test for a KafkaConsumer call back that now includes log level. 
--- onestop-python-client/test/unit/test_KafkaConsumer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onestop-python-client/test/unit/test_KafkaConsumer.py b/onestop-python-client/test/unit/test_KafkaConsumer.py index 5404c31..6106738 100644 --- a/onestop-python-client/test/unit/test_KafkaConsumer.py +++ b/onestop-python-client/test/unit/test_KafkaConsumer.py @@ -281,7 +281,7 @@ def test_consume(self, mock_metadata_consumer, mock_message): # Verify callback function was called once with expected message attributes mock_handler.assert_called_once() - mock_handler.assert_called_with(mock_message_key, mock_message_value) + mock_handler.assert_called_with(mock_message_key, mock_message_value, self.conf_w_security['log_level']) if __name__ == '__main__': unittest.main() \ No newline at end of file From 2d8025a0338a4aba29cd6733275868bf0e5015f8 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 8 Jun 2021 20:40:50 -0600 Subject: [PATCH 088/129] 1507-Fixed a test missed related to commit of changed SqsHandlers to not serialize the json payload but instead do a json dumps. --- onestop-python-client/test/unit/test_SqsHandlers.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/onestop-python-client/test/unit/test_SqsHandlers.py b/onestop-python-client/test/unit/test_SqsHandlers.py index b9e2894..5bba184 100644 --- a/onestop-python-client/test/unit/test_SqsHandlers.py +++ b/onestop-python-client/test/unit/test_SqsHandlers.py @@ -258,8 +258,7 @@ def test_upload_handler_happy(self, mock_s3_utils, mock_wp): @mock_sqs @patch('onestop.WebPublisher') @patch('onestop.util.S3Utils') - @patch('onestop.util.S3MessageAdapter') - def test_upload_handler_adds_uuid(self, mock_s3_utils, mock_s3_msg_adapter, mock_wp): + def test_upload_handler_adds_uuid(self, mock_s3_utils, mock_wp): bucket = self.bucket key = self.key queue_name = 'test_queue' @@ -276,7 +275,7 @@ def test_upload_handler_adds_uuid(self, mock_s3_utils, mock_s3_msg_adapter, mock ) mock_s3_utils.get_uuid_metadata.return_value = None - cb = create_upload_handler(mock_wp, mock_s3_utils, mock_s3_msg_adapter) + cb = create_upload_handler(mock_wp, mock_s3_utils, self.s3_message_adapter) self.sqs_consumer.receive_messages(sqs_queue, 1, cb) From 7e6b72daf60d7cb7f5aa221b0b12c8d82a70e669 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 9 Jun 2021 09:40:20 -0600 Subject: [PATCH 089/129] 1507-Updated scripts to work with new class constructors that take a dict instead of config location. --- scripts/archive_client_integration.py | 96 ++++---- scripts/bucket_automation.py | 89 +++++--- scripts/launch_e2e.py | 197 ++++++---------- scripts/launch_kafka_publisher.py | 18 +- scripts/launch_pyconsumer.py | 135 ++++++----- scripts/sme/sme.py | 213 ++++++++++-------- scripts/sme/smeFunc.py | 75 ++---- .../s3_notification_handler.py | 164 +++++++------- 8 files changed, 495 insertions(+), 492 deletions(-) diff --git a/scripts/archive_client_integration.py b/scripts/archive_client_integration.py index 2831045..be672f8 100644 --- a/scripts/archive_client_integration.py +++ b/scripts/archive_client_integration.py @@ -1,64 +1,74 @@ import argparse -from onestop.util.S3Utils import S3Utils - - -def handler(): - ''' - Simultaneously upload files to main bucket 'noaa-nccf-dev' in us-east-2 and glacier in cross region bucket 'noaa-nccf-dev-archive' in us-west-2. - - :return: str - Returns response from boto3 indicating if upload was successful. 
- ''' - print("Handler...") +import yaml +import os - # config for s3 low level api for us-east-2 - s3 = s3_utils.connect('s3', s3_utils.conf['s3_region']) - bucket_name = s3_utils.conf['s3_bucket'] - - # config for s3 low level api cross origin us-west-2 - s3_cross_region = s3_utils.connect('s3', s3_utils.conf['s3_region2']) - bucket_name_cross_region = s3_utils.conf['s3_bucket2'] - - overwrite = True - - # Add 3 files to bucket - local_files = ["file1.csv", "file2.csv"] - s3_file = None - for file in local_files: - local_file = "tests/data/" + file - # changed the key for testing - s3_file = "public/NESDIS/CSB/" + file - s3_utils.upload_s3(s3, local_file, bucket_name, s3_file, overwrite) +from onestop.util.S3Utils import S3Utils - # Upload file to cross region bucket then transfer to glacier right after - s3_utils.upload_s3(s3_cross_region, local_file, bucket_name_cross_region, s3_file, overwrite) - s3_utils.s3_to_glacier(s3_cross_region, bucket_name_cross_region, s3_file) +config_dict = {} if __name__ == '__main__': + # Example command: python3 archive_client_integration.py -conf /Users/whoever/repo/onestop-clients/scripts/config/combined_template.yml -cred /Users/whoever/repo/onestop-clients/scripts/config/credentials.yml + # python3 archive_client_integration.py -cred /Users/whoever/repo/onestop-clients/scripts/config/credentials.yml parser = argparse.ArgumentParser(description="Launches archive client integration") - parser.add_argument('-conf', dest="conf", required=True, + # Set default config location to the Helm mounted pod configuration location + parser.add_argument('-conf', dest="conf", required=False, default='/etc/config/config.yml', help="AWS config filepath") - parser.add_argument('-cred', dest="cred", required=True, help="Credentials filepath") args = vars(parser.parse_args()) - # Get configuration file path locations + # Generate configuration dictionary conf_loc = args.pop('conf') + with open(conf_loc) as f: + config_dict.update(yaml.load(f, Loader=yaml.FullLoader)) + + # Get credentials from passed in fully qualified path or ENV. cred_loc = args.pop('cred') + if cred_loc is not None: + with open(cred_loc) as f: + creds = yaml.load(f, Loader=yaml.FullLoader) + registry_username = creds['registry']['username'] + registry_password = creds['registry']['password'] + access_key = creds['sandbox']['access_key'] + access_secret = creds['sandbox']['secret_key'] + else: + print("Using env variables for config parameters") + registry_username = os.environ.get("REGISTRY_USERNAME") + registry_password = os.environ.get("REGISTRY_PASSWORD") + access_key = os.environ.get("ACCESS_KEY") + access_secret = os.environ.get("SECRET_KEY") + + config_dict.update({ + 'registry_username' : registry_username, + 'registry_password' : registry_password, + 'access_key' : access_key, + 'secret_key' : access_secret + }) # Upload a test file to s3 bucket - s3_utils = S3Utils(conf_loc, cred_loc) - - handler() - - - - + s3_utils = S3Utils(**config_dict) - + s3 = s3_utils.connect('client', 's3', config_dict['s3_region']) + # config for s3 low level api cross origin us-west-2 + s3_cross_region = s3_utils.connect('client', 's3', config_dict['s3_region2']) + bucket_name_cross_region = config_dict['s3_bucket2'] + overwrite = True + # Files to upload - TODO: User should change these paths. 
+ local_files = ["/scripts/data/file1.csv", "/scripts/data/file2.csv"] + for file in local_files: + print("Uploading file: %s"%file) + # changed the key for testing + s3_file = "public/NESDIS/CSB/" + file + upload = s3_utils.upload_s3(s3, file, config_dict['s3_bucket'], s3_file, overwrite) + if not upload: + raise Exception("Unknown, upload to s3 failed.") + # Upload file to cross region bucket then transfer to glacier right after + upload = s3_utils.upload_s3(s3_cross_region, file, bucket_name_cross_region, s3_file, overwrite) + if not upload: + raise Exception("Unknown, upload to s3 failed.") + s3_utils.s3_to_glacier(s3_cross_region, bucket_name_cross_region, s3_file) diff --git a/scripts/bucket_automation.py b/scripts/bucket_automation.py index a64f11c..5c922ee 100644 --- a/scripts/bucket_automation.py +++ b/scripts/bucket_automation.py @@ -1,7 +1,12 @@ import argparse import json +import os +import yaml + from onestop.util.S3Utils import S3Utils +config_dict = {} + def handler(): ''' Creates bucket with defined key paths @@ -10,43 +15,42 @@ def handler(): Returns boto3 response indicating if bucket creation was successful ''' # connect to low level api - s3 = s3_utils.connect("s3", s3_utils.conf['s3_region']) + s3 = s3_utils.connect('client', 's3', config_dict['s3_region']) # use s3_resource api to check if the bucket exists - s3_resource = s3_utils.connect("s3_resource", s3_utils.conf['s3_region']) + s3_resource = s3_utils.connect('resource', 's3', config_dict['s3_region']) # Create bucket name bucket_name = "noaa-nccf-dev" - # checks to see if the bucket is already created, if it isn't create yet then it will create the bucket, set bucket policy, and create key paths + # Create bucket policy + bucket_policy = { + "Version": "2012-10-17", + "Id": "noaa-nccf-dev-policy", + "Statement": [ + { + "Sid": "PublicRead", + "Effect": "Allow", + "Principal": "*", + "Action": "s3:GetObject", + "Resource": f'arn:aws:s3:::{bucket_name}/public/*' + }] + } + # Convert the policy from JSON dict to string + bucket_policy_str = json.dumps(bucket_policy) + + # checks to see if the bucket is already created, if it isn't create it, then it will create the bucket, set bucket policy, and create key paths if not s3_resource.Bucket(bucket_name) in s3_resource.buckets.all(): """ - Create bucket - need to specify bucket location for every region except us-east-1 -> https://github.com/aws/aws-cli/issues/2603 """ s3.create_bucket(Bucket=bucket_name, - CreateBucketConfiguration={'LocationConstraint': s3_utils.conf['s3_region']}, + CreateBucketConfiguration={'LocationConstraint': config_dict['s3_region']}, ObjectLockEnabledForBucket=True) - # Create bucket policy - bucket_policy = { - "Version": "2012-10-17", - "Id": "noaa-nccf-dev-policy", - "Statement": [ - { - "Sid": "PublicRead", - "Effect": "Allow", - "Principal": "*", - "Action": "s3:GetObject", - "Resource": f'arn:aws:s3:::{bucket_name}/public/*' - }] - } - - # Convert the policy from JSON dict to string - bucket_policy = json.dumps(bucket_policy) - # Set new bucket policy - s3.put_bucket_policy(Bucket=bucket_name, Policy=bucket_policy) + s3.put_bucket_policy(Bucket=bucket_name, Policy=bucket_policy_str) """ - Create Public Key Paths @@ -86,6 +90,9 @@ def handler(): s3.put_object(Bucket=bucket_name, Body='', Key='private/OMAO/') s3.put_object(Bucket=bucket_name, Body='', Key='private/OAR/') + else: + #Set bucket policy + s3.put_bucket_policy(Bucket=bucket_name, Policy=bucket_policy_str) # Set CORS bucket config cors_config = { @@ -109,12 +116,6 @@ def 
handler(): } s3.put_bucket_cors(Bucket=bucket_name, CORSConfiguration=cors_config) - # Convert the policy from JSON dict to string - bucket_policy = json.dumps(bucket_policy) - - #Set new bucket policy - s3.put_bucket_policy(Bucket=bucket_name, Policy=bucket_policy) - """ - Set ACL for public read """ @@ -131,18 +132,42 @@ def handler(): if __name__ == '__main__': parser = argparse.ArgumentParser(description="Launches e2e test") - parser.add_argument('-conf', dest="conf", required=True, + # Set default config location to the Helm mounted pod configuration location + parser.add_argument('-conf', dest="conf", required=False, default='/etc/config/config.yml', help="AWS config filepath") - parser.add_argument('-cred', dest="cred", required=True, help="Credentials filepath") args = vars(parser.parse_args()) - # Get configuration file path locations + # Generate configuration dictionary conf_loc = args.pop('conf') + with open(conf_loc) as f: + config_dict.update(yaml.load(f, Loader=yaml.FullLoader)) + + # Get credentials from passed in fully qualified path or ENV. cred_loc = args.pop('cred') + if cred_loc is not None: + with open(cred_loc) as f: + creds = yaml.load(f, Loader=yaml.FullLoader) + registry_username = creds['registry']['username'] + registry_password = creds['registry']['password'] + access_key = creds['sandbox']['access_key'] + access_secret = creds['sandbox']['secret_key'] + else: + print("Using env variables for config parameters") + registry_username = os.environ.get("REGISTRY_USERNAME") + registry_password = os.environ.get("REGISTRY_PASSWORD") + access_key = os.environ.get("ACCESS_KEY") + access_secret = os.environ.get("SECRET_KEY") + + config_dict.update({ + 'registry_username' : registry_username, + 'registry_password' : registry_password, + 'access_key' : access_key, + 'secret_key' : access_secret + }) # Create S3Utils instance - s3_utils = S3Utils(conf_loc, cred_loc) + s3_utils = S3Utils(**config_dict) handler() \ No newline at end of file diff --git a/scripts/launch_e2e.py b/scripts/launch_e2e.py index 6d60b2c..820fd22 100644 --- a/scripts/launch_e2e.py +++ b/scripts/launch_e2e.py @@ -7,11 +7,12 @@ from onestop.util.S3Utils import S3Utils from onestop.util.S3MessageAdapter import S3MessageAdapter from onestop.WebPublisher import WebPublisher -from onestop.extract.CsbExtractor import CsbExtractor from onestop.schemas.util.jsonEncoder import EnumEncoder +from onestop.util.ClientLogger import ClientLogger +config_dict = {} -def handler(recs): +def handler(recs, log_level): ''' Processes metadata information from sqs message triggered by S3 event and uploads to registry through web publisher (https). Also uploads s3 object to glacier. 
@@ -22,50 +23,48 @@ def handler(recs): IM registry response and boto3 glacier response ''' - print("Handler...") + logger = ClientLogger.get_logger('launch_e2e.handler', log_level, False) + logger.info('In Handler') - # Now get boto client for object-uuid retrieval - object_uuid = None - bucket = None - print(recs) + # If record exists try to get object-uuid retrieval + logger.debug('Records:%s'%recs) if recs is None: - print("No records retrieved") + logger.info('No records retrieved, doing nothing.') + return + + rec = recs[0] + logger.debug('Record: %s'%rec) + bucket = rec['s3']['bucket']['name'] + s3_key = rec['s3']['object']['key'] + logger.info("Getting uuid") + s3_resource = s3_utils.connect('resource', 's3', None) + object_uuid = s3_utils.get_uuid_metadata(s3_resource, bucket, s3_key) + if object_uuid is not None: + logger.info('Retrieved object-uuid: %s'% object_uuid) else: - rec = recs[0] - print(rec) - bucket = rec['s3']['bucket']['name'] - s3_key = rec['s3']['object']['key'] - print("Getting uuid") - # High-level api - s3_resource = s3_utils.connect("s3_resource", None) - object_uuid = s3_utils.get_uuid_metadata(s3_resource, bucket, s3_key) - if object_uuid is not None: - print("Retrieved object-uuid: " + object_uuid) - else: - print("Adding uuid") - s3_utils.add_uuid_metadata(s3_resource, bucket, s3_key) + logger.info('UUID not found, adding uuid to bucket=%s key=%s'%(bucket, s3_key)) + s3_utils.add_uuid_metadata(s3_resource, bucket, s3_key) + s3ma = S3MessageAdapter(**config_dict) im_message = s3ma.transform(recs) - + logger.debug('S3MessageAdapter.transform: %s'%im_message) json_payload = json.dumps(im_message.to_dict(), cls=EnumEncoder) + logger.debug('S3MessageAdapter.transform.json dump: %s'%json_payload) - print(json_payload) - - + wp = WebPublisher(**config_dict) registry_response = wp.publish_registry("granule", object_uuid, json_payload, "POST") - #print(registry_response.json()) + logger.debug('publish_registry response: %s'%registry_response.json()) # Upload to archive file_data = s3_utils.read_bytes_s3(s3_client, bucket, s3_key) - glacier = s3_utils.connect("glacier", cloud_conf['s3_region']) - vault_name = cloud_conf['vault_name'] - + glacier = s3_utils.connect('client', 'glacier', config_dict['s3_region']) + vault_name = config_dict['vault_name'] resp_dict = s3_utils.upload_archive(glacier, vault_name, file_data) - - print("archiveLocation: " + resp_dict['location']) - print("archiveId: " + resp_dict['archiveId']) - print("sha256: " + resp_dict['checksum']) + logger.debug('Upload response: %s'%resp_dict) + logger.info('upload archived location: %s'% resp_dict['location']) + logger.info('archiveId: %s'% resp_dict['archiveId']) + logger.info('sha256: %s'% resp_dict['checksum']) addlocPayload = { "fileLocations": { @@ -82,116 +81,60 @@ def handler(recs): json_payload = json.dumps(addlocPayload, indent=2) # Send patch request next with archive location registry_response = wp.publish_registry("granule", object_uuid, json_payload, "PATCH") - + logger.debug('publish to registry response: %s'% registry_response) + logger.info('Finished publishing to registry.') if __name__ == '__main__': - """ parser = argparse.ArgumentParser(description="Launches e2e test") - parser.add_argument('-conf', dest="conf", required=True, + parser.add_argument('-conf', dest="conf", required=False, default='/etc/config/config.yml', help="AWS config filepath") parser.add_argument('-cred', dest="cred", required=True, help="Credentials filepath") args = vars(parser.parse_args()) - # Get 
configuration file path locations - conf_loc = args.pop('conf') - cred_loc = args.pop('cred') - # Upload a test file to s3 bucket - s3_utils = S3Utils(conf_loc, cred_loc) - # Low-level api ? Can we just use high level revisit me! - s3 = s3_utils.connect("s3", None) - registry_user = os.environ.get("REGISTRY_USERNAME") - registry_pwd = os.environ.get("REGISTRY_PASSWORD") - print(registry_user) - access_key = os.environ.get("AWS_ACCESS") - access_secret = os.environ.get("AWS_SECRET") - print(access_key) - - # High-level api - s3_resource = s3_utils.connect("s3_resource", None) - bucket = cloud_conf['s3_bucket'] - overwrite = True - sqs_max_polls = cloud_conf['sqs_max_polls'] - # Add 3 files to bucket - local_files = ["file1.csv", "file4.csv"] - s3_file = None - for file in local_files: - local_file = "tests/data/" + file - s3_file = "csv/" + file - s3_utils.upload_s3(s3, local_file, bucket, s3_file, overwrite) - # Receive s3 message and MVM from SQS queue - sqs_consumer = SqsConsumer(conf_loc, cred_loc) - s3ma = S3MessageAdapter("scripts/config/csb-data-stream-config.yml", s3_utils) - # Retrieve data from s3 object - #csb_extractor = CsbExtractor() - wp = WebPublisher("config/web-publisher-config-dev.yml", cred_loc) - queue = sqs_consumer.connect() - try: - debug = False - sqs_consumer.receive_messages(queue, sqs_max_polls, handler) - except Exception as e: - print("Message queue consumption failed: {}".format(e)) - """ - parser = argparse.ArgumentParser(description="Launches e2e test") - parser.add_argument('-conf', dest="conf", required=True, - help="AWS config filepath") - - parser.add_argument('-cred', dest="cred", required=True, - help="Credentials filepath") - args = vars(parser.parse_args()) - - # Get configuration file path locations + # Generate configuration dictionary conf_loc = args.pop('conf') - cred_loc = args.pop('cred') - stream_conf_loc = args.pop('cred') - - with open(os.path.abspath(os.path.join(os.path.dirname(__file__), cred_loc))) as f: - cred = yaml.load(f, Loader=yaml.FullLoader) - with open(os.path.abspath(os.path.join(os.path.dirname(__file__), conf_loc))) as f: - cloud_conf = yaml.load(f, Loader=yaml.FullLoader) - with open(os.path.abspath(os.path.join(os.path.dirname(__file__), stream_conf_loc))) as f: - stream_conf = yaml.load(f, Loader=yaml.FullLoader) - - s3_utils = S3Utils(cred['sandbox']['access_key'], - cred['sandbox']['secret_key'], - "DEBUG") - - bucket = cloud_conf['s3_bucket'] - sqs_max_polls = cloud_conf['sqs_max_polls'] - - #Source - access_bucket = stream_conf['access_bucket'] + with open(conf_loc) as f: + config_dict.update(yaml.load(f, Loader=yaml.FullLoader)) - #Onestop related - file_id_prefix = stream_conf['file_identifier_prefix'] - file_format = stream_conf['format'] - headers = stream_conf['headers'] - type = stream_conf['type'] - - # Low-level api ? Can we just use high level revisit me! - s3_client = s3_utils.connect("s3", None) + # Get credentials from passed in fully qualified path or ENV. 
+ cred_loc = args.pop('cred') + if cred_loc is not None: + with open(cred_loc) as f: + creds = yaml.load(f, Loader=yaml.FullLoader) + registry_username = creds['registry']['username'] + registry_password = creds['registry']['password'] + access_key = creds['sandbox']['access_key'] + access_secret = creds['sandbox']['secret_key'] + else: + print("Using env variables for config parameters") + registry_username = os.environ.get("REGISTRY_USERNAME") + registry_password = os.environ.get("REGISTRY_PASSWORD") + access_key = os.environ.get("ACCESS_KEY") + access_secret = os.environ.get("SECRET_KEY") + + config_dict.update({ + 'registry_username' : registry_username, + 'registry_password' : registry_password, + 'access_key' : access_key, + 'secret_key' : access_secret + }) + + s3_utils = S3Utils(**config_dict) + s3_client = s3_utils.connect('client', 's3', config_dict['s3_region']) # Upload test files to s3 bucket local_files = ["file1.csv", "file4.csv"] s3_file = None for file in local_files: - local_file = "data/" + file + local_file = "scripts/data/" + file # s3_file = "csv/" + file - s3_file = "NESDIS/CSB/" + file - if not s3_utils.upload_s3(s3_client, local_file, bucket, s3_file, True): + s3_file = "public/" + file + if not s3_utils.upload_s3(s3_client, local_file, config_dict['s3_bucket'], s3_file, True): exit("Error setting up for e2e: The test files were not uploaded to the s3 bucket therefore the tests cannot continue.") - - # Receive s3 message and MVM from SQS queue - sqs_consumer = SqsConsumer(conf_loc, cred_loc) - s3ma = S3MessageAdapter(access_bucket, headers, type, file_id_prefix, "DEBUG") - wp = WebPublisher("config/web-publisher-config-dev.yml", cred_loc) - - queue = sqs_consumer.connect() - try: - debug = False - sqs_consumer.receive_messages(queue, sqs_max_polls, handler) - - except Exception as e: - print("Message queue consumption failed: {}".format(e)) + sqs_consumer = SqsConsumer(**config_dict) + sqs_resource = s3_utils.connect('resource', 'sqs', config_dict['s3_region']) + queue = sqs_consumer.connect(sqs_resource, config_dict['sqs_name']) + sqs_consumer.receive_messages(queue, config_dict['sqs_max_polls'], handler) diff --git a/scripts/launch_kafka_publisher.py b/scripts/launch_kafka_publisher.py index f4a853d..85283c2 100644 --- a/scripts/launch_kafka_publisher.py +++ b/scripts/launch_kafka_publisher.py @@ -1,17 +1,21 @@ import argparse +import yaml + from onestop.KafkaPublisher import KafkaPublisher if __name__ == '__main__': ''' Uploads collection to Kafka collection topic ''' - parser = argparse.ArgumentParser(description="Launches KafkaPublisher to publish kafkda topics") - parser.add_argument('-conf', dest="conf", required=True, + parser = argparse.ArgumentParser(description="Launches KafkaPublisher to publish kafka topics") + parser.add_argument('-conf', dest="conf", required=False, default='/etc/config/config.yml', help="Config filepath") args = vars(parser.parse_args()) conf_loc = args.pop('conf') + with open(conf_loc) as f: + conf = yaml.load(f, Loader=yaml.FullLoader) # "discovery": # { @@ -22,13 +26,19 @@ # FileIdentifier: gov.noaa.ngdc.mgg.dem:yaquina_bay_p210_30m collection_uuid = '3ee5976e-789a-41d5-9cae-d51e7b92a247' content_dict = {'discovery': {'title': 'My Extra New Title!', - 'fileIdentifier': 'gov.noaa.osim2.mgg.dem:yaquina_bay_p210_30m' + 'fileIdentifier': 'gov.noaa.osim2.mgg.dem:yaquina_bay_p210_30m', + "links": [ + { + "linkFunction": "download", "linkName": "Amazon S3", "linkProtocol": "HTTPS", + "linkUrl": 
"https://s3.amazonaws.com/nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2" + } + ] } } # method one of POST, PUT, PATCH, DELETE method = 'POST' #Update - kafka_publisher = KafkaPublisher(conf_loc) + kafka_publisher = KafkaPublisher(**conf) metadata_producer = kafka_publisher.connect() kafka_publisher.publish_collection(metadata_producer, collection_uuid, content_dict, method) diff --git a/scripts/launch_pyconsumer.py b/scripts/launch_pyconsumer.py index 7850f38..5d22317 100644 --- a/scripts/launch_pyconsumer.py +++ b/scripts/launch_pyconsumer.py @@ -1,13 +1,18 @@ +import argparse import os import yaml +import json from onestop.util.SqsConsumer import SqsConsumer from onestop.util.S3Utils import S3Utils from onestop.util.S3MessageAdapter import S3MessageAdapter from onestop.WebPublisher import WebPublisher +from onestop.util.ClientLogger import ClientLogger +from onestop.schemas.util.jsonEncoder import EnumEncoder +config_dict = {} -def handler(recs): +def handler(recs, log_level): ''' Processes metadata information from sqs message triggered by S3 event and uploads to registry through web publisher (https). Utilizes helm for credentials and conf. @@ -17,77 +22,83 @@ def handler(recs): :return: str IM registry response ''' - print("Handling message...") + logger = ClientLogger.get_logger('launch_pyconsumer.handler', log_level, False) + logger.info('In Handler') # Now get boto client for object-uuid retrieval object_uuid = None if recs is None: - print("No records retrieved") - else: - rec = recs[0] - bucket = rec['s3']['bucket']['name'] - s3_key = rec['s3']['object']['key'] + logger.info('No records retrieved, doing nothing.') + return + + rec = recs[0] + bucket = rec['s3']['bucket']['name'] + s3_key = rec['s3']['object']['key'] - # Fetch the object to get the uuid - object_uuid = s3_utils.get_uuid_metadata(s3_resource, bucket, s3_key) + # Fetch the object to get the uuid + logger.info("Getting uuid") + s3_resource = s3_utils.connect('resource', 's3', None) + object_uuid = s3_utils.get_uuid_metadata(s3_resource, bucket, s3_key) - if object_uuid is not None: - print("Retrieved object-uuid: " + object_uuid) - else: - print("Adding uuid") - s3_utils.add_uuid_metadata(s3_resource, bucket, s3_key) + if object_uuid is not None: + logger.info('Retrieved object-uuid: %s'% object_uuid) + else: + logger.info('UUID not found, adding uuid to bucket=%s key=%s'%(bucket, s3_key)) + s3_utils.add_uuid_metadata(s3_resource, bucket, s3_key) # Convert s3 message to IM message - s3ma = S3MessageAdapter(conf_loc, s3_utils) - json_payload = s3ma.transform(recs) + s3ma = S3MessageAdapter(**config_dict) + im_message = s3ma.transform(recs) + logger.debug('S3MessageAdapter.transform: %s'%im_message) + json_payload = json.dumps(im_message.to_dict(), cls=EnumEncoder) + logger.debug('S3MessageAdapter.transform.json dump: %s'%json_payload) #Send the message to Onestop - wp = WebPublisher(conf_loc, cred_loc) - registry_response = wp.publish_registry("granule", object_uuid, json_payload.serialize(), "POST") - print("RESPONSE: ") - print(registry_response.json()) + wp = WebPublisher(**config_dict) + registry_response = wp.publish_registry("granule", object_uuid, json_payload, "POST") + logger.debug('publish_registry response: %s'%registry_response.json()) if __name__ == '__main__': - conf_loc = "/etc/config/config.yml" - cred_loc = "creds.yml" - with open(os.path.abspath(os.path.join(os.path.dirname(__file__), "creds.yml"))) as f: - cred = yaml.load(f, 
Loader=yaml.FullLoader) - with open(os.path.abspath(os.path.join(os.path.dirname(__file__), "/etc/config/config.yml"))) as f: - conf = yaml.load(f, Loader=yaml.FullLoader) - - registry_user = os.environ.get("REGISTRY_USERNAME") - registry_pwd = os.environ.get("REGISTRY_PASSWORD") - access_key = os.environ.get("ACCESS_KEY") - access_secret = os.environ.get("SECRET_KEY") - - f = open(cred_loc, "w+") - -#write creds to a file to avoid changing the python library - s = """sandbox: - access_key: {key} - secret_key: {secret} - -registry: - username: {user} - password: {pw} - """.format(key=access_key, secret=access_secret, user=registry_user, pw=registry_pwd) - f.write(s) - f.close() - r = open(cred_loc, "r") - - # # Receive s3 message and MVM from SQS queue - s3_utils = S3Utils(cred['sandbox']['access_key'], - cred['sandbox']['secret_key'], - "DEBUG") - sqs_max_polls = conf['sqs_max_polls'] - sqs_consumer = SqsConsumer(conf_loc, cred_loc) - queue = sqs_consumer.connect() - - try: - debug = False - # # Pass in the handler method - sqs_consumer.receive_messages(queue, sqs_max_polls, handler) - - except Exception as e: - print("Message queue consumption failed: {}".format(e)) + parser = argparse.ArgumentParser(description="Launches e2e test") + parser.add_argument('-conf', dest="conf", required=False, default='/etc/config/config.yml', + help="AWS config filepath") + parser.add_argument('-cred', dest="cred", required=True, + help="Credentials filepath") + args = vars(parser.parse_args()) + + # Generate configuration dictionary + conf_loc = args.pop('conf') + with open(conf_loc) as f: + config_dict.update(yaml.load(f, Loader=yaml.FullLoader)) + + # Get credentials from passed in fully qualified path or ENV. + cred_loc = args.pop('cred') + if cred_loc is not None: + with open(cred_loc) as f: + creds = yaml.load(f, Loader=yaml.FullLoader) + registry_username = creds['registry']['username'] + registry_password = creds['registry']['password'] + access_key = creds['sandbox']['access_key'] + access_secret = creds['sandbox']['secret_key'] + else: + print("Using env variables for config parameters") + registry_username = os.environ.get("REGISTRY_USERNAME") + registry_password = os.environ.get("REGISTRY_PASSWORD") + access_key = os.environ.get("ACCESS_KEY") + access_secret = os.environ.get("SECRET_KEY") + + config_dict.update({ + 'registry_username' : registry_username, + 'registry_password' : registry_password, + 'access_key' : access_key, + 'secret_key' : access_secret + }) + + s3_utils = S3Utils(**config_dict) + + # Receive s3 message and MVM from SQS queue + sqs_consumer = SqsConsumer(**config_dict) + sqs_resource = s3_utils.connect('resource', 'sqs', config_dict['s3_region']) + queue = sqs_consumer.connect(sqs_resource, config_dict['sqs_name']) + sqs_consumer.receive_messages(queue, config_dict['sqs_max_polls'], handler) diff --git a/scripts/sme/sme.py b/scripts/sme/sme.py index 6509aa3..12f7859 100644 --- a/scripts/sme/sme.py +++ b/scripts/sme/sme.py @@ -1,6 +1,7 @@ -import argparse import json import os +import yaml +import argparse from onestop.extract.CsbExtractor import CsbExtractor from onestop.KafkaConsumer import KafkaConsumer @@ -9,13 +10,16 @@ from onestop.schemas.geojsonSchemaClasses.org.cedar.schemas.avro.geojson.point import Point from onestop.schemas.geojsonSchemaClasses.point_type import PointType from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.parsed_record import ParsedRecord -from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.checksum_algorithm import 
ChecksumAlgorithm +#from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.checksum_algorithm import ChecksumAlgorithm from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.temporal_bounding import TemporalBounding from onestop.schemas.util.jsonEncoder import EnumEncoder, as_enum, EnumEncoderValue from onestop.KafkaPublisher import KafkaPublisher -from spatial import script_generation, postgres_insert +#from spatial import script_generation, postgres_insert +from onestop.util.ClientLogger import ClientLogger -def handler(key, value): +config_dict = {} + +def handler(key, value, log_level = 'INFO'): ''' Consumes message from psi-input-unknown, extracts geospatial data, uploads new payload to parsed-record topic in kafka, and uploads geospatial data to Postgres @@ -27,99 +31,120 @@ def handler(key, value): :return: str returns response message from kafka ''' - print('Key:', key) - print('Value: ' ,value) # Grabs the contents of the message and turns the dict string into a dictionary using json.loads - try: - content_dict = json.loads(value['content'], object_hook=as_enum) - - parsed_record = ParsedRecord().from_dict(content_dict) - - # Geospatial Extraction - # Extract the bucket key for csb_extractor object initialization - bucket_key = content_dict['discovery']['links'][0]['linkUrl'].split('.com/')[1] - - csb_extractor = CsbExtractor(su, bucket_key) - if csb_extractor.is_csv(bucket_key): - geospatial = csb_extractor.get_spatial_temporal_bounds('LON', 'LAT', 'TIME') - begin_date, end_date = geospatial['temporal'][0], geospatial['temporal'][1] - max_lon, max_lat, min_lon, min_lat = geospatial['geospatial'][2], geospatial['geospatial'][3], \ - geospatial['geospatial'][0], geospatial['geospatial'][1] - coords = csb_extractor.extract_coords(max_lon, max_lat, min_lon, min_lat) - - # Create spatial bounding types based on the given coords - pointType = PointType('Point') - point = Point(coordinates=coords[0], type=pointType) - - # Create temp bounding obj - tempBounding = TemporalBounding(beginDate=begin_date, endDate=end_date) - - # Update parsed record object with geospatial data - parsed_record.discovery.temporalBounding = tempBounding - parsed_record.discovery.spatialBounding = point - - """ - # Insert data into postgres - script = script_generation(coords[0], key) - postgres_insert(script) - """ - - # update content dict - parsed_record.type = value['type'] - content_dict = parsed_record.to_dict() - # reformat Relationship field - relationship_type = content_dict['relationships'][0]['type']['type'] - content_dict['relationships'][0]['type'] = relationship_type - - # reformat File Locations - filelocation_type = content_dict['fileLocations']['type']['type'] - content_dict['fileLocations']['type'] = filelocation_type - + logger = ClientLogger.get_logger('sme.handler', log_level, False) + logger.info('In Handler') + # This is an example for testing purposes. 
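+    # NOTE: the hard-coded 'value' dict below overwrites the message passed in by the
+    # Kafka consumer, so every invocation processes this same sample granule.
+    # Remove or comment it out to process real consumed messages.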
+ value = { + "type": "granule", + "content": "{ \"discovery\": {\n \"fileIdentifier\": \"92ade5dc-946d-11ea-abe4-0242ac120004\",\n \"links\": [\n {\n \"linkFunction\": \"download\",\n \"linkName\": \"Amazon S3\",\n \"linkProtocol\": \"HTTPS\",\n \"linkUrl\": \"https://s3.amazonaws.com/nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\"\n }\n ],\n \"parentIdentifier\": \"0fad03df-0805-434a-86a6-7dc42d68480f\",\n \"spatialBounding\": null,\n \"temporalBounding\": {\n \"beginDate\": \"2020-05-12T16:20:15.158Z\", \n \"endDate\": \"2020-05-12T16:21:51.494Z\"\n },\n \"title\": \"HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\"\n },\n \"fileInformation\": {\n \"checksums\": [{\"algorithm\": \"MD5\",\"value\": \"44d2452e8bc2c8013e9c673086fbab7a\"}]\n, \"optionalAttributes\":{}, \"format\": \"HSD\",\n \"name\": \"HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\",\n \"size\": 208918\n },\n \"fileLocations\": {\n \"optionalAttributes\":{}, \"uri\":\"//nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\", \"asynchronous\": false,\n \"deleted\": false,\n \"lastModified\": 1589300890000,\n \"locality\": \"us-east-1\",\n \"restricted\": false,\n \"serviceType\": \"Amazon:AWS:S3\",\n \"type\": {\"type\":\"ACCESS\"},\n \"uri\": \"s3://nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\"\n },\n \"relationships\": [\n {\n \"id\": \"0fad03df-0805-434a-86a6-7dc42d68480f\",\n \"type\": {\"type\":\"COLLECTION\"}\n }\n ]\n }", + "contentType": "application/json", + "method": "PUT", + "source": "unknown", + "operation": "ADD" + } + logger.debug('content: %s'%value['content']) + + content_dict = json.loads(value['content'], object_hook=as_enum) + logger.debug('content_dict: %s'%content_dict) + parsed_record = ParsedRecord().from_dict(content_dict) + + # Geospatial Extraction + bucket_key = content_dict['discovery']['links'][0]['linkUrl'].split('.com/')[1] + logger.info("Bucket key="+bucket_key) + if CsbExtractor.is_csv(bucket_key): + logger.info('Extracting geospatial information') + sm_open_file = su.get_csv_s3(su.connect("session", None), config_dict['s3_bucket'], bucket_key) + geospatial = CsbExtractor.get_spatial_temporal_bounds(sm_open_file, 'LON', 'LAT', 'TIME') + begin_date, end_date = geospatial['temporal'][0], geospatial['temporal'][1] + max_lon, max_lat, min_lon, min_lat = geospatial['geospatial'][2], geospatial['geospatial'][3], \ + geospatial['geospatial'][0], geospatial['geospatial'][1] + coords = CsbExtractor.extract_coords(sm_open_file, max_lon, max_lat, min_lon, min_lat) + + # Create spatial bounding types based on the given coords + pointType = PointType('Point') + point = Point(coordinates=coords[0], type=pointType) content_dict['discovery']['spatialBounding']['type'] = pointType.value - # Transform content_dict to appropiate payload - # cls=EnumEncoderValue argument looks for instances of Enum classes and extracts only the value of the Enum - content_dict = json.dumps(content_dict, cls=EnumEncoderValue) - content_dict = json.loads(content_dict) - - # Produce new information to kafka - kafka_publisher = KafkaPublisher("scripts/config/kafka-publisher-config-dev.yml") - metadata_producer = kafka_publisher.connect() - collection_id = parsed_record.relationships[0].id - kafka_publisher.publish_granule(metadata_producer, collection_id, collection_id, content_dict) - - except: - print('Invalid Format') - + # Create temp 
bounding obj + tempBounding = TemporalBounding(beginDate=begin_date, endDate=end_date) + + # Update parsed record object with geospatial data + parsed_record.discovery.temporalBounding = tempBounding + parsed_record.discovery.spatialBounding = point + + """ + # Insert data into postgres + script = script_generation(coords[0], key) + postgres_insert(script) + """ + else: + logger.info('Record not CSV - Skipping extracting geospatial information') + + # update content dict + parsed_record.type = value['type'] + content_dict = parsed_record.to_dict() + # reformat Relationship field + relationship_type = content_dict['relationships'][0]['type']['type'] + content_dict['relationships'][0]['type'] = relationship_type + + # reformat File Locations + filelocation_type = content_dict['fileLocations']['type']['type'] + content_dict['fileLocations']['type'] = filelocation_type + + # Transform content_dict to appropiate payload + # cls=EnumEncoderValue argument looks for instances of Enum classes and extracts only the value of the Enum + content_dict = json.dumps(content_dict, cls=EnumEncoderValue) + content_dict = json.loads(content_dict) + + # Produce new information to publish to kafka, TODO: Be wary of cyclical publish/consuming here, since the consumer calls this handler. + kafka_publisher = KafkaPublisher(**config_dict) + metadata_producer = kafka_publisher.connect() + collection_id = parsed_record.relationships[0].id + kafka_publisher.publish_granule(metadata_producer, collection_id, content_dict) if __name__ == '__main__': - # This is where helm will mount the config - conf_loc = "/etc/config/config.yml" - # this is where we are about to write the cred yaml - cred_loc = "creds.yml" - - registry_user = os.environ.get("REGISTRY_USERNAME") - registry_pwd = os.environ.get("REGISTRY_PASSWORD") - access_key = os.environ.get("ACCESS_KEY") - access_secret = os.environ.get("SECRET_KEY") - - f = open(cred_loc, "w+") - - # TODO revisit this when we make a standard that all scripts will follow - # write creds to a file to avoid changing the python library - s = """ - sandbox: - access_key: {key} - secret_key: {secret} - registry: - username: {user} - password: {pw} - """.format(key=access_key, secret=access_secret, user=registry_user, pw=registry_pwd) - f.write(s) - f.close() - r = open(cred_loc, "r") - - su = S3Utils(conf_loc, cred_loc) - kafka_consumer = KafkaConsumer(conf_loc) + # Example command: python3 sme.py -conf /Users/whoever/repo/onestop-clients/scripts/config/combined_template.yml -cred /Users/whoever/repo/onestop-clients/scripts/config/credentials.yml + # python3 archive_client_integration.py -cred /Users/whoever/repo/onestop-clients/scripts/config/credentials.yml + parser = argparse.ArgumentParser(description="Launches sme test") + # Set default config location to the Helm mounted pod configuration location + parser.add_argument('-conf', dest="conf", required=False, default='/etc/config/config.yml', + help="AWS config filepath") + parser.add_argument('-cred', dest="cred", required=True, + help="Credentials filepath") + args = vars(parser.parse_args()) + + # Generate configuration dictionary + conf_loc = args.pop('conf') + with open(conf_loc) as f: + config_dict.update(yaml.load(f, Loader=yaml.FullLoader)) + + # Get credentials from passed in fully qualified path or ENV. 
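+    # Note: '-cred' is declared required=True above, so the environment-variable
+    # fallback in the else branch below is only reachable if that requirement is relaxed.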
+ cred_loc = args.pop('cred') + if cred_loc is not None: + with open(cred_loc) as f: + creds = yaml.load(f, Loader=yaml.FullLoader) + registry_username = creds['registry']['username'] + registry_password = creds['registry']['password'] + access_key = creds['sandbox']['access_key'] + access_secret = creds['sandbox']['secret_key'] + else: + print("Using env variables for config parameters") + registry_username = os.environ.get("REGISTRY_USERNAME") + registry_password = os.environ.get("REGISTRY_PASSWORD") + access_key = os.environ.get("ACCESS_KEY") + access_secret = os.environ.get("SECRET_KEY") + + config_dict.update({ + 'registry_username' : registry_username, + 'registry_password' : registry_password, + 'access_key' : access_key, + 'secret_key' : access_secret + }) + + su = S3Utils(**config_dict) + + kafka_consumer = KafkaConsumer(**config_dict) metadata_consumer = kafka_consumer.connect() - kafka_consumer.consume(metadata_consumer, lambda k, v: handler(k, v)) \ No newline at end of file +# handler('', '', config_dict['log_level']) # For testing purposes + kafka_consumer.consume(metadata_consumer, handler) \ No newline at end of file diff --git a/scripts/sme/smeFunc.py b/scripts/sme/smeFunc.py index 084e15b..f07c7b6 100644 --- a/scripts/sme/smeFunc.py +++ b/scripts/sme/smeFunc.py @@ -1,8 +1,11 @@ +import yaml import argparse -import json from onestop.KafkaConsumer import KafkaConsumer +from onestop.util.ClientLogger import ClientLogger -def handler(key,value): +config_dict = {} + +def handler(key, value, log_level = 'INFO'): ''' Prints key, value pair of items in topic @@ -13,9 +16,10 @@ def handler(key,value): :return: None ''' - print(key) - print('VALUE-------') - print(value) + logger = ClientLogger.get_logger('smeFunc.handler', log_level, False) + logger.info('In Handler') + logger.info('key=%s value=%s'%(key, value)) + """ if (value['type'] == 'collection' or not bool(value['fileInformation'])): print(value['discovery']['fileIdentifier']) @@ -25,51 +29,20 @@ def handler(key,value): if __name__ == '__main__': - - kafka_consumer = KafkaConsumer("scripts/config/kafka-publisher-config-dev.yml") + # Example command: python3 smeFunc.py -conf /Users/whoever/repo/onestop-clients/scripts/config/combined_template.yml + # python3 smeFunc.py + parser = argparse.ArgumentParser(description="Launches smeFunc test") + # Set default config location to the Helm mounted pod configuration location + parser.add_argument('-conf', dest="conf", required=False, default='/etc/config/config.yml', + help="AWS config filepath") + args = vars(parser.parse_args()) + + # Generate configuration dictionary + conf_loc = args.pop('conf') + with open(conf_loc) as f: + config_dict.update(yaml.load(f, Loader=yaml.FullLoader)) + + kafka_consumer = KafkaConsumer(**config_dict) kafka_consumer.granule_topic_consume = 'psi-granule-parsed' metadata_consumer = kafka_consumer.connect() - kafka_consumer.consume(metadata_consumer, lambda k, v: handler(k, v)) - """ - parser = argparse.ArgumentParser(description="Allows smeFunc to produce or consume messagges from kafkda topics") - parser.add_argument('-cmd', dest="command", required=True, - help="Command (produce/consume)") - - parser.add_argument('-b', dest="bootstrap.servers", required=True, - help="Bootstrap broker(s) (host[:port])") - parser.add_argument('-s', dest="schema.registry.url", required=True, - help="Schema Registry (http(s)://host[:port]") - parser.add_argument('-t', dest="topic", required=True, - help="Topic name") - parser.add_argument('-g', dest="group.id", 
required=False, - help="Consumer group") - parser.add_argument('-o', dest="auto.offset.reset", required=False, - help="offset") - - config = vars(parser.parse_args()) - topic = config.pop('topic') - cmd = config.pop('command') - - if (cmd=="consume"): - consume(config, topic, lambda k, v: handler(k, v)) - - - if (cmd=="produce"): - - #Example content - value = { - "type": "collection", - "content": "Update!", - "contentType": "application/json", - "method": "PUT", - "source": "unknown", - "operation": "ADD" - } - - key = "3ee5976e-789a-41d5-9cae-d51e7b92a247" - - data = {key: value} - - produce(config, topic, data) - """ - + kafka_consumer.consume(metadata_consumer, handler) diff --git a/scripts/sqs-to-registry/s3_notification_handler.py b/scripts/sqs-to-registry/s3_notification_handler.py index 2b26ab5..c2785a0 100644 --- a/scripts/sqs-to-registry/s3_notification_handler.py +++ b/scripts/sqs-to-registry/s3_notification_handler.py @@ -1,55 +1,87 @@ import os import yaml +import json + from onestop.util.SqsConsumer import SqsConsumer from onestop.util.S3Utils import S3Utils from onestop.util.S3MessageAdapter import S3MessageAdapter from onestop.WebPublisher import WebPublisher from onestop.util.SqsHandlers import create_delete_handler from onestop.util.SqsHandlers import create_upload_handler +from onestop.util.ClientLogger import ClientLogger -from datetime import date import argparse +config_dict = {} + +test_message = { + "Type": "Notification", + "MessageId": "e12f0129-0236-529c-aeed-5978d181e92a", + "TopicArn": "arn:aws:sns:" + config_dict['s3_region'] + ":798276211865:cloud-archive-client-sns", + "Subject": "Amazon S3 Notification", + "Message": '''{ + "Records": [{ + "eventVersion": "2.1", "eventSource": "aws:s3", "awsRegion": "''' + config_dict['s3_region'] + '''", + "eventTime": "2020-12-14T20:56:08.725Z", + "eventName": "ObjectRemoved:Delete", + "userIdentity": {"principalId": "AX8TWPQYA8JEM"}, + "requestParameters": {"sourceIPAddress": "65.113.158.185"}, + "responseElements": {"x-amz-request-id": "D8059E6A1D53597A", + "x-amz-id-2": "7DZF7MAaHztZqVMKlsK45Ogrto0945RzXSkMnmArxNCZ+4/jmXeUn9JM1NWOMeKK093vW8g5Cj5KMutID+4R3W1Rx3XDZOio"}, + "s3": { + "s3SchemaVersion": "1.0", "configurationId": "archive-testing-demo-event", + "bucket": {"name": "''' + config_dict['s3_bucket'] + '''", + "ownerIdentity": {"principalId": "AX8TWPQYA8JEM"}, + "arn": "arn:aws:s3:::''' + config_dict['s3_bucket'] + '''"}, + "object": {"key": "123", + "sequencer": "005FD7D1765F04D8BE", + "eTag": "44d2452e8bc2c8013e9c673086fbab7a", + "size": 1385, + "versionId": "q6ls_7mhqUbfMsoYiQSiADnHBZQ3Fbzf"} + } + }] + }''', + "Timestamp": "2020-12-14T20:56:23.786Z", + "SignatureVersion": "1", + "Signature": "MB5P0H5R5q3zOFoo05lpL4YuZ5TJy+f2c026wBWBsQ7mbNQiVxAy4VbbK0U1N3YQwOslq5ImVjMpf26t1+zY1hoHoALfvHY9wPtc8RNlYqmupCaZgtwEl3MYQz2pHIXbcma4rt2oh+vp/n+viARCToupyysEWTvw9a9k9AZRuHhTt8NKe4gpphG0s3/C1FdvrpQUvxoSGVizkaX93clU+hAFsB7V+yTlbKP+SNAqP/PaLtai6aPY9Lb8reO2ZjucOl7EgF5IhBVT43HhjBBj4JqYBNbMPcId5vMfBX8qI8ANIVlGGCIjGo1fpU0ROxSHsltuRjkmErpxUEe3YJJM3Q==", + "SigningCertURL": "https://sns.us-east-2.amazonaws.com/SimpleNotificationService-010a507c1833636cd94bdb98bd93083a.pem", + "UnsubscribeURL": "https://sns.us-east-2.amazonaws.com/?Action=Unsubscribe&SubscriptionArn=arn:aws:sns:us-east-2:798276211865:cloud-archive-client-sns:461222e7-0abf-40c6-acf7-4825cef65cce" +} + +def handler(recs, log_level): + logger = ClientLogger.get_logger('s3_notification_handler.handler', log_level, False) + logger.info('In Handler') -def handler(recs): 
- print("Handling message...") + if recs is None: + logger.info('No records retrieved, doing nothing.') + return - # Now get boto client for object-uuid retrieval - object_uuid = None + rec = recs[0] + logger.info('Record:%s'%rec) - if recs is None: - print("No records retrieved" + date.today()) + if 'ObjectRemoved' in rec['eventName']: + delete_handler(recs) else: - rec = recs[0] - print(rec) - if 'ObjectRemoved' in rec['eventName']: - print("SME - calling delete handler") - print(rec['eventName']) - delete_handler(recs) - else: - print("SME - calling upload handler") - upload_handler(recs) - #copy_handler(recs) - + upload_handler(recs) if __name__ == '__main__': - - parser = argparse.ArgumentParser(description="Launch SQS to Registry consumer") - parser.add_argument('-conf', dest="conf", required=False, - help="Config filepath") - - parser.add_argument('-cred', dest="cred", required=False, + # Example command: python3 archive_client_integration.py -conf /Users/whoever/repo/onestop-clients/scripts/config/combined_template.yml -cred /Users/whoever/repo/onestop-clients/scripts/config/credentials.yml + # python3 archive_client_integration.py -cred /Users/whoever/repo/onestop-clients/scripts/config/credentials.yml + parser = argparse.ArgumentParser(description="Launches archive client integration") + # Set default config location to the Helm mounted pod configuration location + parser.add_argument('-conf', dest="conf", required=False, default='/etc/config/config.yml', + help="AWS config filepath") + parser.add_argument('-cred', dest="cred", required=True, help="Credentials filepath") - args = vars(parser.parse_args()) - cred_loc = args.pop('cred') - #credentials from either file or env - registry_username = None - registry_password = None - access_key = None - access_secret = None + # Generate configuration dictionary + conf_loc = args.pop('conf') + with open(conf_loc) as f: + config_dict.update(yaml.load(f, Loader=yaml.FullLoader)) + # Get credentials from passed in fully qualified path or ENV. 
+ cred_loc = args.pop('cred') if cred_loc is not None: with open(cred_loc) as f: creds = yaml.load(f, Loader=yaml.FullLoader) @@ -64,60 +96,34 @@ def handler(recs): access_key = os.environ.get("ACCESS_KEY") access_secret = os.environ.get("SECRET_KEY") - # default config location mounted in pod - if args.pop('conf') is None: - conf_loc = "/etc/config/config.yml" - else: - conf_loc = args.pop('conf') - - conf = None - with open(conf_loc) as f: - conf = yaml.load(f, Loader=yaml.FullLoader) - - #TODO organize the config - #System - log_level = conf['log_level'] - sqs_max_polls = conf['sqs_max_polls'] - - #Destination - registry_base_url = conf['registry_base_url'] - onestop_base_url = conf['onestop_base_url'] - - #Source - access_bucket = conf['access_bucket'] - sqs_url = conf['sqs_url'] - s3_region = conf['s3_region'] - s3_bucket2 = conf['s3_bucket2'] - s3_region2 = conf['s3_region2'] - - - #Onestop related - prefix_map = conf['prefixMap'] - file_id_prefix = conf['file_identifier_prefix'] - file_format = conf['format'] - headers = conf['headers'] - type = conf['type'] + config_dict.update({ + 'registry_username' : registry_username, + 'registry_password' : registry_password, + 'access_key' : access_key, + 'secret_key' : access_secret + }) + sqs_consumer = SqsConsumer(**config_dict) - sqs_consumer = SqsConsumer(access_key, access_secret, s3_region, sqs_url, log_level) + wp = WebPublisher(**config_dict) - wp = WebPublisher(registry_base_url=registry_base_url, username=registry_username, password=registry_password, - onestop_base_url=onestop_base_url, log_level=log_level) + s3_utils = S3Utils(**config_dict) - s3_utils = S3Utils(access_key, access_secret, log_level) - s3ma = S3MessageAdapter(access_bucket, prefix_map, format, headers, type, file_id_prefix, log_level) + s3ma = S3MessageAdapter(**config_dict) delete_handler = create_delete_handler(wp) upload_handler = create_upload_handler(wp, s3_utils, s3ma) - queue = sqs_consumer.connect() + s3_resource = s3_utils.connect('resource', 'sqs', config_dict['s3_region']) + queue = sqs_consumer.connect(s3_resource, config_dict['sqs_name']) - try: - debug = False - # # Pass in the handler method - #Hack to make this stay up forever - #TODO add feature to client library for polling indefinitely - while True: - sqs_consumer.receive_messages(queue, sqs_max_polls, handler) + # Send a test message +# sqs_client = s3_utils.connect('client', 'sqs' , config_dict['s3_region']) +# sqs_client.send_message( +# QueueUrl='https://sqs.us-east-2.amazonaws.com/798276211865/cloud-archive-client-sqs', +# MessageBody=json.dumps(test_message) +# ) - except Exception as e: - print("Message queue consumption failed: {}".format(e)) + #Hack to make this stay up forever + #TODO add feature to client library for polling indefinitely + while True: + sqs_consumer.receive_messages(queue, config_dict['sqs_max_polls'], handler) From 0fe8853797cdd06987e59c3d5b5b40928c730b1e Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 9 Jun 2021 10:04:23 -0600 Subject: [PATCH 090/129] 1507 - Changed warning about extra constructor arguments to debug statement since no harm in extra params. 
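
For reference, a minimal sketch (hypothetical class name) of the constructor pattern these
classes now share, where unrecognized keyword arguments are tolerated and only noted at
debug level:

    from onestop.util.ClientLogger import ClientLogger

    class ExampleComponent:
        def __init__(self, log_level='INFO', **wildargs):
            self.logger = ClientLogger.get_logger(self.__class__.__name__, log_level, False)
            self.logger.info("Initializing " + self.__class__.__name__)
            if wildargs:
                # Extra parameters are harmless, so note them at debug instead of warning.
                self.logger.debug("Superfluous parameters in constructor call: " + str(wildargs))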
--- onestop-python-client/onestop/KafkaConsumer.py | 2 +- onestop-python-client/onestop/KafkaPublisher.py | 2 +- onestop-python-client/onestop/WebPublisher.py | 2 +- onestop-python-client/onestop/util/S3MessageAdapter.py | 2 +- onestop-python-client/onestop/util/S3Utils.py | 2 +- onestop-python-client/onestop/util/SqsConsumer.py | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/onestop-python-client/onestop/KafkaConsumer.py b/onestop-python-client/onestop/KafkaConsumer.py index 18a84cf..80cacb6 100644 --- a/onestop-python-client/onestop/KafkaConsumer.py +++ b/onestop-python-client/onestop/KafkaConsumer.py @@ -102,7 +102,7 @@ def __init__(self, metadata_type, brokers, group_id, auto_offset_reset, schema_r self.logger.info("Initializing " + self.__class__.__name__) if wildargs: - self.logger.warning("There were extra constructor arguments: " + str(wildargs)) + self.logger.debug("Superfluous parameters in constructor call: " + str(wildargs)) def register_client(self): """ diff --git a/onestop-python-client/onestop/KafkaPublisher.py b/onestop-python-client/onestop/KafkaPublisher.py index 0ca40d0..3144ff0 100644 --- a/onestop-python-client/onestop/KafkaPublisher.py +++ b/onestop-python-client/onestop/KafkaPublisher.py @@ -97,7 +97,7 @@ def __init__(self, metadata_type, brokers, schema_registry, security, collection self.logger.info("Initializing " + self.__class__.__name__) if wildargs: - self.logger.warning("There were extra constructor arguments: " + str(wildargs)) + self.logger.debug("Superfluous parameters in constructor call: " + str(wildargs)) def connect(self): """ diff --git a/onestop-python-client/onestop/WebPublisher.py b/onestop-python-client/onestop/WebPublisher.py index 7b1c6bd..47c3bd3 100644 --- a/onestop-python-client/onestop/WebPublisher.py +++ b/onestop-python-client/onestop/WebPublisher.py @@ -42,7 +42,7 @@ def __init__(self, registry_base_url, registry_username, registry_password, ones self.logger.info("Initializing " + self.__class__.__name__) if kwargs: - self.logger.warning("There were extra constructor arguments: " + str(kwargs)) + self.logger.debug("Superfluous parameters in constructor call: " + str(kwargs)) def publish_registry(self, metadata_type, uuid, payload, method): """ diff --git a/onestop-python-client/onestop/util/S3MessageAdapter.py b/onestop-python-client/onestop/util/S3MessageAdapter.py index 6bd832d..0f6d020 100644 --- a/onestop-python-client/onestop/util/S3MessageAdapter.py +++ b/onestop-python-client/onestop/util/S3MessageAdapter.py @@ -59,7 +59,7 @@ def __init__(self, access_bucket, type, file_id_prefix, collection_id, log_leve self.logger.info("Initializing " + self.__class__.__name__) if wildargs: - self.logger.warning("There were extra constructor arguments: " + str(wildargs)) + self.logger.debug("Superfluous parameters in constructor call: " + str(wildargs)) def transform(self, recs): """ diff --git a/onestop-python-client/onestop/util/S3Utils.py b/onestop-python-client/onestop/util/S3Utils.py index 24a81c3..e654df9 100644 --- a/onestop-python-client/onestop/util/S3Utils.py +++ b/onestop-python-client/onestop/util/S3Utils.py @@ -76,7 +76,7 @@ def __init__(self, access_key, secret_key, log_level = 'INFO', **wildargs): self.logger.info("Initializing " + self.__class__.__name__) if wildargs: - self.logger.warning("There were extra constructor arguments: " + str(wildargs)) + self.logger.debug("Superfluous parameters in constructor call: " + str(wildargs)) def connect(self, type, service_name, region): """ diff --git 
a/onestop-python-client/onestop/util/SqsConsumer.py b/onestop-python-client/onestop/util/SqsConsumer.py index d784734..4f2b6ac 100644 --- a/onestop-python-client/onestop/util/SqsConsumer.py +++ b/onestop-python-client/onestop/util/SqsConsumer.py @@ -30,7 +30,7 @@ def __init__(self, log_level = 'INFO', **wildargs): self.logger.info("Initializing " + self.__class__.__name__) if wildargs: - self.logger.warning("There were extra constructor arguments: " + str(wildargs)) + self.logger.debug("Superfluous parameters in constructor call: " + str(wildargs)) def connect(self, sqs_resource, sqs_queue_name): """ From bb03a987c9494621a6afd814d596341262e53ff2 Mon Sep 17 00:00:00 2001 From: Erin Date: Thu, 10 Jun 2021 13:32:23 -0600 Subject: [PATCH 091/129] 1507-Change all init parameters from metadata_type to their classname_metadata_type, including changing type var used in S3MessageAdapter. --- helm/onestop-sqs-consumer/values.yaml | 6 +++-- helm/sme-chart/values.yaml | 7 +++-- kubernetes/pyconsumer-pod.yaml | 10 ++++--- .../csb-data-stream-config-template.yml | 6 ++++- .../onestop/KafkaConsumer.py | 6 ++--- .../onestop/KafkaPublisher.py | 6 ++--- .../onestop/util/S3MessageAdapter.py | 8 +++--- .../test/unit/test_KafkaConsumer.py | 18 ++++++------- .../test/unit/test_KafkaPublisher.py | 18 ++++++------- .../test/unit/test_SqsHandlers.py | 2 +- .../test/unit/util/test_S3MessageAdapter.py | 26 ++++++++++++------- .../test/unit/util/test_S3Utils.py | 2 +- .../test/unit/util/test_SqsConsumer.py | 2 +- scripts/config/csb-data-stream-config.yml | 6 ++++- scripts/sqs-to-registry/config/e2e.yml | 6 +++-- 15 files changed, 77 insertions(+), 52 deletions(-) diff --git a/helm/onestop-sqs-consumer/values.yaml b/helm/onestop-sqs-consumer/values.yaml index bc0f8fb..f5a24fb 100644 --- a/helm/onestop-sqs-consumer/values.yaml +++ b/helm/onestop-sqs-consumer/values.yaml @@ -70,14 +70,16 @@ config: |- #CSB stream config format: csv headers: UNIQUE_ID,FILE_UUID,LON,LAT,DEPTH,TIME,PLATFORM_NAME,PROVIDER - type: COLLECTION collection_id: fdb56230-87f4-49f2-ab83-104cfd073177 access_bucket: https://archive-testing-demo.s3-us-east-2.amazonaws.com #access_bucket: https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com file_id_prefix: "gov.noaa.ncei.csb:" # COLLECTION or GRANULE - metadata_type: GRANULE + kafka_consumer_metadata_type: GRANULE + kafka_publisher_metadata_type: GRANULE + s3_message_adapter_metadata_type: COLLECTION + registry_base_url: http://os-registry:80 onestop_base_url: http://os-search:8080 diff --git a/helm/sme-chart/values.yaml b/helm/sme-chart/values.yaml index 3fc6922..6016adc 100644 --- a/helm/sme-chart/values.yaml +++ b/helm/sme-chart/values.yaml @@ -27,14 +27,17 @@ config: |- #CSB stream config format: csv headers: UNIQUE_ID,FILE_UUID,LON,LAT,DEPTH,TIME,PLATFORM_NAME,PROVIDER - type: COLLECTION collection_id: fdb56230-87f4-49f2-ab83-104cfd073177 access_bucket: https://archive-testing-demo.s3-us-east-2.amazonaws.com #access_bucket: https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com file_id_prefix: "gov.noaa.ncei.csb:" # COLLECTION or GRANULE - metadata_type: GRANULE + kafka_consumer_metadata_type: GRANULE + kafka_producer_metadata_type: GRANULE + web_publisher_metadata_type: GRANULE + s3_message_adapter_metadata_type: COLLECTION + registry_base_url: http://os-registry:80 onestop_base_url: http://os-search:8080 diff --git a/kubernetes/pyconsumer-pod.yaml b/kubernetes/pyconsumer-pod.yaml index 6943403..e6ac5c5 100644 --- a/kubernetes/pyconsumer-pod.yaml +++ b/kubernetes/pyconsumer-pod.yaml 
@@ -70,7 +70,6 @@ data: csb: format: csv headers: UNIQUE_ID,FILE_UUID,LON,LAT,DEPTH,TIME,PLATFORM_NAME,PROVIDER - type: COLLECTION collection_id: fdb56230-87f4-49f2-ab83-104cfd073177 registry_base_url: https://cedardevs.org/ access_bucket: https://archive-testing-demo.s3-us-east-2.amazonaws.com @@ -79,10 +78,13 @@ data: # Web Publisher web: - # COLLECTION or GRANULE - metadata_type: granule registry_base_url: https://cedardevs.org/onestop/registry-api onestop_base_url: https://cedardevs.org/onestop/search-api security: - enabled: True \ No newline at end of file + enabled: True + + # COLLECTION or GRANULE + kafka_consumer_metadata_type: GRANULE + kafka_publisher_metadata_type: GRANULE + s3_message_adapter_metadata_type: COLLECTION \ No newline at end of file diff --git a/onestop-python-client/config/csb-data-stream-config-template.yml b/onestop-python-client/config/csb-data-stream-config-template.yml index 8c2d4de..07ab823 100644 --- a/onestop-python-client/config/csb-data-stream-config-template.yml +++ b/onestop-python-client/config/csb-data-stream-config-template.yml @@ -1,6 +1,10 @@ +# COLLECTION or GRANULE +kafka_consumer_metadata_type: COLLECTION +kafka_publisher_metadata_type: COLLECTION +s3_message_adapter_metadata_type: COLLECTION + format: csv headers: UNIQUE_ID,FILE_UUID,LON,LAT,DEPTH,TIME,PLATFORM_NAME,PROVIDER -type: COLLECTION collection_id: fdb56230-87f4-49f2-ab83-104cfd073177 #registry_base_url: https://internal-a683c98a66fb011eaa4230e0d5e5657f-369075387.us-east-1.elb.amazonaws.com registry_base_url: http://localhost/onestop/api/registry diff --git a/onestop-python-client/onestop/KafkaConsumer.py b/onestop-python-client/onestop/KafkaConsumer.py index 80cacb6..b15b529 100644 --- a/onestop-python-client/onestop/KafkaConsumer.py +++ b/onestop-python-client/onestop/KafkaConsumer.py @@ -51,11 +51,11 @@ class KafkaConsumer: asynchronously polls for messages in the connected topic, results vary depending on the handler function that is passed into it """ - def __init__(self, metadata_type, brokers, group_id, auto_offset_reset, schema_registry, security, collection_topic_consume, granule_topic_consume, log_level = 'INFO', **wildargs): + def __init__(self, kafka_consumer_metadata_type, brokers, group_id, auto_offset_reset, schema_registry, security, collection_topic_consume, granule_topic_consume, log_level = 'INFO', **wildargs): """ Attributes ---------- - metadata_type: str + kafka_consumer_metadata_type: str type of metadata (COLLECTION or GRANULE) brokers: str brokers (kubernetes service) @@ -79,7 +79,7 @@ def __init__(self, metadata_type, brokers, group_id, auto_offset_reset, schema_r What log level to use for this class """ - self.metadata_type = metadata_type.upper() + self.metadata_type = kafka_consumer_metadata_type.upper() self.brokers = brokers self.group_id = group_id self.auto_offset_reset = auto_offset_reset diff --git a/onestop-python-client/onestop/KafkaPublisher.py b/onestop-python-client/onestop/KafkaPublisher.py index 3144ff0..15a5d3b 100644 --- a/onestop-python-client/onestop/KafkaPublisher.py +++ b/onestop-python-client/onestop/KafkaPublisher.py @@ -52,11 +52,11 @@ class KafkaPublisher: Publish granule to granule topic """ - def __init__(self, metadata_type, brokers, schema_registry, security, collection_topic_publish, granule_topic_publish, log_level='INFO', **wildargs): + def __init__(self, kafka_publisher_metadata_type, brokers, schema_registry, security, collection_topic_publish, granule_topic_publish, log_level='INFO', **wildargs): """ Attributes ---------- 
- metadata_type: str + kafka_publisher_metadata_type: str type of metadata (COLLECTION or GRANULE) brokers: str brokers (kubernetes service) @@ -77,7 +77,7 @@ def __init__(self, metadata_type, brokers, schema_registry, security, collection granule_topic: str granule topic you want to produce to """ - self.metadata_type = metadata_type.upper() + self.metadata_type = kafka_publisher_metadata_type.upper() self.brokers = brokers self.schema_registry = schema_registry self.security_enabled = security['enabled'] diff --git a/onestop-python-client/onestop/util/S3MessageAdapter.py b/onestop-python-client/onestop/util/S3MessageAdapter.py index 0f6d020..cb8ffe0 100644 --- a/onestop-python-client/onestop/util/S3MessageAdapter.py +++ b/onestop-python-client/onestop/util/S3MessageAdapter.py @@ -35,13 +35,13 @@ class S3MessageAdapter: transform(recs) transforms sqs message triggered by s3 event to correct format for publishing to IM registry """ - def __init__(self, access_bucket, type, file_id_prefix, collection_id, log_level = 'INFO', **wildargs): + def __init__(self, access_bucket, s3_message_adapter_metadata_type, file_id_prefix, collection_id, log_level = 'INFO', **wildargs): """ Parameters ---------- access_bucket: str access bucket to put in the links field when transformed. - type: str + s3_message_adapter_metadata_type: str COLLECTION or GRANULE file_id_prefix: str File prefix returned as fileIdentifier @@ -52,7 +52,7 @@ def __init__(self, access_bucket, type, file_id_prefix, collection_id, log_leve """ self.access_bucket = access_bucket - self.type = type + self.metadata_type = s3_message_adapter_metadata_type.upper() self.file_id_prefix = file_id_prefix self.collection_id = collection_id self.logger = ClientLogger.get_logger(self.__class__.__name__, log_level, False) @@ -91,7 +91,7 @@ def transform(self, recs): fileInformation = FileInformation(name=file_name, size=file_size, checksums=[checkSum], optionalAttributes={}) # Relationship - relationshipType = RelationshipType(type=self.type) + relationshipType = RelationshipType(type=self.metadata_type) relationship = Relationship(id=self.collection_id, type=relationshipType) # File Location diff --git a/onestop-python-client/test/unit/test_KafkaConsumer.py b/onestop-python-client/test/unit/test_KafkaConsumer.py index 6106738..4a5345f 100644 --- a/onestop-python-client/test/unit/test_KafkaConsumer.py +++ b/onestop-python-client/test/unit/test_KafkaConsumer.py @@ -13,7 +13,7 @@ class test_KafkaConsumer(unittest.TestCase): def setUp(cls): print("Set it up!") cls.conf_w_security = { - "metadata_type" : "GRANULE", + "kafka_consumer_metadata_type" : "GRANULE", "brokers" : "onestop-dev-cp-kafka:9092", "group_id" : "sme-test", "auto_offset_reset" : "earliest", @@ -41,7 +41,7 @@ def tearDown(self): def test_init_happy_nonconditional_params(self): consumer = KafkaConsumer(**self.conf_w_security) - self.assertEqual(consumer.metadata_type, self.conf_w_security['metadata_type']) + self.assertEqual(consumer.metadata_type, self.conf_w_security['kafka_consumer_metadata_type']) self.assertEqual(consumer.brokers, self.conf_w_security['brokers']) self.assertEqual(consumer.group_id, self.conf_w_security['group_id']) self.assertEqual(consumer.auto_offset_reset, self.conf_w_security['auto_offset_reset']) @@ -67,11 +67,11 @@ def test_init_security_disabled(self): def test_init_metadata_type_valid(self): consumer = KafkaConsumer(**self.conf_w_security) - self.assertEqual(consumer.metadata_type, self.conf_w_security['metadata_type']) + 
self.assertEqual(consumer.metadata_type, self.conf_w_security['kafka_consumer_metadata_type']) def test_init_metadata_type_invalid(self): wrong_metadata_type_config = dict(self.conf_w_security) - wrong_metadata_type_config['metadata_type'] = "invalid_type" + wrong_metadata_type_config['kafka_consumer_metadata_type'] = "invalid_type" self.assertRaises(ValueError, KafkaConsumer, **wrong_metadata_type_config) @@ -118,7 +118,7 @@ def test_register_client_wo_security(self, mock_client): @patch('onestop.KafkaConsumer.DeserializingConsumer') def test_create_consumer_calls_AvroDeserializer(self, mock_deserializing_consumer, mock_avro_deserializer): conf_w_security_collection = dict(self.conf_w_security) - conf_w_security_collection['metadata_type'] = "COLLECTION" + conf_w_security_collection['kafka_consumer_metadata_type'] = "COLLECTION" consumer = KafkaConsumer(**conf_w_security_collection) reg_client = consumer.register_client() @@ -135,7 +135,7 @@ def test_create_consumer_calls_AvroDeserializer(self, mock_deserializing_consume def test_create_consumer_collection_w_security(self, mock_deserializing_consumer, mock_avro_deserializer): conf_w_security_collection = dict(self.conf_w_security) topic = conf_w_security_collection['collection_topic_consume'] - conf_w_security_collection['metadata_type'] = 'COLLECTION' + conf_w_security_collection['kafka_consumer_metadata_type'] = 'COLLECTION' consumer = KafkaConsumer(**conf_w_security_collection) reg_client = MagicMock() @@ -166,7 +166,7 @@ def test_create_consumer_collection_w_security(self, mock_deserializing_consumer def test_create_consumer_collection_wo_security(self, mock_deserializing_consumer, mock_avro_deserializer): conf_wo_security_collection = dict(self.conf_wo_security) topic = conf_wo_security_collection['collection_topic_consume'] - conf_wo_security_collection['metadata_type'] = 'COLLECTION' + conf_wo_security_collection['kafka_consumer_metadata_type'] = 'COLLECTION' consumer = KafkaConsumer(**conf_wo_security_collection) reg_client = MagicMock() @@ -193,7 +193,7 @@ def test_create_consumer_collection_wo_security(self, mock_deserializing_consume def test_create_consumer_granule_w_security(self, mock_deserializing_consumer, mock_avro_deserializer): conf_w_security_granule = dict(self.conf_w_security) topic = conf_w_security_granule['granule_topic_consume'] - conf_w_security_granule['metadata_type'] = 'GRANULE' + conf_w_security_granule['kafka_consumer_metadata_type'] = 'GRANULE' consumer = KafkaConsumer(**conf_w_security_granule) reg_client = MagicMock() @@ -224,7 +224,7 @@ def test_create_consumer_granule_w_security(self, mock_deserializing_consumer, m def test_create_consumer_granule_wo_security(self, mock_deserializing_consumer, mock_avro_deserializer): conf_wo_security_granule = dict(self.conf_wo_security) exp_topic = conf_wo_security_granule['granule_topic_consume'] - conf_wo_security_granule['metadata_type'] = 'GRANULE' + conf_wo_security_granule['kafka_consumer_metadata_type'] = 'GRANULE' consumer = KafkaConsumer(**conf_wo_security_granule) reg_client = MagicMock() diff --git a/onestop-python-client/test/unit/test_KafkaPublisher.py b/onestop-python-client/test/unit/test_KafkaPublisher.py index f43d3f6..6357a3c 100644 --- a/onestop-python-client/test/unit/test_KafkaPublisher.py +++ b/onestop-python-client/test/unit/test_KafkaPublisher.py @@ -14,7 +14,7 @@ class test_KafkaPublisher(unittest.TestCase): def setUp(cls): print("Set it up!") cls.conf_w_security = { - "metadata_type" : "GRANULE", + "kafka_publisher_metadata_type" : 
"GRANULE", "brokers" : "onestop-dev-cp-kafka:9092", "schema_registry" : "http://onestop-dev-cp-schema-registry:8081", "security" : { @@ -40,7 +40,7 @@ def tearDown(self): def test_init_happy_nonconditional_params(self): publisher = KafkaPublisher(**self.conf_w_security) - self.assertEqual(publisher.metadata_type, self.conf_w_security['metadata_type']) + self.assertEqual(publisher.metadata_type, self.conf_w_security['kafka_publisher_metadata_type']) self.assertEqual(publisher.brokers, self.conf_w_security['brokers']) self.assertEqual(publisher.schema_registry, self.conf_w_security['schema_registry']) self.assertEqual(publisher.security_enabled, self.conf_w_security['security']['enabled']) @@ -64,11 +64,11 @@ def test_init_security_disabled(self): def test_init_metadata_type_valid(self): publisher = KafkaPublisher(**self.conf_w_security) - self.assertEqual(publisher.metadata_type, self.conf_w_security['metadata_type']) + self.assertEqual(publisher.metadata_type, self.conf_w_security['kafka_publisher_metadata_type']) def test_init_metadata_type_invalid(self): wrong_metadata_type_config = dict(self.conf_w_security) - wrong_metadata_type_config['metadata_type'] = "invalid_type" + wrong_metadata_type_config['kafka_publisher_metadata_type'] = "invalid_type" self.assertRaises(ValueError, KafkaPublisher, **wrong_metadata_type_config) @@ -115,7 +115,7 @@ def test_register_client_wo_security(self, mock_client): @patch('onestop.KafkaPublisher.SerializingProducer') def test_create_producer_calls_AvroSerializer(self, mock_serializing_publisher, mock_avro_serializer): conf_w_security_collection = dict(self.conf_w_security) - conf_w_security_collection['metadata_type'] = "COLLECTION" + conf_w_security_collection['kafka_publisher_metadata_type'] = "COLLECTION" publisher = KafkaPublisher(**conf_w_security_collection) reg_client = publisher.register_client() @@ -130,7 +130,7 @@ def test_create_producer_calls_AvroSerializer(self, mock_serializing_publisher, def test_create_producer_collection_w_security(self, mock_serializing_producer, mock_avro_serializer): conf_w_security_collection = dict(self.conf_w_security) topic = conf_w_security_collection['collection_topic_publish'] - conf_w_security_collection['metadata_type'] = 'COLLECTION' + conf_w_security_collection['kafka_publisher_metadata_type'] = 'COLLECTION' publisher = KafkaPublisher(**conf_w_security_collection) reg_client = MagicMock() @@ -157,7 +157,7 @@ def test_create_producer_collection_w_security(self, mock_serializing_producer, def test_create_producer_collection_wo_security(self, mock_serializing_producer, mock_avro_serializer): conf_wo_security_collection = dict(self.conf_wo_security) topic = conf_wo_security_collection['collection_topic_publish'] - conf_wo_security_collection['metadata_type'] = 'COLLECTION' + conf_wo_security_collection['kafka_publisher_metadata_type'] = 'COLLECTION' publisher = KafkaPublisher(**conf_wo_security_collection) reg_client = MagicMock() @@ -180,7 +180,7 @@ def test_create_producer_collection_wo_security(self, mock_serializing_producer, def test_create_producer_granule_w_security(self, mock_serializing_producer, mock_avro_serializer): conf_w_security_granule = dict(self.conf_w_security) topic = conf_w_security_granule['granule_topic_publish'] - conf_w_security_granule['metadata_type'] = 'GRANULE' + conf_w_security_granule['kafka_publisher_metadata_type'] = 'GRANULE' publisher = KafkaPublisher(**conf_w_security_granule) reg_client = MagicMock() @@ -207,7 +207,7 @@ def test_create_producer_granule_w_security(self, 
mock_serializing_producer, moc def test_create_producer_granule_wo_security(self, mock_serializing_producer, mock_avro_serializer): conf_wo_security_granule = dict(self.conf_wo_security) exp_topic = conf_wo_security_granule['granule_topic_publish'] - conf_wo_security_granule['metadata_type'] = 'GRANULE' + conf_wo_security_granule['kafka_publisher_metadata_type'] = 'GRANULE' publisher = KafkaPublisher(**conf_wo_security_granule) reg_client = MagicMock() diff --git a/onestop-python-client/test/unit/test_SqsHandlers.py b/onestop-python-client/test/unit/test_SqsHandlers.py index 5bba184..231e6cf 100644 --- a/onestop-python-client/test/unit/test_SqsHandlers.py +++ b/onestop-python-client/test/unit/test_SqsHandlers.py @@ -22,7 +22,7 @@ def setUp(self): 'access_key': 'test_access_key', 'secret_key': 'test_secret_key', 'access_bucket': 'https://archive-testing-demo.s3-us-east-2.amazonaws.com', - 'type': 'COLLECTION', + 's3_message_adapter_metadata_type': 'COLLECTION', 'file_id_prefix': 'gov.noaa.ncei.csb:', 'collection_id': 'fdb56230-87f4-49f2-ab83-104cfd073177', 'registry_base_url': 'http://localhost/onestop/api/registry', diff --git a/onestop-python-client/test/unit/util/test_S3MessageAdapter.py b/onestop-python-client/test/unit/util/test_S3MessageAdapter.py index 925be2e..4eb277f 100644 --- a/onestop-python-client/test/unit/util/test_S3MessageAdapter.py +++ b/onestop-python-client/test/unit/util/test_S3MessageAdapter.py @@ -6,6 +6,7 @@ class S3MessageAdapterTest(unittest.TestCase): s3ma = None + config_dict = None recs1 = \ [{ @@ -52,24 +53,34 @@ class S3MessageAdapterTest(unittest.TestCase): def setUp(self): print("Set it up!") - config_dict = { + self.config_dict = { 'access_key': 'test_access_key', 'secret_key': 'test_secret_key', 'access_bucket': 'https://archive-testing-demo.s3-us-east-2.amazonaws.com', - 'type': 'COLLECTION', + 's3_message_adapter_metadata_type': 'COLLECTION', 'file_id_prefix': 'gov.noaa.ncei.csb:', 'collection_id': 'fdb56230-87f4-49f2-ab83-104cfd073177', 'log_level': 'DEBUG' } - self.s3_utils = S3Utils(**config_dict) - self.s3ma = S3MessageAdapter(**config_dict) + self.s3_utils = S3Utils(**self.config_dict) + self.s3ma = S3MessageAdapter(**self.config_dict) self.region = 'us-east-2' def tearDown(self): print("Tear it down!") + def test_metadata_type_lowercase(self): + metadata_type = 'collection' + uppercase_metadata_type = metadata_type.upper() + config = dict(self.config_dict) + config['s3_message_adapter_metadata_type'] = metadata_type + + s3MA = S3MessageAdapter(**config) + + self.assertEqual(uppercase_metadata_type, s3MA.metadata_type) + @mock_s3 def test_transform(self): s3 = self.s3_utils.connect('client', 's3', self.region) @@ -90,11 +101,8 @@ def test_transform(self): @mock_s3 def test_extra_parameters_constructor(self): - testParams = {"access_bucket": "blah1", - "type": "blah2", - "file_id_prefix": "blah3", - "collection_id": "blah4", - "extra": "extra value"} + testParams = dict(self.config_dict) + testParams['extra'] = 'extra value' self.assertRaises(Exception, S3MessageAdapter(**testParams)) if __name__ == '__main__': diff --git a/onestop-python-client/test/unit/util/test_S3Utils.py b/onestop-python-client/test/unit/util/test_S3Utils.py index 6508837..830a1d8 100644 --- a/onestop-python-client/test/unit/util/test_S3Utils.py +++ b/onestop-python-client/test/unit/util/test_S3Utils.py @@ -21,7 +21,7 @@ def setUp(self): 'access_key': 'test_access_key', 'secret_key': 'test_secret_key', 'access_bucket': 'https://archive-testing-demo.s3-us-east-2.amazonaws.com', - 
'type': 'COLLECTION', + 'metadata_type': 'COLLECTION', 'file_id_prefix': 'gov.noaa.ncei.csb:', 'collection_id': 'fdb56230-87f4-49f2-ab83-104cfd073177', 'log_level': 'DEBUG' diff --git a/onestop-python-client/test/unit/util/test_SqsConsumer.py b/onestop-python-client/test/unit/util/test_SqsConsumer.py index ef50b20..03ee897 100644 --- a/onestop-python-client/test/unit/util/test_SqsConsumer.py +++ b/onestop-python-client/test/unit/util/test_SqsConsumer.py @@ -13,7 +13,7 @@ class SqsConsumerTest(unittest.TestCase): 's3_region': 'us-east-2', 's3_bucket': 'archive-testing-demo', 'sqs_url': 'https://sqs.us-east-2.amazonaws.com/798276211865/cloud-archive-client-sqs', - 'type': 'COLLECTION', + 'metadata_type': 'COLLECTION', 'file_id_prefix': 'gov.noaa.ncei.csb:', 'collection_id': 'fdb56230-87f4-49f2-ab83-104cfd073177', 'registry_base_url': 'http://localhost/onestop/api/registry', diff --git a/scripts/config/csb-data-stream-config.yml b/scripts/config/csb-data-stream-config.yml index 2d25328..f110852 100644 --- a/scripts/config/csb-data-stream-config.yml +++ b/scripts/config/csb-data-stream-config.yml @@ -1,6 +1,10 @@ +# COLLECTION or GRANULE +kafka_consumer_metadata_type: COLLECTION +kafka_publisher_metadata_type: COLLECTION +s3_message_adapter_metadata_type: COLLECTION + format: csv headers: UNIQUE_ID,FILE_UUID,LON,LAT,DEPTH,TIME,PLATFORM_NAME,PROVIDER -type: COLLECTION collection_id: fdb56230-87f4-49f2-ab83-104cfd073177 registry_base_url: https://internal-a683c98a66fb011eaa4230e0d5e5657f-369075387.us-east-1.elb.amazonaws.com access_bucket: https://archive-testing-demo.s3-us-east-2.amazonaws.com diff --git a/scripts/sqs-to-registry/config/e2e.yml b/scripts/sqs-to-registry/config/e2e.yml index 4c2c800..a2bdcfc 100644 --- a/scripts/sqs-to-registry/config/e2e.yml +++ b/scripts/sqs-to-registry/config/e2e.yml @@ -14,14 +14,16 @@ s3_bucket2: noaa-nccf-dev-archive #CSB stream config format: csv headers: UNIQUE_ID,FILE_UUID,LON,LAT,DEPTH,TIME,PLATFORM_NAME,PROVIDER -type: COLLECTION collection_id: fdb56230-87f4-49f2-ab83-104cfd073177 access_bucket: https://archive-testing-demo.s3-us-east-2.amazonaws.com #access_bucket: https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com file_identifier_prefix: "gov.noaa.ncei.csb:" # COLLECTION or GRANULE -metadata_type: granule +kafka_consumer_metadata_type: GRANULE +kafka_publisher_metadata_type: GRANULE +s3_message_adapter_metadata_type: COLLECTION + registry_base_url: http://onestop-registry:80 onestop_base_url: http://onestop-search:8080 From c34b33891daae02fb81e0a9fca5a11d38e79050f Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 11 Jun 2021 13:24:55 -0600 Subject: [PATCH 092/129] 1507-Added metadata_type validation to S3MessageAdapter and tests. 
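
A quick usage sketch of the new validation (argument values are illustrative, borrowed from
the test configuration):

    from onestop.util.S3MessageAdapter import S3MessageAdapter

    S3MessageAdapter(
        access_bucket='https://archive-testing-demo.s3-us-east-2.amazonaws.com',
        s3_message_adapter_metadata_type='invalid_type',  # anything besides COLLECTION/GRANULE
        file_id_prefix='gov.noaa.ncei.csb:',
        collection_id='fdb56230-87f4-49f2-ab83-104cfd073177')
    # raises ValueError: metadata_type of 'INVALID_TYPE' must be 'COLLECTION' or 'GRANULE'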
--- .../onestop/util/S3MessageAdapter.py | 3 +++ .../test/unit/util/test_S3MessageAdapter.py | 11 +++++++++++ 2 files changed, 14 insertions(+) diff --git a/onestop-python-client/onestop/util/S3MessageAdapter.py b/onestop-python-client/onestop/util/S3MessageAdapter.py index cb8ffe0..9b74bb3 100644 --- a/onestop-python-client/onestop/util/S3MessageAdapter.py +++ b/onestop-python-client/onestop/util/S3MessageAdapter.py @@ -58,6 +58,9 @@ def __init__(self, access_bucket, s3_message_adapter_metadata_type, file_id_pre self.logger = ClientLogger.get_logger(self.__class__.__name__, log_level, False) self.logger.info("Initializing " + self.__class__.__name__) + if self.metadata_type not in ['COLLECTION', 'GRANULE']: + raise ValueError("metadata_type of '%s' must be 'COLLECTION' or 'GRANULE'"%(self.metadata_type)) + if wildargs: self.logger.debug("Superfluous parameters in constructor call: " + str(wildargs)) diff --git a/onestop-python-client/test/unit/util/test_S3MessageAdapter.py b/onestop-python-client/test/unit/util/test_S3MessageAdapter.py index 4eb277f..93dfed2 100644 --- a/onestop-python-client/test/unit/util/test_S3MessageAdapter.py +++ b/onestop-python-client/test/unit/util/test_S3MessageAdapter.py @@ -71,6 +71,17 @@ def setUp(self): def tearDown(self): print("Tear it down!") + def test_init_metadata_type_valid(self): + publisher = S3MessageAdapter(**self.config_dict) + + self.assertEqual(publisher.metadata_type, self.config_dict['s3_message_adapter_metadata_type']) + + def test_init_metadata_type_invalid(self): + wrong_metadata_type_config = dict(self.config_dict) + wrong_metadata_type_config['s3_message_adapter_metadata_type'] = "invalid_type" + + self.assertRaises(ValueError, S3MessageAdapter, **wrong_metadata_type_config) + def test_metadata_type_lowercase(self): metadata_type = 'collection' uppercase_metadata_type = metadata_type.upper() From 1566e1c1e9fd80873b277ba5c43e7ed18f802382 Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 11 Jun 2021 14:29:49 -0600 Subject: [PATCH 093/129] 1507-Adjusted log messages in KafkaConsumer and KafkaPublisher create methods. Mostly added, removed a redundant one.Adjusted one in deliver reports from error to info. 
--- onestop-python-client/onestop/KafkaConsumer.py | 3 +-- onestop-python-client/onestop/KafkaPublisher.py | 7 +++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/onestop-python-client/onestop/KafkaConsumer.py b/onestop-python-client/onestop/KafkaConsumer.py index b15b529..c064dd9 100644 --- a/onestop-python-client/onestop/KafkaConsumer.py +++ b/onestop-python-client/onestop/KafkaConsumer.py @@ -170,9 +170,8 @@ def create_consumer(self, registry_client): conf['ssl.key.location'] = self.security_keyLoc conf['ssl.certificate.location'] = self.security_certLoc - self.logger.debug("conf: "+str(conf)) + self.logger.debug("Deserializing conf: "+str(conf)) metadata_consumer = DeserializingConsumer(conf) - self.logger.debug("topic: "+str(topic)) metadata_consumer.subscribe([topic]) return metadata_consumer diff --git a/onestop-python-client/onestop/KafkaPublisher.py b/onestop-python-client/onestop/KafkaPublisher.py index 15a5d3b..da0e16d 100644 --- a/onestop-python-client/onestop/KafkaPublisher.py +++ b/onestop-python-client/onestop/KafkaPublisher.py @@ -144,8 +144,11 @@ def create_producer(self, registry_client): if self.metadata_type == "GRANULE": topic = self.granule_topic + self.logger.debug("topic: "+str(topic)) metadata_schema = registry_client.get_latest_version(topic + '-value').schema.schema_str + self.logger.debug("metadata_schema: "+metadata_schema) + metadata_serializer = AvroSerializer(schema_str=metadata_schema, schema_registry_client=registry_client) conf = {'bootstrap.servers': self.brokers} @@ -156,7 +159,7 @@ def create_producer(self, registry_client): conf['ssl.certificate.location'] = self.security_certLoc conf['value.serializer'] = metadata_serializer - + self.logger.debug("Serializing conf: "+str(conf)) metadata_producer = SerializingProducer(conf) return metadata_producer @@ -172,7 +175,7 @@ def delivery_report(self, err, msg): if err is not None: self.logger.error('Message delivery failed: {}'.format(err)) else: - self.logger.error('Message delivered to {} [{}]'.format(msg.topic(), msg.partition())) + self.logger.info('Message delivered to {} [{}]'.format(msg.topic(), msg.partition())) @staticmethod def get_collection_key_from_uuid(collection_uuid): From 3946bf1bf36fd5957dbab2b8cc3cec273ab65721 Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 11 Jun 2021 14:30:47 -0600 Subject: [PATCH 094/129] 1507-KafkaPublisher consolidated create method's config into one spot. 
--- onestop-python-client/onestop/KafkaPublisher.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/onestop-python-client/onestop/KafkaPublisher.py b/onestop-python-client/onestop/KafkaPublisher.py index da0e16d..9206fe0 100644 --- a/onestop-python-client/onestop/KafkaPublisher.py +++ b/onestop-python-client/onestop/KafkaPublisher.py @@ -150,7 +150,9 @@ def create_producer(self, registry_client): self.logger.debug("metadata_schema: "+metadata_schema) metadata_serializer = AvroSerializer(schema_str=metadata_schema, schema_registry_client=registry_client) - conf = {'bootstrap.servers': self.brokers} + conf = { + 'bootstrap.servers': self.brokers, + 'value.serializer': metadata_serializer} if self.security_enabled: conf['security.protocol'] = 'SSL' @@ -158,7 +160,6 @@ def create_producer(self, registry_client): conf['ssl.key.location'] = self.security_keyLoc conf['ssl.certificate.location'] = self.security_certLoc - conf['value.serializer'] = metadata_serializer self.logger.debug("Serializing conf: "+str(conf)) metadata_producer = SerializingProducer(conf) return metadata_producer From 9ce570e366506bdd74035aabfbf85bc81eb57872 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 15 Jun 2021 09:23:15 -0600 Subject: [PATCH 095/129] 1507-To the scripts readme added more information about helm, build information, and tried to organize by the automated process vs manual. Second pass to come in next story. --- scripts/README.md | 156 ++++++++++++++++++++++++++++++---------------- 1 file changed, 104 insertions(+), 52 deletions(-) diff --git a/scripts/README.md b/scripts/README.md index e0276cb..4773928 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -1,60 +1,112 @@ -#Scripts +# Using onestop-python-client ## Table of Contents -* [Quickstart](#quickstart) -* [Kubectl Pod Verification](#kubectl-pod-verification) -* [Load Data](#load-data) -* [Updating Containers](#updating-containers) +* [Setup](#setup) + * [Helm](#helm) + * [Use Helm to Create a Script Container](#use-helm-to-create-a-script-container) + * [Using Helm Config File](#using-helm-config-file) + * [Helm Pulling of Image](#helm-pulling-of-image) + * [Startup Helm Script Container](#startup-helm-script-container) + * [Manually Setup Environment](#manually-setup-environment) +* [Building](#building) + * [Rebuilding Code or Scripts](#rebuilding-code-or-scripts) + * [Rebuilding Containers](#rebuilding-containers) +* [Load Data into OneStop](#load-data-into-onestop) + * [onestop-test-data repository](#onestop-test-data-repositoryhttpsgithubcomcedardevsonestop-test-data) + * [osim-deployment repository](#osim-deployment-repositoryhttpsgithubcomcedardevsosim-deployment) +* [OneStop Quickstart](https://cedardevs.github.io/onestop/developer/quickstart) -This directory contains scripts that use the onestop-python-library to send data to a OneStop. - -## Quickstart -- Install conda (miniconda works). -- Restart terminal or source files to recognize conda commands. 
-- Create a new conda environment and activate it - - `conda create -n onestop-clients python=3` - - `conda activate onestop-clients` - - `pip install setuptools` - -- Install any libraries needed by your sme script - - Ex: `pip install PyYaml` - -- Build the latest onestop-python-client - - `pip uninstall onestop-python-client-cedardevs` - - `pip install ./onestop-python-client` (run from root of this repository) - -- Input credentials for helm in the file `helm/onestop-sqs-consumer/values.yaml` - - Then: - - `helm uninstall sme` - - `helm install sme helm/onestop-sqs-consumer` - -## Kubectl Pod Verification -- Verify onestop-client pod is running, copy the pod name. - - `kubectl get pods` - -- Exec into it - - `kubectl exec -it <pod-name> -- sh` where the <pod-name> is listed in `kubectl get pods` - -- Check logs - - `kubectl logs <pod-name>` - -## Load Data -There are several repositories to aid in loading data into a OneStop. Please read the appropriate repository's readme for accurate and up to date usage information. +## Setup +To use onestop-python-client there are two options: helm or manually. + +### Helm +#### Use Helm to Create a Script Container +We use helm to pull a OneStop-Clients image (specified in `helm/<chart name>/values.yml`) and deploy a kubernetes container that can communicate with the configured OneStop. It also copies over the onestop-python-client and scripts directories to the container. + +Those configuration values are in this repo under `helm/<chart name>/values.yml`. Our helm is configured to create a configuration file in the script container at `/etc/config/config.yml` from the appropriate values.yml. You can use this or create your own configuration file and put it in the script container. Our scripts are configured to use the command-line parameter `conf`, or, if that isn't specified, to look for the helm-generated configuration file (a short sketch of reading this file appears below). + +#### Using Helm Config File +If you are going to use the helm-generated configuration file then you should probably edit the conf section in the helm values.yaml file for the container you will have helm create (Ex. `helm/onestop-sqs-consumer/values.yaml`). + * *_metadata_type - should be granule or collection, depending on what you are sending/receiving. + * schema_registry, registry_base_url, and onestop_base_url - set these to what you are communicating with, especially if you are not on cedar-devs talking to its OneStop. + * AWS section - there are several config values for AWS you probably need to change; many are set to testing values. + * Kafka section - there is a whole Kafka section that you might need to adjust if you are using Kafka. This is perhaps not the preferred way to submit to OneStop. + * log_level - if you are troubleshooting or just want more granular logging, set this to DEBUG. + +#### Helm Pulling of Image +When you run the helm install command, helm pulls the specified image from the repository indicated in the helm values yaml file. 
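As referenced above, here is a hypothetical sketch of how a script might read the helm-generated configuration file. The `/etc/config/config.yml` default path, the `-conf` flag, and the key names are assumptions based on the description above, not a definitive part of this repository.

```python
# Hypothetical config-loading sketch; the path, flag, and keys are assumptions (see lead-in).
import argparse
import yaml

parser = argparse.ArgumentParser(description="Example onestop-python-client script")
parser.add_argument('-conf', default='/etc/config/config.yml',
                    help='YAML config file; defaults to the helm-generated one')
args = parser.parse_args()

with open(args.conf) as f:
    conf = yaml.load(f, Loader=yaml.FullLoader)

# A few of the values discussed in the section above.
log_level = conf.get('log_level', 'INFO')
registry_base_url = conf.get('registry_base_url')
onestop_base_url = conf.get('onestop_base_url')
print(log_level, registry_base_url, onestop_base_url)
```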
+ +#### Startup Helm Script Container +The helm install command, run from the root of this repository, will use the charts and configuration information in `helm/onestop-sqs-consumer` to create a container called `sme`: + * cd to the root of this repository + * `helm uninstall sme` + * `helm install sme helm/onestop-sqs-consumer` + +To check on the container, run this and look for the pod with the <container name>: + +`kubectl get pods` +``` +(base) ~/repo/onestop-clients 07:00 PM$ kubectl get pods +NAME READY STATUS RESTARTS AGE +sme-onestop-sqs-consumer-5c678675f7-q2s7h 0/1 Pending 0 26s +``` +If it isn't in a 'Running' state within 10 seconds, then something is probably wrong. If it hasn't crashed yet (the CrashLoopBackOff state), then it is probably a timeout problem trying to connect to a resource. 
Once the container is running, which should only be a matter of seconds, you can "ssh" into the container via this command. 

NOTE: use the pod name listed in the `kubectl get pods` command results in this command: 

`kubectl exec --stdin --tty sme-onestop-sqs-consumer-5c678675f7-kmpvn -- /bin/bash` + +### Manually Setup Environment +* Install conda (miniconda works). +* Restart terminal or source files to recognize conda commands. +* Create a new conda environment and activate it (not convinced you need this) + * `conda create -n onestop-clients python=3` + * `conda activate onestop-clients` + * `pip install setuptools` + +* Install any libraries needed by your script + * Ex: `pip install PyYaml` + + `pip install ./onestop-python-client` + + To test the import, try this and it shouldn't give an error: + + ``` + $ python3 + >>> import onestop_client + ``` + +## Building +Building locally is not necessary if you are using the images that we build automatically. Currently, we build an image via docker files with the tag 'latest' when *any* commits, even on branches, are pushed to GitHub and trigger CircleCI. +You might want to build locally to make code changes, build them, and then run your python script against that locally pip-installed onestop-python-client. + +### Rebuilding Code or Scripts +* Install the latest onestop-python-client into your environment + + `pip uninstall onestop-python-client-cedardevs` + + `pip install ./onestop-python-client` (run from root of this repository) + +### Rebuilding Containers +* If the onestop-python-client code changes then run: + + `docker build .
-t cedardevs/onestop-python-client:latest` + +* If just the scripts change + + `docker build ./scripts/sqs-to-registry -t cedardevs/onestop-s3-handler` + + `docker build ./scripts/sme/ -t cedardevs/onestop-sme:latest` + +## Load Data into OneStop +There are several repositories to aid in loading data into a OneStop. Please read the appropriate repository's readme for accurate and up to date usage information. +### [onestop-test-data repository](https://github.com/cedardevs/onestop-test-data) + `./upload.sh demo http://localhost/onestop/api/registry` +### [osim-deployment repository](https://github.com/cedardevs/osim-deployment) + From the osim-deployment repository there is a staging-scripts directory with scripts for loading some data: + + `./copyS3objects.sh -max_files=5 copy-config/archive-testing-demo-csb.sh` From e91f052fc72e22d4b0764756290e67e13a6c4943 Mon Sep 17 00:00:00 2001 From: Erin Date: Thu, 24 Jun 2021 18:31:54 -0600 Subject: [PATCH 096/129] 1508-Changed python package avro-python3 to avro since it sounds like that package was merged into the avro one and will be removed in the future. --- onestop-python-client/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onestop-python-client/requirements.txt b/onestop-python-client/requirements.txt index 036e217..75e4f29 100644 --- a/onestop-python-client/requirements.txt +++ b/onestop-python-client/requirements.txt @@ -1,5 +1,5 @@ confluent-kafka -avro-python3 +avro fastavro smart-open PyYAML~=5.3.1 From 9732d7ebc3df118acb8ded261e055cc8bbbf4d5f Mon Sep 17 00:00:00 2001 From: Erin Date: Thu, 24 Jun 2021 18:34:29 -0600 Subject: [PATCH 097/129] 1508-Changed avro schema requirement of relationship to be a string. This is because it was too nested and to change to an enum didn't seem to work with Optional. 
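In practical terms, the change below means a Relationship is now built with a plain string (or None) for its type instead of a nested RelationshipType record. A minimal sketch, assuming the import path shown in the diffs that follow; the id values are just the example UUIDs from the updated test.

```python
# Sketch of constructing Relationship after `type` became Optional[str].
from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.relationship import Relationship

# Before this change the type was wrapped: Relationship(id=..., type=RelationshipType(type='COLLECTION'))
granule_to_collection = Relationship(id='5b58de08-afef-49fb-99a1-9c5d5c003bde', type='COLLECTION')

# type is Optional, so None is also accepted.
untyped = Relationship(id='6668de08-afef-49fb-99a1-9c5d5c003bde', type=None)

print(granule_to_collection, untyped)
```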
--- .../cedar/schemas/avro/psi/relationship.py | 5 +--- .../schemas/avro/psi/relationship_type.py | 28 ------------------- .../schemas/avro/psi/test_ParsedRecord.py | 3 ++ 3 files changed, 4 insertions(+), 32 deletions(-) delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/relationship_type.py diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/relationship.py b/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/relationship.py index fa4d92e..045d994 100644 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/relationship.py +++ b/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/relationship.py @@ -3,16 +3,13 @@ from undictify import type_checked_constructor -from .relationship_type import RelationshipType - - @type_checked_constructor() @dataclass class Relationship: """ Record of a relationship to another object in inventory """ - type: Optional[RelationshipType] + type: Optional[str] #: The id of the related object id: str diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/relationship_type.py b/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/relationship_type.py deleted file mode 100644 index c227f89..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/relationship_type.py +++ /dev/null @@ -1,28 +0,0 @@ -from dataclasses import asdict, dataclass -from typing import Dict - -from undictify import type_checked_constructor - - -@type_checked_constructor() -@dataclass -class RelationshipType: - type: str - - def to_dict(self) -> Dict: - """ - Returns a dictionary version of this instance. - """ - return asdict(self) - - @classmethod - def from_dict( - cls, - the_dict: Dict - ) -> 'RelationshipType': - """ - Returns an instance of this class from a dictionary. - - :param the_dict: The dictionary from which to create an instance of this class. - """ - return cls(**the_dict) diff --git a/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/test_ParsedRecord.py b/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/test_ParsedRecord.py index 3f2d865..80aec1d 100644 --- a/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/test_ParsedRecord.py +++ b/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/test_ParsedRecord.py @@ -134,6 +134,9 @@ def test_relationships(self): { "id":"5b58de08-afef-49fb-99a1-9c5d5c003bde", "type":"COLLECTION" + }, + { + "id":"6668de08-afef-49fb-99a1-9c5d5c003bde" } ] } From de86ff2ecc4fb026edabed2e94ebcf41403ff960 Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 25 Jun 2021 11:13:12 -0600 Subject: [PATCH 098/129] 1508-Didn't realize our avro RelationshipType was referenced elsewhere. Removed it since switched to simply a string. 
--- onestop-python-client/onestop/util/S3MessageAdapter.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/onestop-python-client/onestop/util/S3MessageAdapter.py b/onestop-python-client/onestop/util/S3MessageAdapter.py index 6bd832d..08d0c1d 100644 --- a/onestop-python-client/onestop/util/S3MessageAdapter.py +++ b/onestop-python-client/onestop/util/S3MessageAdapter.py @@ -4,7 +4,7 @@ from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.file_location import FileLocation,FileLocationType from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.file_information import FileInformation from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.checksum import Checksum, ChecksumAlgorithm -from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.relationship import Relationship, RelationshipType +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.relationship import Relationship from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.discovery import Discovery, Link @@ -91,8 +91,7 @@ def transform(self, recs): fileInformation = FileInformation(name=file_name, size=file_size, checksums=[checkSum], optionalAttributes={}) # Relationship - relationshipType = RelationshipType(type=self.type) - relationship = Relationship(id=self.collection_id, type=relationshipType) + relationship = Relationship(id=self.collection_id, type=self.type) # File Location fileLocationType = FileLocationType(type='ARCHIVE') From e44e1354569fc6a862e00de8de8260acc50681a1 Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 25 Jun 2021 11:58:59 -0600 Subject: [PATCH 099/129] 1508-Removed unused avro tests since looks like an import is no longer available. Never worked for our code, would need work. --- .../geojsonSchemaClasses_test/__init__.py | 7 -- .../line_string_type_factory.py | 9 --- .../multi_line_string_type_factory.py | 10 --- .../multi_point_type_factory.py | 10 --- .../multi_polygon_type_factory.py | 10 --- .../geojsonSchemaClasses_test/org/__init__.py | 0 .../org/cedar/__init__.py | 0 .../org/cedar/schemas/__init__.py | 0 .../org/cedar/schemas/avro/__init__.py | 0 .../cedar/schemas/avro/geojson/__init__.py | 6 -- .../avro/geojson/line_string_factory.py | 14 ---- .../avro/geojson/multi_line_string_factory.py | 14 ---- .../avro/geojson/multi_point_factory.py | 14 ---- .../avro/geojson/multi_polygon_factory.py | 14 ---- .../schemas/avro/geojson/point_factory.py | 14 ---- .../schemas/avro/geojson/polygon_factory.py | 14 ---- .../point_type_factory.py | 10 --- .../polygon_type_factory.py | 10 --- .../testing_classes.py | 16 ----- .../schemas/psiSchemaClasses_test/__init__.py | 7 -- .../data_access_analysis_factory.py | 12 ---- .../data_format_factory.py | 13 ---- .../description_analysis_factory.py | 15 ----- .../identification_analysis_factory.py | 19 ------ .../instruments_factory.py | 14 ---- .../keywords_element_factory.py | 14 ---- .../operation_factory.py | 15 ----- .../psiSchemaClasses_test/org/__init__.py | 0 .../org/cedar/__init__.py | 0 .../org/cedar/schemas/__init__.py | 0 .../org/cedar/schemas/avro/__init__.py | 0 .../cedar/schemas/avro/geojson/__init__.py | 6 -- .../avro/geojson/line_string_factory.py | 10 --- .../avro/geojson/multi_line_string_factory.py | 10 --- .../avro/geojson/multi_point_factory.py | 10 --- .../avro/geojson/multi_polygon_factory.py | 10 --- .../schemas/avro/geojson/point_factory.py | 10 --- .../schemas/avro/geojson/polygon_factory.py | 10 --- .../org/cedar/schemas/avro/psi/__init__.py | 24 ------- 
.../avro/psi/aggregated_input_factory.py | 26 ------- .../schemas/avro/psi/analysis_factory.py | 24 ------- .../avro/psi/checksum_algorithm_factory.py | 10 --- .../schemas/avro/psi/checksum_factory.py | 13 ---- .../schemas/avro/psi/discovery_factory.py | 67 ------------------- .../schemas/avro/psi/error_event_factory.py | 16 ----- .../avro/psi/file_information_factory.py | 18 ----- .../schemas/avro/psi/file_location_factory.py | 22 ------ .../avro/psi/file_location_type_factory.py | 10 --- .../schemas/avro/psi/input_event_factory.py | 18 ----- .../cedar/schemas/avro/psi/input_factory.py | 19 ------ .../cedar/schemas/avro/psi/link_factory.py | 16 ----- .../cedar/schemas/avro/psi/method_factory.py | 10 --- .../avro/psi/operation_type_factory.py | 10 --- .../schemas/avro/psi/parsed_record_factory.py | 22 ------ .../avro/psi/parsed_record_with_id_factory.py | 13 ---- .../schemas/avro/psi/publishing_factory.py | 13 ---- .../schemas/avro/psi/record_type_factory.py | 10 --- .../schemas/avro/psi/reference_factory.py | 15 ----- .../schemas/avro/psi/relationship_factory.py | 15 ----- .../avro/psi/relationship_type_factory.py | 10 --- .../avro/psi/responsible_party_factory.py | 17 ----- .../avro/psi/temporal_bounding_factory.py | 18 ----- .../avro/psi/valid_descriptor_factory.py | 10 --- .../psiSchemaClasses_test/platform_factory.py | 14 ---- .../psiSchemaClasses_test/service_factory.py | 20 ------ .../spatial_bounding_analysis_factory.py | 14 ---- .../temporal_bounding_analysis_factory.py | 46 ------------- .../psiSchemaClasses_test/testing_classes.py | 48 ------------- .../thumbnail_analysis_factory.py | 12 ---- .../time_range_descriptor_factory.py | 10 --- .../title_analysis_factory.py | 19 ------ 71 files changed, 976 deletions(-) delete mode 100644 onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/__init__.py delete mode 100644 onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/line_string_type_factory.py delete mode 100644 onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/multi_line_string_type_factory.py delete mode 100644 onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/multi_point_type_factory.py delete mode 100644 onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/multi_polygon_type_factory.py delete mode 100644 onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/__init__.py delete mode 100644 onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/__init__.py delete mode 100644 onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/__init__.py delete mode 100644 onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/__init__.py delete mode 100644 onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/__init__.py delete mode 100644 onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/line_string_factory.py delete mode 100644 onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_line_string_factory.py delete mode 100644 onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_point_factory.py delete mode 100644 onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_polygon_factory.py delete mode 100644 onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/point_factory.py 
delete mode 100644 onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/polygon_factory.py delete mode 100644 onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/point_type_factory.py delete mode 100644 onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/polygon_type_factory.py delete mode 100644 onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/testing_classes.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/__init__.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/data_access_analysis_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/data_format_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/description_analysis_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/identification_analysis_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/instruments_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/keywords_element_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/operation_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/__init__.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/__init__.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/__init__.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/__init__.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/__init__.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/line_string_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_line_string_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_point_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_polygon_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/point_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/polygon_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/__init__.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/aggregated_input_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/analysis_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/checksum_algorithm_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/checksum_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/discovery_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/error_event_factory.py delete mode 100644 
onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/file_information_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/file_location_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/file_location_type_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/input_event_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/input_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/link_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/method_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/operation_type_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/parsed_record_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/parsed_record_with_id_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/publishing_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/record_type_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/reference_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/relationship_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/relationship_type_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/responsible_party_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/temporal_bounding_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/valid_descriptor_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/platform_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/service_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/spatial_bounding_analysis_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/temporal_bounding_analysis_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/testing_classes.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/thumbnail_analysis_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/time_range_descriptor_factory.py delete mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses_test/title_analysis_factory.py diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/__init__.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/__init__.py deleted file mode 100644 index 3862fe7..0000000 --- a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -import faker - -from 
pyavro_gen.enum_with_schema_provider import EnumWithSchemaProvider -from .testing_classes import test_classes - -fake = faker.Faker() -fake.add_provider(EnumWithSchemaProvider) diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/line_string_type_factory.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/line_string_type_factory.py deleted file mode 100644 index 2f69d1d..0000000 --- a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/line_string_type_factory.py +++ /dev/null @@ -1,9 +0,0 @@ -from factory import Factory, lazy_attribute -from onestop.schemaTest2.line_string_type import LineStringType -from onestop.schemaTest2_test import fake - - -class LineStringTypeFactory(Factory): - class Meta: - model = LineStringType - value = lazy_attribute(lambda x: fake.enum_with_schema(LineStringType)) diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/multi_line_string_type_factory.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/multi_line_string_type_factory.py deleted file mode 100644 index 3c3ef42..0000000 --- a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/multi_line_string_type_factory.py +++ /dev/null @@ -1,10 +0,0 @@ -from factory import Factory, lazy_attribute - -from onestop.schemaTest2.multi_line_string_type import MultiLineStringType -from onestop.schemaTest2_test import fake - - -class MultiLineStringTypeFactory(Factory): - class Meta: - model = MultiLineStringType - value = lazy_attribute(lambda x: fake.enum_with_schema(MultiLineStringType)) diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/multi_point_type_factory.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/multi_point_type_factory.py deleted file mode 100644 index 90b4b6a..0000000 --- a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/multi_point_type_factory.py +++ /dev/null @@ -1,10 +0,0 @@ -from factory import Factory, lazy_attribute - -from onestop.schemaTest2.multi_point_type import MultiPointType -from onestop.schemaTest2_test import fake - - -class MultiPointTypeFactory(Factory): - class Meta: - model = MultiPointType - value = lazy_attribute(lambda x: fake.enum_with_schema(MultiPointType)) diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/multi_polygon_type_factory.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/multi_polygon_type_factory.py deleted file mode 100644 index a417b9f..0000000 --- a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/multi_polygon_type_factory.py +++ /dev/null @@ -1,10 +0,0 @@ -from factory import Factory, lazy_attribute - -from onestop.schemaTest2.multi_polygon_type import MultiPolygonType -from onestop.schemaTest2_test import fake - - -class MultiPolygonTypeFactory(Factory): - class Meta: - model = MultiPolygonType - value = lazy_attribute(lambda x: fake.enum_with_schema(MultiPolygonType)) diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/__init__.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/__init__.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/__init__.py 
b/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/__init__.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/__init__.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/__init__.py deleted file mode 100644 index 832bb72..0000000 --- a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from onestop.schemaTest2_test.org.cedar.schemas.avro.geojson.line_string_factory import LineStringFactory -from onestop.schemaTest2_test.org.cedar.schemas.avro.geojson.polygon_factory import PolygonFactory -from onestop.schemaTest2_test.org.cedar.schemas.avro.geojson.multi_line_string_factory import MultiLineStringFactory -from onestop.schemaTest2_test.org.cedar.schemas.avro.geojson.point_factory import PointFactory -from onestop.schemaTest2_test.org.cedar.schemas.avro.geojson.multi_point_factory import MultiPointFactory -from onestop.schemaTest2_test.org.cedar.schemas.avro.geojson.multi_polygon_factory import MultiPolygonFactory diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/line_string_factory.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/line_string_factory.py deleted file mode 100644 index 85213c3..0000000 --- a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/line_string_factory.py +++ /dev/null @@ -1,14 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from onestop.schemaTest2.org.cedar.schemas.avro.geojson import LineString -from onestop.schemaTest2_test import fake -from onestop.schemaTest2_test.line_string_type_factory import LineStringTypeFactory - - -class LineStringFactory(Factory): - class Meta: - model = LineString - type = lazy_attribute(lambda x: LineStringTypeFactory()) - coordinates = lazy_attribute(lambda x: [[[fake.pyfloat() for _ in range(randint(1, 5))]][randint(0, 0)] for _ in range(randint(1, 5))]) diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_line_string_factory.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_line_string_factory.py deleted file mode 100644 index 227621f..0000000 --- a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_line_string_factory.py +++ /dev/null @@ -1,14 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from onestop.schemaTest2.org.cedar.schemas.avro.geojson import MultiLineString -from onestop.schemaTest2_test import fake -from onestop.schemaTest2_test.multi_line_string_type_factory import MultiLineStringTypeFactory - - -class MultiLineStringFactory(Factory): - class Meta: - model = MultiLineString - type = lazy_attribute(lambda x: MultiLineStringTypeFactory()) - coordinates = lazy_attribute(lambda x: [[[[[fake.pyfloat() for _ in range(randint(1, 5))]][randint(0, 0)] for _ in range(randint(1, 5))]][randint(0, 0)] for _ in 
range(randint(1, 5))]) diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_point_factory.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_point_factory.py deleted file mode 100644 index 5d55bbd..0000000 --- a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_point_factory.py +++ /dev/null @@ -1,14 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from onestop.schemaTest2.org.cedar.schemas.avro.geojson import MultiPoint -from onestop.schemaTest2_test import fake -from onestop.schemaTest2_test.multi_point_type_factory import MultiPointTypeFactory - - -class MultiPointFactory(Factory): - class Meta: - model = MultiPoint - type = lazy_attribute(lambda x: MultiPointTypeFactory()) - coordinates = lazy_attribute(lambda x: [[[fake.pyfloat() for _ in range(randint(1, 5))]][randint(0, 0)] for _ in range(randint(1, 5))]) diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_polygon_factory.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_polygon_factory.py deleted file mode 100644 index 2d716ab..0000000 --- a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_polygon_factory.py +++ /dev/null @@ -1,14 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from onestop.schemaTest2.org.cedar.schemas.avro.geojson import MultiPolygon -from onestop.schemaTest2_test import fake -from onestop.schemaTest2_test.multi_polygon_type_factory import MultiPolygonTypeFactory - - -class MultiPolygonFactory(Factory): - class Meta: - model = MultiPolygon - type = lazy_attribute(lambda x: MultiPolygonTypeFactory()) - coordinates = lazy_attribute(lambda x: [[[[[[[fake.pyfloat() for _ in range(randint(1, 5))]][randint(0, 0)] for _ in range(randint(1, 5))]][randint(0, 0)] for _ in range(randint(1, 5))]][randint(0, 0)] for _ in range(randint(1, 5))]) diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/point_factory.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/point_factory.py deleted file mode 100644 index 05ad1e8..0000000 --- a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/point_factory.py +++ /dev/null @@ -1,14 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from onestop.schemaTest2.org.cedar.schemas.avro.geojson import Point -from onestop.schemaTest2_test import fake -from onestop.schemaTest2_test.point_type_factory import PointTypeFactory - - -class PointFactory(Factory): - class Meta: - model = Point - type = lazy_attribute(lambda x: PointTypeFactory()) - coordinates = lazy_attribute(lambda x: [fake.pyfloat() for _ in range(randint(1, 5))]) diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/polygon_factory.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/polygon_factory.py deleted file mode 100644 index 1274b26..0000000 --- a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/polygon_factory.py +++ /dev/null @@ -1,14 +0,0 @@ -from random import randint - -from factory import 
Factory, lazy_attribute - -from onestop.schemaTest2.org.cedar.schemas.avro.geojson import Polygon -from onestop.schemaTest2_test import fake -from onestop.schemaTest2_test.polygon_type_factory import PolygonTypeFactory - - -class PolygonFactory(Factory): - class Meta: - model = Polygon - type = lazy_attribute(lambda x: PolygonTypeFactory()) - coordinates = lazy_attribute(lambda x: [[[[[fake.pyfloat() for _ in range(randint(1, 5))]][randint(0, 0)] for _ in range(randint(1, 5))]][randint(0, 0)] for _ in range(randint(1, 5))]) diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/point_type_factory.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/point_type_factory.py deleted file mode 100644 index 542a39d..0000000 --- a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/point_type_factory.py +++ /dev/null @@ -1,10 +0,0 @@ -from factory import Factory, lazy_attribute - -from onestop.schemaTest2.point_type import PointType -from onestop.schemaTest2_test import fake - - -class PointTypeFactory(Factory): - class Meta: - model = PointType - value = lazy_attribute(lambda x: fake.enum_with_schema(PointType)) diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/polygon_type_factory.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/polygon_type_factory.py deleted file mode 100644 index 01ca0e3..0000000 --- a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/polygon_type_factory.py +++ /dev/null @@ -1,10 +0,0 @@ -from factory import Factory, lazy_attribute - -from onestop.schemaTest2.polygon_type import PolygonType -from onestop.schemaTest2_test import fake - - -class PolygonTypeFactory(Factory): - class Meta: - model = PolygonType - value = lazy_attribute(lambda x: fake.enum_with_schema(PolygonType)) diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/testing_classes.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/testing_classes.py deleted file mode 100644 index d2fa1a0..0000000 --- a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/testing_classes.py +++ /dev/null @@ -1,16 +0,0 @@ -from pyavro_gen.codewriters.namespace import ClassItem - -test_classes = [ - ClassItem('schemaTest2.org.cedar.schemas.avro.geojson', 'LineString', 'schemaTest2_test.org.cedar.schemas.avro.geojson', 'LineStringFactory'), - ClassItem('schemaTest2.org.cedar.schemas.avro.geojson', 'Polygon', 'schemaTest2_test.org.cedar.schemas.avro.geojson', 'PolygonFactory'), - ClassItem('schemaTest2.org.cedar.schemas.avro.geojson', 'MultiLineString', 'schemaTest2_test.org.cedar.schemas.avro.geojson', 'MultiLineStringFactory'), - ClassItem('schemaTest2.org.cedar.schemas.avro.geojson', 'Point', 'schemaTest2_test.org.cedar.schemas.avro.geojson', 'PointFactory'), - ClassItem('schemaTest2.org.cedar.schemas.avro.geojson', 'MultiPoint', 'schemaTest2_test.org.cedar.schemas.avro.geojson', 'MultiPointFactory'), - ClassItem('schemaTest2.org.cedar.schemas.avro.geojson', 'MultiPolygon', 'schemaTest2_test.org.cedar.schemas.avro.geojson', 'MultiPolygonFactory'), - ClassItem('schemaTest2.', 'LineStringType', 'schemaTest2_test.', 'LineStringTypeFactory'), - ClassItem('schemaTest2.', 'PolygonType', 'schemaTest2_test.', 'PolygonTypeFactory'), - ClassItem('schemaTest2.', 'MultiLineStringType', 'schemaTest2_test.', 'MultiLineStringTypeFactory'), - ClassItem('schemaTest2.', 'PointType', 'schemaTest2_test.', 'PointTypeFactory'), - ClassItem('schemaTest2.', 'MultiPointType', 
'schemaTest2_test.', 'MultiPointTypeFactory'), - ClassItem('schemaTest2.', 'MultiPolygonType', 'schemaTest2_test.', 'MultiPolygonTypeFactory'), -] diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/__init__.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/__init__.py deleted file mode 100644 index 3862fe7..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -import faker - -from pyavro_gen.enum_with_schema_provider import EnumWithSchemaProvider -from .testing_classes import test_classes - -fake = faker.Faker() -fake.add_provider(EnumWithSchemaProvider) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/data_access_analysis_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/data_access_analysis_factory.py deleted file mode 100644 index d666bfa..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/data_access_analysis_factory.py +++ /dev/null @@ -1,12 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses. import DataAccessAnalysis -from psiSchemaClasses_test import fake - - -class DataAccessAnalysisFactory(Factory): - class Meta: - model = DataAccessAnalysis - dataAccessExists = lazy_attribute(lambda x: [fake.pybool(), None][randint(0, 1)]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/data_format_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/data_format_factory.py deleted file mode 100644 index 83f05c0..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/data_format_factory.py +++ /dev/null @@ -1,13 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses. import DataFormat -from psiSchemaClasses_test import fake - - -class DataFormatFactory(Factory): - class Meta: - model = DataFormat - name = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - version = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/description_analysis_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/description_analysis_factory.py deleted file mode 100644 index 8a75307..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/description_analysis_factory.py +++ /dev/null @@ -1,15 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses. 
import DescriptionAnalysis -from psiSchemaClasses_test import fake - - -class DescriptionAnalysisFactory(Factory): - class Meta: - model = DescriptionAnalysis - descriptionExists = lazy_attribute(lambda x: [fake.pybool(), None][randint(0, 1)]) - descriptionCharacters = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - descriptionFleschReadingEaseScore = lazy_attribute(lambda x: [fake.pyfloat(), None][randint(0, 1)]) - descriptionFleschKincaidReadingGradeLevel = lazy_attribute(lambda x: [fake.pyfloat(), None][randint(0, 1)]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/identification_analysis_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/identification_analysis_factory.py deleted file mode 100644 index 07f7019..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/identification_analysis_factory.py +++ /dev/null @@ -1,19 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses. import IdentificationAnalysis -from psiSchemaClasses_test import fake - - -class IdentificationAnalysisFactory(Factory): - class Meta: - model = IdentificationAnalysis - fileIdentifierExists = lazy_attribute(lambda x: [fake.pybool(), None][randint(0, 1)]) - fileIdentifierString = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - doiExists = lazy_attribute(lambda x: [fake.pybool(), None][randint(0, 1)]) - doiString = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - parentIdentifierExists = lazy_attribute(lambda x: [fake.pybool(), None][randint(0, 1)]) - parentIdentifierString = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - hierarchyLevelNameExists = lazy_attribute(lambda x: [fake.pybool(), None][randint(0, 1)]) - isGranule = lazy_attribute(lambda x: [fake.pybool(), None][randint(0, 1)]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/instruments_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/instruments_factory.py deleted file mode 100644 index 10b8367..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/instruments_factory.py +++ /dev/null @@ -1,14 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses. import Instruments -from psiSchemaClasses_test import fake - - -class InstrumentsFactory(Factory): - class Meta: - model = Instruments - instrumentIdentifier = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - instrumentType = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - instrumentDescription = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/keywords_element_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/keywords_element_factory.py deleted file mode 100644 index 29a6565..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/keywords_element_factory.py +++ /dev/null @@ -1,14 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses. 
import KeywordsElement -from psiSchemaClasses_test import fake - - -class KeywordsElementFactory(Factory): - class Meta: - model = KeywordsElement - values = lazy_attribute(lambda x: [fake.pystr() for _ in range(randint(1, 5))]) - type = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - namespace = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/operation_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/operation_factory.py deleted file mode 100644 index b6ff961..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/operation_factory.py +++ /dev/null @@ -1,15 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses. import Operation -from psiSchemaClasses_test import fake - - -class OperationFactory(Factory): - class Meta: - model = Operation - operationDescription = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - operationIdentifier = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - operationStatus = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - operationType = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/__init__.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/__init__.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/__init__.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/__init__.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/__init__.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/__init__.py deleted file mode 100644 index 5606a48..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from psiSchemaClasses_test.org.cedar.schemas.avro.geojson.point_factory import PointFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.geojson.multi_point_factory import MultiPointFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.geojson.line_string_factory import LineStringFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.geojson.multi_line_string_factory import MultiLineStringFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.geojson.polygon_factory import PolygonFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.geojson.multi_polygon_factory import MultiPolygonFactory diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/line_string_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/line_string_factory.py deleted file mode 100644 index b7a55b0..0000000 --- 
a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/line_string_factory.py +++ /dev/null @@ -1,10 +0,0 @@ -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.geojson import LineString - - -class LineStringFactory(Factory): - class Meta: - model = LineString - - pass diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_line_string_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_line_string_factory.py deleted file mode 100644 index c4ea8c3..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_line_string_factory.py +++ /dev/null @@ -1,10 +0,0 @@ -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.geojson import MultiLineString - - -class MultiLineStringFactory(Factory): - class Meta: - model = MultiLineString - - pass diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_point_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_point_factory.py deleted file mode 100644 index 89f6621..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_point_factory.py +++ /dev/null @@ -1,10 +0,0 @@ -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.geojson import MultiPoint - - -class MultiPointFactory(Factory): - class Meta: - model = MultiPoint - - pass diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_polygon_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_polygon_factory.py deleted file mode 100644 index aa653d0..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_polygon_factory.py +++ /dev/null @@ -1,10 +0,0 @@ -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.geojson import MultiPolygon - - -class MultiPolygonFactory(Factory): - class Meta: - model = MultiPolygon - - pass diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/point_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/point_factory.py deleted file mode 100644 index 3dde1b1..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/point_factory.py +++ /dev/null @@ -1,10 +0,0 @@ -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.geojson import Point - - -class PointFactory(Factory): - class Meta: - model = Point - - pass diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/polygon_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/polygon_factory.py deleted file mode 100644 index e481e48..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/polygon_factory.py +++ /dev/null @@ -1,10 +0,0 @@ -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.geojson import Polygon - - -class PolygonFactory(Factory): - class Meta: - model = Polygon - - 
pass diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/__init__.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/__init__.py deleted file mode 100644 index 2bc3169..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/__init__.py +++ /dev/null @@ -1,24 +0,0 @@ -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.temporal_bounding_factory import TemporalBoundingFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.link_factory import LinkFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.responsible_party_factory import ResponsiblePartyFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.reference_factory import ReferenceFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.record_type_factory import RecordTypeFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.file_information_factory import FileInformationFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.publishing_factory import PublishingFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.error_event_factory import ErrorEventFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.valid_descriptor_factory import ValidDescriptorFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.method_factory import MethodFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.operation_type_factory import OperationTypeFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.file_location_type_factory import FileLocationTypeFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.checksum_factory import ChecksumFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.checksum_algorithm_factory import ChecksumAlgorithmFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.relationship_type_factory import RelationshipTypeFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.parsed_record_with_id_factory import ParsedRecordWithIdFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.file_location_factory import FileLocationFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.relationship_factory import RelationshipFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.input_event_factory import InputEventFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.input_factory import InputFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.aggregated_input_factory import AggregatedInputFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.analysis_factory import AnalysisFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.discovery_factory import DiscoveryFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.parsed_record_factory import ParsedRecordFactory diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/aggregated_input_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/aggregated_input_factory.py deleted file mode 100644 index df261eb..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/aggregated_input_factory.py +++ /dev/null @@ -1,26 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import AggregatedInput -from psiSchemaClasses_test import fake -from 
psiSchemaClasses_test.org.cedar.schemas.avro.psi import ( - ErrorEventFactory, FileInformationFactory, FileLocationFactory, - InputEventFactory, PublishingFactory, RecordTypeFactory, - RelationshipFactory) - - -class AggregatedInputFactory(Factory): - class Meta: - model = AggregatedInput - rawJson = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - rawXml = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - initialSource = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - type = lazy_attribute(lambda x: [RecordTypeFactory(), None][randint(0, 1)]) - fileInformation = lazy_attribute(lambda x: [FileInformationFactory(), None][randint(0, 1)]) - fileLocations = lazy_attribute(lambda x: {fake.pystr(): FileLocationFactory() for _ in range(randint(3, 10))}) - publishing = lazy_attribute(lambda x: [PublishingFactory(), None][randint(0, 1)]) - relationships = lazy_attribute(lambda x: [RelationshipFactory() for _ in range(randint(1, 5))]) - deleted = lazy_attribute(lambda x: fake.pybool()) - events = lazy_attribute(lambda x: [InputEventFactory() for _ in range(randint(1, 5))]) - errors = lazy_attribute(lambda x: [ErrorEventFactory() for _ in range(randint(1, 5))]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/analysis_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/analysis_factory.py deleted file mode 100644 index 00a3199..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/analysis_factory.py +++ /dev/null @@ -1,24 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import Analysis -from psiSchemaClasses_test. 
import (DataAccessAnalysisFactory, - DescriptionAnalysisFactory, - IdentificationAnalysisFactory, - SpatialBoundingAnalysisFactory, - TemporalBoundingAnalysisFactory, - ThumbnailAnalysisFactory, - TitleAnalysisFactory) - - -class AnalysisFactory(Factory): - class Meta: - model = Analysis - identification = lazy_attribute(lambda x: [IdentificationAnalysisFactory(), None][randint(0, 1)]) - titles = lazy_attribute(lambda x: [TitleAnalysisFactory(), None][randint(0, 1)]) - description = lazy_attribute(lambda x: [DescriptionAnalysisFactory(), None][randint(0, 1)]) - dataAccess = lazy_attribute(lambda x: [DataAccessAnalysisFactory(), None][randint(0, 1)]) - thumbnail = lazy_attribute(lambda x: [ThumbnailAnalysisFactory(), None][randint(0, 1)]) - temporalBounding = lazy_attribute(lambda x: [TemporalBoundingAnalysisFactory(), None][randint(0, 1)]) - spatialBounding = lazy_attribute(lambda x: [SpatialBoundingAnalysisFactory(), None][randint(0, 1)]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/checksum_algorithm_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/checksum_algorithm_factory.py deleted file mode 100644 index 745054c..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/checksum_algorithm_factory.py +++ /dev/null @@ -1,10 +0,0 @@ -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import ChecksumAlgorithm -from psiSchemaClasses_test import fake - - -class ChecksumAlgorithmFactory(Factory): - class Meta: - model = ChecksumAlgorithm - value = lazy_attribute(lambda x: fake.enum_with_schema(ChecksumAlgorithm)) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/checksum_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/checksum_factory.py deleted file mode 100644 index a5bba5a..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/checksum_factory.py +++ /dev/null @@ -1,13 +0,0 @@ -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import Checksum -from psiSchemaClasses_test import fake -from psiSchemaClasses_test.org.cedar.schemas.avro.psi import \ - ChecksumAlgorithmFactory - - -class ChecksumFactory(Factory): - class Meta: - model = Checksum - algorithm = lazy_attribute(lambda x: ChecksumAlgorithmFactory()) - value = lazy_attribute(lambda x: fake.pystr()) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/discovery_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/discovery_factory.py deleted file mode 100644 index 6ac992b..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/discovery_factory.py +++ /dev/null @@ -1,67 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import Discovery -from psiSchemaClasses_test import fake -from psiSchemaClasses_test. 
import (DataFormatFactory, InstrumentsFactory, - KeywordsElementFactory, OperationFactory, - PlatformFactory, ServiceFactory) -from psiSchemaClasses_test.org.cedar.schemas.avro.geojson import ( - LineStringFactory, MultiLineStringFactory, MultiPointFactory, - MultiPolygonFactory, PointFactory, PolygonFactory) -from psiSchemaClasses_test.org.cedar.schemas.avro.psi import ( - LinkFactory, ReferenceFactory, ResponsiblePartyFactory, - TemporalBoundingFactory) - - -class DiscoveryFactory(Factory): - class Meta: - model = Discovery - fileIdentifier = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - parentIdentifier = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - hierarchyLevelName = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - doi = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - purpose = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - status = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - credit = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - title = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - alternateTitle = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - description = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - keywords = lazy_attribute(lambda x: [KeywordsElementFactory() for _ in range(randint(1, 5))]) - topicCategories = lazy_attribute(lambda x: [fake.pystr() for _ in range(randint(1, 5))]) - temporalBounding = lazy_attribute(lambda x: [TemporalBoundingFactory(), None][randint(0, 1)]) - spatialBounding = lazy_attribute(lambda x: [None, PointFactory(), MultiPointFactory(), LineStringFactory(), MultiLineStringFactory(), PolygonFactory(), MultiPolygonFactory()][randint(0, 6)]) - isGlobal = lazy_attribute(lambda x: [fake.pybool(), None][randint(0, 1)]) - acquisitionInstruments = lazy_attribute(lambda x: [InstrumentsFactory() for _ in range(randint(1, 5))]) - acquisitionOperations = lazy_attribute(lambda x: [OperationFactory() for _ in range(randint(1, 5))]) - acquisitionPlatforms = lazy_attribute(lambda x: [PlatformFactory() for _ in range(randint(1, 5))]) - dataFormats = lazy_attribute(lambda x: [DataFormatFactory() for _ in range(randint(1, 5))]) - links = lazy_attribute(lambda x: [LinkFactory() for _ in range(randint(1, 5))]) - responsibleParties = lazy_attribute(lambda x: [ResponsiblePartyFactory() for _ in range(randint(1, 5))]) - thumbnail = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - thumbnailDescription = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - creationDate = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - revisionDate = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - publicationDate = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - citeAsStatements = lazy_attribute(lambda x: [fake.pystr() for _ in range(randint(1, 5))]) - crossReferences = lazy_attribute(lambda x: [ReferenceFactory() for _ in range(randint(1, 5))]) - largerWorks = lazy_attribute(lambda x: [ReferenceFactory() for _ in range(randint(1, 5))]) - useLimitation = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - legalConstraints = lazy_attribute(lambda x: [fake.pystr() for _ in range(randint(1, 5))]) - accessFeeStatement = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - orderingInstructions = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - edition = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - 
dsmmAccessibility = lazy_attribute(lambda x: fake.pyint()) - dsmmDataIntegrity = lazy_attribute(lambda x: fake.pyint()) - dsmmDataQualityAssessment = lazy_attribute(lambda x: fake.pyint()) - dsmmDataQualityAssurance = lazy_attribute(lambda x: fake.pyint()) - dsmmDataQualityControlMonitoring = lazy_attribute(lambda x: fake.pyint()) - dsmmPreservability = lazy_attribute(lambda x: fake.pyint()) - dsmmProductionSustainability = lazy_attribute(lambda x: fake.pyint()) - dsmmTransparencyTraceability = lazy_attribute(lambda x: fake.pyint()) - dsmmUsability = lazy_attribute(lambda x: fake.pyint()) - dsmmAverage = lazy_attribute(lambda x: fake.pyfloat()) - updateFrequency = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - presentationForm = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - services = lazy_attribute(lambda x: [ServiceFactory() for _ in range(randint(1, 5))]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/error_event_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/error_event_factory.py deleted file mode 100644 index dbf6280..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/error_event_factory.py +++ /dev/null @@ -1,16 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import ErrorEvent -from psiSchemaClasses_test import fake - - -class ErrorEventFactory(Factory): - class Meta: - model = ErrorEvent - title = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - detail = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - status = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - code = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - source = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/file_information_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/file_information_factory.py deleted file mode 100644 index 1da43dd..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/file_information_factory.py +++ /dev/null @@ -1,18 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import FileInformation -from psiSchemaClasses_test import fake -from psiSchemaClasses_test.org.cedar.schemas.avro.psi import ChecksumFactory - - -class FileInformationFactory(Factory): - class Meta: - model = FileInformation - name = lazy_attribute(lambda x: fake.pystr()) - size = lazy_attribute(lambda x: fake.pyint()) - checksums = lazy_attribute(lambda x: [ChecksumFactory() for _ in range(randint(1, 5))]) - format = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - headers = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - optionalAttributes = lazy_attribute(lambda x: {fake.pystr(): fake.pystr() for _ in range(randint(3, 10))}) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/file_location_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/file_location_factory.py deleted file mode 100644 index 9298690..0000000 --- 
a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/file_location_factory.py +++ /dev/null @@ -1,22 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import FileLocation -from psiSchemaClasses_test import fake -from psiSchemaClasses_test.org.cedar.schemas.avro.psi import \ - FileLocationTypeFactory - - -class FileLocationFactory(Factory): - class Meta: - model = FileLocation - uri = lazy_attribute(lambda x: fake.pystr()) - type = lazy_attribute(lambda x: [FileLocationTypeFactory(), None][randint(0, 1)]) - deleted = lazy_attribute(lambda x: fake.pybool()) - restricted = lazy_attribute(lambda x: fake.pybool()) - asynchronous = lazy_attribute(lambda x: fake.pybool()) - locality = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - lastModified = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - serviceType = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - optionalAttributes = lazy_attribute(lambda x: {fake.pystr(): fake.pystr() for _ in range(randint(3, 10))}) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/file_location_type_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/file_location_type_factory.py deleted file mode 100644 index fa9352f..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/file_location_type_factory.py +++ /dev/null @@ -1,10 +0,0 @@ -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import FileLocationType - - -class FileLocationTypeFactory(Factory): - class Meta: - model = FileLocationType - - pass diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/input_event_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/input_event_factory.py deleted file mode 100644 index cc3c6a4..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/input_event_factory.py +++ /dev/null @@ -1,18 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import InputEvent -from psiSchemaClasses_test import fake -from psiSchemaClasses_test.org.cedar.schemas.avro.psi import ( - MethodFactory, OperationTypeFactory) - - -class InputEventFactory(Factory): - class Meta: - model = InputEvent - timestamp = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - method = lazy_attribute(lambda x: [MethodFactory(), None][randint(0, 1)]) - source = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - operation = lazy_attribute(lambda x: [OperationTypeFactory(), None][randint(0, 1)]) - failedState = lazy_attribute(lambda x: fake.pybool()) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/input_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/input_factory.py deleted file mode 100644 index d855377..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/input_factory.py +++ /dev/null @@ -1,19 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import Input -from psiSchemaClasses_test import fake 
-from psiSchemaClasses_test.org.cedar.schemas.avro.psi import ( - MethodFactory, OperationTypeFactory, RecordTypeFactory) - - -class InputFactory(Factory): - class Meta: - model = Input - type = lazy_attribute(lambda x: [RecordTypeFactory(), None][randint(0, 1)]) - content = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - contentType = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - method = lazy_attribute(lambda x: [MethodFactory(), None][randint(0, 1)]) - source = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - operation = lazy_attribute(lambda x: [OperationTypeFactory(), None][randint(0, 1)]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/link_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/link_factory.py deleted file mode 100644 index fc9057e..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/link_factory.py +++ /dev/null @@ -1,16 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import Link -from psiSchemaClasses_test import fake - - -class LinkFactory(Factory): - class Meta: - model = Link - linkName = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - linkProtocol = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - linkUrl = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - linkDescription = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - linkFunction = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/method_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/method_factory.py deleted file mode 100644 index 14e2eb1..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/method_factory.py +++ /dev/null @@ -1,10 +0,0 @@ -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import Method - - -class MethodFactory(Factory): - class Meta: - model = Method - - pass diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/operation_type_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/operation_type_factory.py deleted file mode 100644 index d5e5116..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/operation_type_factory.py +++ /dev/null @@ -1,10 +0,0 @@ -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import OperationType - - -class OperationTypeFactory(Factory): - class Meta: - model = OperationType - - pass diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/parsed_record_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/parsed_record_factory.py deleted file mode 100644 index 47b0d9a..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/parsed_record_factory.py +++ /dev/null @@ -1,22 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import ParsedRecord -from 
psiSchemaClasses_test.org.cedar.schemas.avro.psi import ( - AnalysisFactory, DiscoveryFactory, ErrorEventFactory, - FileInformationFactory, FileLocationFactory, PublishingFactory, - RecordTypeFactory, RelationshipFactory) - - -class ParsedRecordFactory(Factory): - class Meta: - model = ParsedRecord - type = lazy_attribute(lambda x: [RecordTypeFactory(), None][randint(0, 1)]) - discovery = lazy_attribute(lambda x: [DiscoveryFactory(), None][randint(0, 1)]) - analysis = lazy_attribute(lambda x: [AnalysisFactory(), None][randint(0, 1)]) - fileInformation = lazy_attribute(lambda x: [FileInformationFactory(), None][randint(0, 1)]) - fileLocations = lazy_attribute(lambda x: {fake.pystr(): FileLocationFactory() for _ in range(randint(3, 10))}) - publishing = lazy_attribute(lambda x: PublishingFactory()) - relationships = lazy_attribute(lambda x: [RelationshipFactory() for _ in range(randint(1, 5))]) - errors = lazy_attribute(lambda x: [ErrorEventFactory() for _ in range(randint(1, 5))]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/parsed_record_with_id_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/parsed_record_with_id_factory.py deleted file mode 100644 index 9d95eb9..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/parsed_record_with_id_factory.py +++ /dev/null @@ -1,13 +0,0 @@ -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import ParsedRecordWithId -from psiSchemaClasses_test import fake -from psiSchemaClasses_test.org.cedar.schemas.avro.psi import \ - ParsedRecordFactory - - -class ParsedRecordWithIdFactory(Factory): - class Meta: - model = ParsedRecordWithId - id = lazy_attribute(lambda x: fake.pystr()) - record = lazy_attribute(lambda x: ParsedRecordFactory()) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/publishing_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/publishing_factory.py deleted file mode 100644 index dfeb3bd..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/publishing_factory.py +++ /dev/null @@ -1,13 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import Publishing -from psiSchemaClasses_test import fake - - -class PublishingFactory(Factory): - class Meta: - model = Publishing - isPrivate = lazy_attribute(lambda x: fake.pybool()) - until = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/record_type_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/record_type_factory.py deleted file mode 100644 index 012f335..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/record_type_factory.py +++ /dev/null @@ -1,10 +0,0 @@ -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import RecordType -from psiSchemaClasses_test import fake - - -class RecordTypeFactory(Factory): - class Meta: - model = RecordType - value = lazy_attribute(lambda x: fake.enum_with_schema(RecordType)) diff --git 
a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/reference_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/reference_factory.py deleted file mode 100644 index ce55525..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/reference_factory.py +++ /dev/null @@ -1,15 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import Reference -from psiSchemaClasses_test import fake -from psiSchemaClasses_test.org.cedar.schemas.avro.psi import LinkFactory - - -class ReferenceFactory(Factory): - class Meta: - model = Reference - title = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - date = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - links = lazy_attribute(lambda x: [LinkFactory() for _ in range(randint(1, 5))]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/relationship_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/relationship_factory.py deleted file mode 100644 index 4a910f0..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/relationship_factory.py +++ /dev/null @@ -1,15 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import Relationship -from psiSchemaClasses_test import fake -from psiSchemaClasses_test.org.cedar.schemas.avro.psi import \ - RelationshipTypeFactory - - -class RelationshipFactory(Factory): - class Meta: - model = Relationship - type = lazy_attribute(lambda x: [RelationshipTypeFactory(), None][randint(0, 1)]) - id = lazy_attribute(lambda x: fake.pystr()) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/relationship_type_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/relationship_type_factory.py deleted file mode 100644 index 8f10e24..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/relationship_type_factory.py +++ /dev/null @@ -1,10 +0,0 @@ -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import RelationshipType - - -class RelationshipTypeFactory(Factory): - class Meta: - model = RelationshipType - - pass diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/responsible_party_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/responsible_party_factory.py deleted file mode 100644 index 12bc55a..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/responsible_party_factory.py +++ /dev/null @@ -1,17 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import ResponsibleParty -from psiSchemaClasses_test import fake - - -class ResponsiblePartyFactory(Factory): - class Meta: - model = ResponsibleParty - individualName = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - organizationName = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - positionName = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - role = lazy_attribute(lambda x: 
[fake.pystr(), None][randint(0, 1)]) - email = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - phone = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/temporal_bounding_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/temporal_bounding_factory.py deleted file mode 100644 index 1bc7b7b..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/temporal_bounding_factory.py +++ /dev/null @@ -1,18 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import TemporalBounding -from psiSchemaClasses_test import fake - - -class TemporalBoundingFactory(Factory): - class Meta: - model = TemporalBounding - beginDate = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - beginIndeterminate = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - endDate = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - endIndeterminate = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - instant = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - instantIndeterminate = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - description = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/valid_descriptor_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/valid_descriptor_factory.py deleted file mode 100644 index 0152b7d..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/valid_descriptor_factory.py +++ /dev/null @@ -1,10 +0,0 @@ -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import ValidDescriptor - - -class ValidDescriptorFactory(Factory): - class Meta: - model = ValidDescriptor - - pass diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/platform_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/platform_factory.py deleted file mode 100644 index ada2e39..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/platform_factory.py +++ /dev/null @@ -1,14 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses. import Platform -from psiSchemaClasses_test import fake - - -class PlatformFactory(Factory): - class Meta: - model = Platform - platformIdentifier = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - platformDescription = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - platformSponsor = lazy_attribute(lambda x: [fake.pystr() for _ in range(randint(1, 5))]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/service_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/service_factory.py deleted file mode 100644 index bb81b12..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/service_factory.py +++ /dev/null @@ -1,20 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses. 
import Service -from psiSchemaClasses_test import fake -from psiSchemaClasses_test.org.cedar.schemas.avro.psi import ( - LinkFactory, ResponsiblePartyFactory) - - -class ServiceFactory(Factory): - class Meta: - model = Service - title = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - alternateTitle = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - description = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - date = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - dateType = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - pointOfContact = lazy_attribute(lambda x: [ResponsiblePartyFactory(), None][randint(0, 1)]) - operations = lazy_attribute(lambda x: [LinkFactory() for _ in range(randint(1, 5))]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/spatial_bounding_analysis_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/spatial_bounding_analysis_factory.py deleted file mode 100644 index 45fe832..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/spatial_bounding_analysis_factory.py +++ /dev/null @@ -1,14 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses. import SpatialBoundingAnalysis -from psiSchemaClasses_test import fake - - -class SpatialBoundingAnalysisFactory(Factory): - class Meta: - model = SpatialBoundingAnalysis - spatialBoundingExists = lazy_attribute(lambda x: [fake.pybool(), None][randint(0, 1)]) - isValid = lazy_attribute(lambda x: [fake.pybool(), None][randint(0, 1)]) - validationError = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/temporal_bounding_analysis_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/temporal_bounding_analysis_factory.py deleted file mode 100644 index bdb6c93..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/temporal_bounding_analysis_factory.py +++ /dev/null @@ -1,46 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses. import TemporalBoundingAnalysis -from psiSchemaClasses_test import fake -from psiSchemaClasses_test. 
import TimeRangeDescriptorFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi import \ - ValidDescriptorFactory - - -class TemporalBoundingAnalysisFactory(Factory): - class Meta: - model = TemporalBoundingAnalysis - beginDescriptor = lazy_attribute(lambda x: [ValidDescriptorFactory(), None][randint(0, 1)]) - beginPrecision = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - beginIndexable = lazy_attribute(lambda x: [fake.pybool(), None][randint(0, 1)]) - beginZoneSpecified = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - beginUtcDateTimeString = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - beginYear = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - beginDayOfYear = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - beginDayOfMonth = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - beginMonth = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - endDescriptor = lazy_attribute(lambda x: [ValidDescriptorFactory(), None][randint(0, 1)]) - endPrecision = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - endIndexable = lazy_attribute(lambda x: [fake.pybool(), None][randint(0, 1)]) - endZoneSpecified = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - endUtcDateTimeString = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - endYear = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - endDayOfYear = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - endDayOfMonth = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - endMonth = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - instantDescriptor = lazy_attribute(lambda x: [ValidDescriptorFactory(), None][randint(0, 1)]) - instantPrecision = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - instantIndexable = lazy_attribute(lambda x: [fake.pybool(), None][randint(0, 1)]) - instantZoneSpecified = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - instantUtcDateTimeString = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - instantEndUtcDateTimeString = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - instantYear = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - instantDayOfYear = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - instantEndDayOfYear = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - instantDayOfMonth = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - instantEndDayOfMonth = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - instantMonth = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - instantEndMonth = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - rangeDescriptor = lazy_attribute(lambda x: [TimeRangeDescriptorFactory(), None][randint(0, 1)]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/testing_classes.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/testing_classes.py deleted file mode 100644 index 1532f68..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/testing_classes.py +++ /dev/null @@ -1,48 +0,0 @@ -from pyavro_gen.codewriters.namespace import ClassItem - -test_classes = [ - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'TemporalBounding', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'TemporalBoundingFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'Link', 
'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'LinkFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'ResponsibleParty', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'ResponsiblePartyFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'Reference', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'ReferenceFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'RecordType', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'RecordTypeFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'FileInformation', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'FileInformationFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'Publishing', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'PublishingFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'ErrorEvent', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'ErrorEventFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'ValidDescriptor', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'ValidDescriptorFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'Method', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'MethodFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'OperationType', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'OperationTypeFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'FileLocationType', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'FileLocationTypeFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'Checksum', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'ChecksumFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'ChecksumAlgorithm', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'ChecksumAlgorithmFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'RelationshipType', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'RelationshipTypeFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'ParsedRecordWithId', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'ParsedRecordWithIdFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'FileLocation', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'FileLocationFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'Relationship', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'RelationshipFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'InputEvent', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'InputEventFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'Input', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'InputFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'AggregatedInput', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'AggregatedInputFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'Analysis', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'AnalysisFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'Discovery', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'DiscoveryFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'ParsedRecord', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'ParsedRecordFactory'), - ClassItem('psiSchemaClasses.', 'KeywordsElement', 'psiSchemaClasses_test.', 'KeywordsElementFactory'), - ClassItem('psiSchemaClasses.', 'Instruments', 
'psiSchemaClasses_test.', 'InstrumentsFactory'), - ClassItem('psiSchemaClasses.', 'Operation', 'psiSchemaClasses_test.', 'OperationFactory'), - ClassItem('psiSchemaClasses.', 'Platform', 'psiSchemaClasses_test.', 'PlatformFactory'), - ClassItem('psiSchemaClasses.', 'DataFormat', 'psiSchemaClasses_test.', 'DataFormatFactory'), - ClassItem('psiSchemaClasses.', 'IdentificationAnalysis', 'psiSchemaClasses_test.', 'IdentificationAnalysisFactory'), - ClassItem('psiSchemaClasses.', 'TitleAnalysis', 'psiSchemaClasses_test.', 'TitleAnalysisFactory'), - ClassItem('psiSchemaClasses.', 'DescriptionAnalysis', 'psiSchemaClasses_test.', 'DescriptionAnalysisFactory'), - ClassItem('psiSchemaClasses.', 'DataAccessAnalysis', 'psiSchemaClasses_test.', 'DataAccessAnalysisFactory'), - ClassItem('psiSchemaClasses.', 'ThumbnailAnalysis', 'psiSchemaClasses_test.', 'ThumbnailAnalysisFactory'), - ClassItem('psiSchemaClasses.', 'SpatialBoundingAnalysis', 'psiSchemaClasses_test.', 'SpatialBoundingAnalysisFactory'), - ClassItem('psiSchemaClasses.', 'TimeRangeDescriptor', 'psiSchemaClasses_test.', 'TimeRangeDescriptorFactory'), - ClassItem('psiSchemaClasses.', 'Service', 'psiSchemaClasses_test.', 'ServiceFactory'), - ClassItem('psiSchemaClasses.', 'TemporalBoundingAnalysis', 'psiSchemaClasses_test.', 'TemporalBoundingAnalysisFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.geojson', 'Point', 'psiSchemaClasses_test.org.cedar.schemas.avro.geojson', 'PointFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.geojson', 'MultiPoint', 'psiSchemaClasses_test.org.cedar.schemas.avro.geojson', 'MultiPointFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.geojson', 'LineString', 'psiSchemaClasses_test.org.cedar.schemas.avro.geojson', 'LineStringFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.geojson', 'MultiLineString', 'psiSchemaClasses_test.org.cedar.schemas.avro.geojson', 'MultiLineStringFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.geojson', 'Polygon', 'psiSchemaClasses_test.org.cedar.schemas.avro.geojson', 'PolygonFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.geojson', 'MultiPolygon', 'psiSchemaClasses_test.org.cedar.schemas.avro.geojson', 'MultiPolygonFactory'), -] diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/thumbnail_analysis_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/thumbnail_analysis_factory.py deleted file mode 100644 index 7d94baf..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/thumbnail_analysis_factory.py +++ /dev/null @@ -1,12 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses. import ThumbnailAnalysis -from psiSchemaClasses_test import fake - - -class ThumbnailAnalysisFactory(Factory): - class Meta: - model = ThumbnailAnalysis - thumbnailExists = lazy_attribute(lambda x: [fake.pybool(), None][randint(0, 1)]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/time_range_descriptor_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/time_range_descriptor_factory.py deleted file mode 100644 index e1c1574..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/time_range_descriptor_factory.py +++ /dev/null @@ -1,10 +0,0 @@ -from factory import Factory, lazy_attribute - -from psiSchemaClasses. 
import TimeRangeDescriptor -from psiSchemaClasses_test import fake - - -class TimeRangeDescriptorFactory(Factory): - class Meta: - model = TimeRangeDescriptor - value = lazy_attribute(lambda x: fake.enum_with_schema(TimeRangeDescriptor)) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/title_analysis_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/title_analysis_factory.py deleted file mode 100644 index e6f5a1f..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/title_analysis_factory.py +++ /dev/null @@ -1,19 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses. import TitleAnalysis -from psiSchemaClasses_test import fake - - -class TitleAnalysisFactory(Factory): - class Meta: - model = TitleAnalysis - titleExists = lazy_attribute(lambda x: [fake.pybool(), None][randint(0, 1)]) - titleCharacters = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - alternateTitleExists = lazy_attribute(lambda x: [fake.pybool(), None][randint(0, 1)]) - alternateTitleCharacters = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - titleFleschReadingEaseScore = lazy_attribute(lambda x: [fake.pyfloat(), None][randint(0, 1)]) - alternateTitleFleschReadingEaseScore = lazy_attribute(lambda x: [fake.pyfloat(), None][randint(0, 1)]) - titleFleschKincaidReadingGradeLevel = lazy_attribute(lambda x: [fake.pyfloat(), None][randint(0, 1)]) - alternateTitleFleschKincaidReadingGradeLevel = lazy_attribute(lambda x: [fake.pyfloat(), None][randint(0, 1)]) From 6c73e71fdd962fc8a80a7d8d789f548bd3958bca Mon Sep 17 00:00:00 2001 From: Erin Date: Mon, 28 Jun 2021 11:23:21 -0600 Subject: [PATCH 100/129] 1508-Fixed merge change of variable from prior commit, didn't notice. --- onestop-python-client/onestop/util/S3MessageAdapter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onestop-python-client/onestop/util/S3MessageAdapter.py b/onestop-python-client/onestop/util/S3MessageAdapter.py index aff0403..80d5a6e 100644 --- a/onestop-python-client/onestop/util/S3MessageAdapter.py +++ b/onestop-python-client/onestop/util/S3MessageAdapter.py @@ -94,7 +94,7 @@ def transform(self, recs): fileInformation = FileInformation(name=file_name, size=file_size, checksums=[checkSum], optionalAttributes={}) # Relationship - relationship = Relationship(id=self.collection_id, type=self.type) + relationship = Relationship(id=self.collection_id, type=self.metadata_type) # File Location fileLocationType = FileLocationType(type='ARCHIVE') From cd231b3795c3a5aa59f050186a44231e6fb44a00 Mon Sep 17 00:00:00 2001 From: Erin Date: Thu, 1 Jul 2021 21:05:46 -0600 Subject: [PATCH 101/129] 1508-Removed mock from python-client test_S3MessageAdapter, simple util class methods. 
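transform() only reads fields out of the already-parsed SQS record dict and assembles a ParsedRecord; it does not need a live (or mocked) S3 connection, so the @mock_s3 decorator and the create_bucket/put_object setup in test_transform were pure overhead. A direct call against the test class's own fixtures is enough to exercise it -- a minimal sketch reusing the existing config_dict and recs1 fixtures (no new names are introduced here):

    from onestop.util.S3MessageAdapter import S3MessageAdapter

    # config_dict and recs1 are the fixtures already defined on S3MessageAdapterTest
    adapter = S3MessageAdapter(**config_dict)
    payload = adapter.transform(recs1)
    assert payload is not None
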
--- .../test/unit/util/test_S3MessageAdapter.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/onestop-python-client/test/unit/util/test_S3MessageAdapter.py b/onestop-python-client/test/unit/util/test_S3MessageAdapter.py index 93dfed2..9b7c535 100644 --- a/onestop-python-client/test/unit/util/test_S3MessageAdapter.py +++ b/onestop-python-client/test/unit/util/test_S3MessageAdapter.py @@ -1,6 +1,5 @@ import unittest -from moto import mock_s3 from onestop.util.S3Utils import S3Utils from onestop.util.S3MessageAdapter import S3MessageAdapter @@ -92,17 +91,7 @@ def test_metadata_type_lowercase(self): self.assertEqual(uppercase_metadata_type, s3MA.metadata_type) - @mock_s3 def test_transform(self): - s3 = self.s3_utils.connect('client', 's3', self.region) - location = {'LocationConstraint': self.region} - bucket = 'nesdis-ncei-csb-dev' - key = 'csv/file1.csv' - key2 = 'csv/file2.csv' - s3.create_bucket(Bucket=bucket, CreateBucketConfiguration=location) - s3.put_object(Bucket=bucket, Key=key, Body="body") - s3.put_object(Bucket=bucket, Key=key2, Body="body") - payload = self.s3ma.transform(self.recs1) print(payload) @@ -110,7 +99,6 @@ def test_transform(self): print(payload) self.assertTrue(payload!=None) - @mock_s3 def test_extra_parameters_constructor(self): testParams = dict(self.config_dict) testParams['extra'] = 'extra value' From 11f4283f094b15e2b5066590464bd55f4e24ac41 Mon Sep 17 00:00:00 2001 From: Erin Date: Thu, 1 Jul 2021 21:21:55 -0600 Subject: [PATCH 102/129] 1508-Cleaned up test_S3MessageAdapter tests since more of a static util class, didn't need so much setup. Moved constructor test up with other constructor test. --- .../test/unit/util/test_S3MessageAdapter.py | 75 +++++-------------- 1 file changed, 19 insertions(+), 56 deletions(-) diff --git a/onestop-python-client/test/unit/util/test_S3MessageAdapter.py b/onestop-python-client/test/unit/util/test_S3MessageAdapter.py index 9b7c535..83a764f 100644 --- a/onestop-python-client/test/unit/util/test_S3MessageAdapter.py +++ b/onestop-python-client/test/unit/util/test_S3MessageAdapter.py @@ -1,13 +1,19 @@ import unittest -from onestop.util.S3Utils import S3Utils from onestop.util.S3MessageAdapter import S3MessageAdapter class S3MessageAdapterTest(unittest.TestCase): - s3ma = None - config_dict = None - - recs1 = \ + config_dict = { + 'access_key': 'test_access_key', + 'secret_key': 'test_secret_key', + 'access_bucket': 'https://archive-testing-demo.s3-us-east-2.amazonaws.com', + 's3_message_adapter_metadata_type': 'COLLECTION', + 'file_id_prefix': 'gov.noaa.ncei.csb:', + 'collection_id': 'fdb56230-87f4-49f2-ab83-104cfd073177', + 'log_level': 'DEBUG' + } + + recs_minimum_fields = \ [{ 'eventVersion': '2.1', 'eventSource': 'aws:s3', @@ -28,48 +34,6 @@ class S3MessageAdapterTest(unittest.TestCase): } }] - recs2 = \ - [{ - 'eventVersion': '2.1', - 'eventSource': 'aws:s3', - 'awsRegion': 'us-east-1', - 'eventTime': '2020-11-10T00:44:20.642Z', - 'eventName': 'ObjectCreated:Put', - 'userIdentity': {'principalId': 'AWS:AIDAUDW4MV7I5RW5LQJIO'}, - 'requestParameters': {'sourceIPAddress': '65.113.158.185'}, - 'responseElements': {'x-amz-request-id': '7D394F43C682BB87', 'x-amz-id-2': 'k2Yn5BGg7DM5fIEAnwv5RloBFLYERjGRG3mT+JsPbdX033USr0eNObqkHiw3m3x+BQ17DD4C0ErB/VdhYt2Az01LJ4mQ/aqS'}, - 's3': {'s3SchemaVersion': '1.0', 'configurationId': 'csbS3notification', - 'bucket': {'name': 'nesdis-ncei-csb-dev', - 'ownerIdentity': {'principalId': 'A3PGJENIF5D10L'}, - 'arn': 'arn:aws:s3:::nesdis-ncei-csb-dev'}, - 'object': {'key': 
'csv/file2.csv', 'size': 1386, - 'eTag': '44d2452e8bc2c8013e9c673086fbab7a', - 'versionId': 'q6ls_7mhqUbfMsoYiQSiADnHBZQ3Fbzf', - 'sequencer': '005FA9E26498815778'} - } - }] - - def setUp(self): - print("Set it up!") - - self.config_dict = { - 'access_key': 'test_access_key', - 'secret_key': 'test_secret_key', - 'access_bucket': 'https://archive-testing-demo.s3-us-east-2.amazonaws.com', - 's3_message_adapter_metadata_type': 'COLLECTION', - 'file_id_prefix': 'gov.noaa.ncei.csb:', - 'collection_id': 'fdb56230-87f4-49f2-ab83-104cfd073177', - 'log_level': 'DEBUG' - } - - self.s3_utils = S3Utils(**self.config_dict) - self.s3ma = S3MessageAdapter(**self.config_dict) - - self.region = 'us-east-2' - - def tearDown(self): - print("Tear it down!") - def test_init_metadata_type_valid(self): publisher = S3MessageAdapter(**self.config_dict) @@ -81,6 +45,11 @@ def test_init_metadata_type_invalid(self): self.assertRaises(ValueError, S3MessageAdapter, **wrong_metadata_type_config) + def test_init_extra_parameters_constructor(self): + test_params = dict(self.config_dict) + test_params['extra'] = 'extra value' + self.assertRaises(Exception, S3MessageAdapter(**test_params)) + def test_metadata_type_lowercase(self): metadata_type = 'collection' uppercase_metadata_type = metadata_type.upper() @@ -92,17 +61,11 @@ def test_metadata_type_lowercase(self): self.assertEqual(uppercase_metadata_type, s3MA.metadata_type) def test_transform(self): - payload = self.s3ma.transform(self.recs1) - print(payload) + s3MA = S3MessageAdapter(**self.config_dict) + payload = s3MA.transform(self.recs_minimum_fields) - payload = self.s3ma.transform(self.recs2) - print(payload) - self.assertTrue(payload!=None) + self.assertIsNotNone(payload) - def test_extra_parameters_constructor(self): - testParams = dict(self.config_dict) - testParams['extra'] = 'extra value' - self.assertRaises(Exception, S3MessageAdapter(**testParams)) if __name__ == '__main__': unittest.main() \ No newline at end of file From b153950c5259d6700f6141879f93660b7635c31d Mon Sep 17 00:00:00 2001 From: Erin Date: Thu, 1 Jul 2021 21:23:35 -0600 Subject: [PATCH 103/129] 1508-In test_S3MessageAdapter moved init test up with other init tests... Had missed. 
--- .../test/unit/util/test_S3MessageAdapter.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/onestop-python-client/test/unit/util/test_S3MessageAdapter.py b/onestop-python-client/test/unit/util/test_S3MessageAdapter.py index 83a764f..7780de5 100644 --- a/onestop-python-client/test/unit/util/test_S3MessageAdapter.py +++ b/onestop-python-client/test/unit/util/test_S3MessageAdapter.py @@ -45,12 +45,7 @@ def test_init_metadata_type_invalid(self): self.assertRaises(ValueError, S3MessageAdapter, **wrong_metadata_type_config) - def test_init_extra_parameters_constructor(self): - test_params = dict(self.config_dict) - test_params['extra'] = 'extra value' - self.assertRaises(Exception, S3MessageAdapter(**test_params)) - - def test_metadata_type_lowercase(self): + def test_init_metadata_type_lowercase(self): metadata_type = 'collection' uppercase_metadata_type = metadata_type.upper() config = dict(self.config_dict) @@ -60,6 +55,11 @@ def test_metadata_type_lowercase(self): self.assertEqual(uppercase_metadata_type, s3MA.metadata_type) + def test_init_extra_parameters_constructor(self): + test_params = dict(self.config_dict) + test_params['extra'] = 'extra value' + self.assertRaises(Exception, S3MessageAdapter(**test_params)) + def test_transform(self): s3MA = S3MessageAdapter(**self.config_dict) payload = s3MA.transform(self.recs_minimum_fields) From b0594db45155c6d7f68cef42a222ed00b1dc5268 Mon Sep 17 00:00:00 2001 From: Erin Date: Thu, 1 Jul 2021 21:38:08 -0600 Subject: [PATCH 104/129] 1508-Undid changing relationship to use RecordType, noticed it is lowercase.Changed RelationshipType to be Enum class like RecordType, to reflect schemas repo. --- .../cedar/schemas/avro/psi/relationship.py | 3 ++- .../schemas/avro/psi/relationship_type.py | 20 +++++++++++++++++++ .../onestop/util/S3MessageAdapter.py | 3 ++- .../test/unit/util/test_S3MessageAdapter.py | 2 +- 4 files changed, 25 insertions(+), 3 deletions(-) create mode 100644 onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/relationship_type.py diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/relationship.py b/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/relationship.py index 045d994..498f093 100644 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/relationship.py +++ b/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/relationship.py @@ -2,6 +2,7 @@ from typing import ClassVar, Dict, Optional from undictify import type_checked_constructor +from .relationship_type import RelationshipType @type_checked_constructor() @dataclass @@ -9,7 +10,7 @@ class Relationship: """ Record of a relationship to another object in inventory """ - type: Optional[str] + type: Optional[RelationshipType] #: The id of the related object id: str diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/relationship_type.py b/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/relationship_type.py new file mode 100644 index 0000000..ba96dee --- /dev/null +++ b/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/relationship_type.py @@ -0,0 +1,20 @@ +from enum import Enum +from typing import ClassVar + + +class RelationshipType(Enum): + """ + The types of metadata relationships which can be represented in the PSI system + """ + collection = 'COLLECTION' + + #: The 
Avro Schema associated to this class + _schema: ClassVar[str] = """{ + "name": "RelationshipType", + "namespace": "org.cedar.schemas.avro.psi", + "type": "enum", + "doc": " + "symbols": [ + "COLLECTION" + ] + }""" diff --git a/onestop-python-client/onestop/util/S3MessageAdapter.py b/onestop-python-client/onestop/util/S3MessageAdapter.py index 80d5a6e..dccf8dc 100644 --- a/onestop-python-client/onestop/util/S3MessageAdapter.py +++ b/onestop-python-client/onestop/util/S3MessageAdapter.py @@ -5,6 +5,7 @@ from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.file_information import FileInformation from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.checksum import Checksum, ChecksumAlgorithm from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.relationship import Relationship +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.relationship_type import RelationshipType from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.discovery import Discovery, Link @@ -94,7 +95,7 @@ def transform(self, recs): fileInformation = FileInformation(name=file_name, size=file_size, checksums=[checkSum], optionalAttributes={}) # Relationship - relationship = Relationship(id=self.collection_id, type=self.metadata_type) + relationship = Relationship(id=self.collection_id, type=RelationshipType(self.metadata_type)) # File Location fileLocationType = FileLocationType(type='ARCHIVE') diff --git a/onestop-python-client/test/unit/util/test_S3MessageAdapter.py b/onestop-python-client/test/unit/util/test_S3MessageAdapter.py index 7780de5..66031fe 100644 --- a/onestop-python-client/test/unit/util/test_S3MessageAdapter.py +++ b/onestop-python-client/test/unit/util/test_S3MessageAdapter.py @@ -60,7 +60,7 @@ def test_init_extra_parameters_constructor(self): test_params['extra'] = 'extra value' self.assertRaises(Exception, S3MessageAdapter(**test_params)) - def test_transform(self): + def test_transform_happy(self): s3MA = S3MessageAdapter(**self.config_dict) payload = s3MA.transform(self.recs_minimum_fields) From 13d4cfa6c5f25906edc021c37b92264c3e6c5aa6 Mon Sep 17 00:00:00 2001 From: Erin Date: Sat, 3 Jul 2021 16:05:16 -0600 Subject: [PATCH 105/129] 1508-Added more Enum classes to list into schema jsonEncoder.py --- onestop-python-client/onestop/schemas/util/jsonEncoder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onestop-python-client/onestop/schemas/util/jsonEncoder.py b/onestop-python-client/onestop/schemas/util/jsonEncoder.py index d324b88..90522f5 100644 --- a/onestop-python-client/onestop/schemas/util/jsonEncoder.py +++ b/onestop-python-client/onestop/schemas/util/jsonEncoder.py @@ -1,5 +1,5 @@ import json -from enum import Enum + from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.checksum_algorithm import ChecksumAlgorithm # Diction of all the Enum Classes From c9c015215c457a3132bd606224daee513c886fcd Mon Sep 17 00:00:00 2001 From: Erin Date: Sat, 3 Jul 2021 16:10:20 -0600 Subject: [PATCH 106/129] 1508-Added more Enum classes to list into schema jsonEncoder.py, stupid intellij separated the changes. 
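The ENUMS map exists so the avro-generated enum members can survive a round trip through json.dumps/json.loads: the encode hook reduces a member to something JSON can carry, and the decode hook looks the class back up by name in ENUMS and rebuilds the member. Roughly like the sketch below -- an illustration only; the hook names, the '__enum__' tag, and the stand-in ChecksumAlgorithm are placeholders, not the actual code in jsonEncoder.py:

    import json
    from enum import Enum

    class ChecksumAlgorithm(Enum):   # stand-in for the generated avro enum class
        MD5 = 'MD5'

    ENUMS = {'ChecksumAlgorithm': ChecksumAlgorithm}

    def enum_default(obj):
        # passed as json.dumps(..., default=enum_default); tags the member with its class name
        if isinstance(obj, Enum):
            return {'__enum__': type(obj).__name__, 'value': obj.value}
        raise TypeError('%r is not JSON serializable' % (obj,))

    def enum_hook(d):
        # passed as json.loads(..., object_hook=enum_hook); rebuilds the member via the ENUMS lookup
        if '__enum__' in d:
            return ENUMS[d['__enum__']](d['value'])
        return d

    encoded = json.dumps({'algorithm': ChecksumAlgorithm.MD5}, default=enum_default)
    assert json.loads(encoded, object_hook=enum_hook)['algorithm'] is ChecksumAlgorithm.MD5
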
--- .../onestop/schemas/util/jsonEncoder.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/onestop-python-client/onestop/schemas/util/jsonEncoder.py b/onestop-python-client/onestop/schemas/util/jsonEncoder.py index 90522f5..e9a02a3 100644 --- a/onestop-python-client/onestop/schemas/util/jsonEncoder.py +++ b/onestop-python-client/onestop/schemas/util/jsonEncoder.py @@ -1,11 +1,28 @@ import json from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.checksum_algorithm import ChecksumAlgorithm +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.relationship_type import RelationshipType +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.record_type import RecordType +from onestop.schemas.psiSchemaClasses.time_range_descriptor import TimeRangeDescriptor +from onestop.schemas.geojsonSchemaClasses.line_string_type import LineStringType +from onestop.schemas.geojsonSchemaClasses.multi_line_string_type import MultiLineStringType +from onestop.schemas.geojsonSchemaClasses.multi_point_type import MultiPointType +from onestop.schemas.geojsonSchemaClasses.multi_polygon_type import MultiPolygonType +from onestop.schemas.geojsonSchemaClasses.point_type import PointType +from onestop.schemas.geojsonSchemaClasses.polygon_type import PolygonType # Diction of all the Enum Classes ENUMS = { 'ChecksumAlgorithm': ChecksumAlgorithm, - + 'RELATIONSHIPTYPE': RelationshipType, + 'RecordType': RecordType, + 'TimeRangeDescriptor': TimeRangeDescriptor, + 'LineStringType': LineStringType, + 'MultiLineStringType': MultiLineStringType, + 'MultiPointType': MultiPointType, + 'MultiPolygonType': MultiPolygonType, + 'PointType': PointType, + 'PolygonType': PolygonType } # Used as an argument in json.dumps, transform Enum instance for later use From ab6322f6e3a4cc3b157da8c8f0cb88de4eb6db40 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 7 Jul 2021 14:41:09 -0600 Subject: [PATCH 107/129] 1508-Fixed bug of test_messaged defined with a variable that wasn't yet defined, config_dict wasn't defined yet. 
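The old module-level test_message literal indexed config_dict the moment the script was imported, when config_dict was still the empty dict initialized just above it, so the first config_dict['s3_region'] lookup failed before the YAML config was ever loaded. A minimal reproduction of the pitfall (just the shape of the problem, not the real script):

    config_dict = {}
    try:
        topic_arn = 'arn:aws:sns:' + config_dict['s3_region'] + ':798276211865:cloud-archive-client-sns'
    except KeyError as err:
        print('lookup ran before the config was loaded:', err)   # -> 's3_region'

Building test_message only after the config has been parsed, as this patch does, means those lookups run once config_dict is actually populated.
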
--- .../s3_notification_handler.py | 67 ++++++++++--------- 1 file changed, 34 insertions(+), 33 deletions(-) diff --git a/scripts/sqs-to-registry/s3_notification_handler.py b/scripts/sqs-to-registry/s3_notification_handler.py index c2785a0..e5e0490 100644 --- a/scripts/sqs-to-registry/s3_notification_handler.py +++ b/scripts/sqs-to-registry/s3_notification_handler.py @@ -14,39 +14,6 @@ config_dict = {} -test_message = { - "Type": "Notification", - "MessageId": "e12f0129-0236-529c-aeed-5978d181e92a", - "TopicArn": "arn:aws:sns:" + config_dict['s3_region'] + ":798276211865:cloud-archive-client-sns", - "Subject": "Amazon S3 Notification", - "Message": '''{ - "Records": [{ - "eventVersion": "2.1", "eventSource": "aws:s3", "awsRegion": "''' + config_dict['s3_region'] + '''", - "eventTime": "2020-12-14T20:56:08.725Z", - "eventName": "ObjectRemoved:Delete", - "userIdentity": {"principalId": "AX8TWPQYA8JEM"}, - "requestParameters": {"sourceIPAddress": "65.113.158.185"}, - "responseElements": {"x-amz-request-id": "D8059E6A1D53597A", - "x-amz-id-2": "7DZF7MAaHztZqVMKlsK45Ogrto0945RzXSkMnmArxNCZ+4/jmXeUn9JM1NWOMeKK093vW8g5Cj5KMutID+4R3W1Rx3XDZOio"}, - "s3": { - "s3SchemaVersion": "1.0", "configurationId": "archive-testing-demo-event", - "bucket": {"name": "''' + config_dict['s3_bucket'] + '''", - "ownerIdentity": {"principalId": "AX8TWPQYA8JEM"}, - "arn": "arn:aws:s3:::''' + config_dict['s3_bucket'] + '''"}, - "object": {"key": "123", - "sequencer": "005FD7D1765F04D8BE", - "eTag": "44d2452e8bc2c8013e9c673086fbab7a", - "size": 1385, - "versionId": "q6ls_7mhqUbfMsoYiQSiADnHBZQ3Fbzf"} - } - }] - }''', - "Timestamp": "2020-12-14T20:56:23.786Z", - "SignatureVersion": "1", - "Signature": "MB5P0H5R5q3zOFoo05lpL4YuZ5TJy+f2c026wBWBsQ7mbNQiVxAy4VbbK0U1N3YQwOslq5ImVjMpf26t1+zY1hoHoALfvHY9wPtc8RNlYqmupCaZgtwEl3MYQz2pHIXbcma4rt2oh+vp/n+viARCToupyysEWTvw9a9k9AZRuHhTt8NKe4gpphG0s3/C1FdvrpQUvxoSGVizkaX93clU+hAFsB7V+yTlbKP+SNAqP/PaLtai6aPY9Lb8reO2ZjucOl7EgF5IhBVT43HhjBBj4JqYBNbMPcId5vMfBX8qI8ANIVlGGCIjGo1fpU0ROxSHsltuRjkmErpxUEe3YJJM3Q==", - "SigningCertURL": "https://sns.us-east-2.amazonaws.com/SimpleNotificationService-010a507c1833636cd94bdb98bd93083a.pem", - "UnsubscribeURL": "https://sns.us-east-2.amazonaws.com/?Action=Unsubscribe&SubscriptionArn=arn:aws:sns:us-east-2:798276211865:cloud-archive-client-sns:461222e7-0abf-40c6-acf7-4825cef65cce" -} def handler(recs, log_level): logger = ClientLogger.get_logger('s3_notification_handler.handler', log_level, False) @@ -117,6 +84,40 @@ def handler(recs, log_level): queue = sqs_consumer.connect(s3_resource, config_dict['sqs_name']) # Send a test message + test_message = { + "Type": "Notification", + "MessageId": "e12f0129-0236-529c-aeed-5978d181e92a", + "TopicArn": "arn:aws:sns:" + config_dict['s3_region'] + ":798276211865:cloud-archive-client-sns", + "Subject": "Amazon S3 Notification", + "Message": '''{ + "Records": [{ + "eventVersion": "2.1", "eventSource": "aws:s3", "awsRegion": "''' + config_dict['s3_region'] + '''", + "eventTime": "2020-12-14T20:56:08.725Z", + "eventName": "ObjectRemoved:Delete", + "userIdentity": {"principalId": "AX8TWPQYA8JEM"}, + "requestParameters": {"sourceIPAddress": "65.113.158.185"}, + "responseElements": {"x-amz-request-id": "D8059E6A1D53597A", + "x-amz-id-2": "7DZF7MAaHztZqVMKlsK45Ogrto0945RzXSkMnmArxNCZ+4/jmXeUn9JM1NWOMeKK093vW8g5Cj5KMutID+4R3W1Rx3XDZOio"}, + "s3": { + "s3SchemaVersion": "1.0", "configurationId": "archive-testing-demo-event", + "bucket": {"name": "''' + config_dict['s3_bucket'] + '''", + "ownerIdentity": {"principalId": 
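In short, the example granule payload now carries the relationship type as a bare string instead of a nested object; a before/after sketch using the id from the test payload in the diff below:

    # before
    relationships = [{"id": "0fad03df-0805-434a-86a6-7dc42d68480f", "type": {"type": "COLLECTION"}}]
    # after
    relationships = [{"id": "0fad03df-0805-434a-86a6-7dc42d68480f", "type": "COLLECTION"}]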
"AX8TWPQYA8JEM"}, + "arn": "arn:aws:s3:::''' + config_dict['s3_bucket'] + '''"}, + "object": {"key": "123", + "sequencer": "005FD7D1765F04D8BE", + "eTag": "44d2452e8bc2c8013e9c673086fbab7a", + "size": 1385, + "versionId": "q6ls_7mhqUbfMsoYiQSiADnHBZQ3Fbzf"} + } + }] + }''', + "Timestamp": "2020-12-14T20:56:23.786Z", + "SignatureVersion": "1", + "Signature": "MB5P0H5R5q3zOFoo05lpL4YuZ5TJy+f2c026wBWBsQ7mbNQiVxAy4VbbK0U1N3YQwOslq5ImVjMpf26t1+zY1hoHoALfvHY9wPtc8RNlYqmupCaZgtwEl3MYQz2pHIXbcma4rt2oh+vp/n+viARCToupyysEWTvw9a9k9AZRuHhTt8NKe4gpphG0s3/C1FdvrpQUvxoSGVizkaX93clU+hAFsB7V+yTlbKP+SNAqP/PaLtai6aPY9Lb8reO2ZjucOl7EgF5IhBVT43HhjBBj4JqYBNbMPcId5vMfBX8qI8ANIVlGGCIjGo1fpU0ROxSHsltuRjkmErpxUEe3YJJM3Q==", + "SigningCertURL": "https://sns.us-east-2.amazonaws.com/SimpleNotificationService-010a507c1833636cd94bdb98bd93083a.pem", + "UnsubscribeURL": "https://sns.us-east-2.amazonaws.com/?Action=Unsubscribe&SubscriptionArn=arn:aws:sns:us-east-2:798276211865:cloud-archive-client-sns:461222e7-0abf-40c6-acf7-4825cef65cce" + } + # sqs_client = s3_utils.connect('client', 'sqs' , config_dict['s3_region']) # sqs_client.send_message( # QueueUrl='https://sqs.us-east-2.amazonaws.com/798276211865/cloud-archive-client-sqs', From ec3af19f6c62c282388da18d385f2d1284bdcbe7 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 7 Jul 2021 14:43:13 -0600 Subject: [PATCH 108/129] 1508-Updated sme python script to reflect avro "relationships" type changing. --- scripts/sme/sme.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/sme/sme.py b/scripts/sme/sme.py index 12f7859..4e341cd 100644 --- a/scripts/sme/sme.py +++ b/scripts/sme/sme.py @@ -37,7 +37,7 @@ def handler(key, value, log_level = 'INFO'): # This is an example for testing purposes. value = { "type": "granule", - "content": "{ \"discovery\": {\n \"fileIdentifier\": \"92ade5dc-946d-11ea-abe4-0242ac120004\",\n \"links\": [\n {\n \"linkFunction\": \"download\",\n \"linkName\": \"Amazon S3\",\n \"linkProtocol\": \"HTTPS\",\n \"linkUrl\": \"https://s3.amazonaws.com/nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\"\n }\n ],\n \"parentIdentifier\": \"0fad03df-0805-434a-86a6-7dc42d68480f\",\n \"spatialBounding\": null,\n \"temporalBounding\": {\n \"beginDate\": \"2020-05-12T16:20:15.158Z\", \n \"endDate\": \"2020-05-12T16:21:51.494Z\"\n },\n \"title\": \"HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\"\n },\n \"fileInformation\": {\n \"checksums\": [{\"algorithm\": \"MD5\",\"value\": \"44d2452e8bc2c8013e9c673086fbab7a\"}]\n, \"optionalAttributes\":{}, \"format\": \"HSD\",\n \"name\": \"HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\",\n \"size\": 208918\n },\n \"fileLocations\": {\n \"optionalAttributes\":{}, \"uri\":\"//nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\", \"asynchronous\": false,\n \"deleted\": false,\n \"lastModified\": 1589300890000,\n \"locality\": \"us-east-1\",\n \"restricted\": false,\n \"serviceType\": \"Amazon:AWS:S3\",\n \"type\": {\"type\":\"ACCESS\"},\n \"uri\": \"s3://nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\"\n },\n \"relationships\": [\n {\n \"id\": \"0fad03df-0805-434a-86a6-7dc42d68480f\",\n \"type\": {\"type\":\"COLLECTION\"}\n }\n ]\n }", + "content": "{ \"discovery\": {\n \"fileIdentifier\": \"92ade5dc-946d-11ea-abe4-0242ac120004\",\n \"links\": [\n {\n \"linkFunction\": \"download\",\n \"linkName\": \"Amazon S3\",\n 
\"linkProtocol\": \"HTTPS\",\n \"linkUrl\": \"https://s3.amazonaws.com/nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\"\n }\n ],\n \"parentIdentifier\": \"0fad03df-0805-434a-86a6-7dc42d68480f\",\n \"spatialBounding\": null,\n \"temporalBounding\": {\n \"beginDate\": \"2020-05-12T16:20:15.158Z\", \n \"endDate\": \"2020-05-12T16:21:51.494Z\"\n },\n \"title\": \"HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\"\n },\n \"fileInformation\": {\n \"checksums\": [{\"algorithm\": \"MD5\",\"value\": \"44d2452e8bc2c8013e9c673086fbab7a\"}]\n, \"optionalAttributes\":{}, \"format\": \"HSD\",\n \"name\": \"HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\",\n \"size\": 208918\n },\n \"fileLocations\": {\n \"optionalAttributes\":{}, \"uri\":\"//nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\", \"asynchronous\": false,\n \"deleted\": false,\n \"lastModified\": 1589300890000,\n \"locality\": \"us-east-1\",\n \"restricted\": false,\n \"serviceType\": \"Amazon:AWS:S3\",\n \"type\": {\"type\":\"ACCESS\"},\n \"uri\": \"s3://nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\"\n },\n \"relationships\": [\n {\n \"id\": \"0fad03df-0805-434a-86a6-7dc42d68480f\",\n \"type\": \"COLLECTION\"\n }\n ]\n }", "contentType": "application/json", "method": "PUT", "source": "unknown", @@ -85,8 +85,8 @@ def handler(key, value, log_level = 'INFO'): parsed_record.type = value['type'] content_dict = parsed_record.to_dict() # reformat Relationship field - relationship_type = content_dict['relationships'][0]['type']['type'] - content_dict['relationships'][0]['type'] = relationship_type +# relationship_type = content_dict['relationships'][0]['type'] +# content_dict['relationships'][0]['type'] = relationship_type # reformat File Locations filelocation_type = content_dict['fileLocations']['type']['type'] From f3ebdaee382cff03a6d450a948276c8bf2ff1446 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 7 Jul 2021 15:58:47 -0600 Subject: [PATCH 109/129] 1508-Changed S3MessageAdapter.transform to take a single record, since that's all it is transforming on. Renamed var in test_S3MessageAdapter to be more general name. 
--- .../onestop/util/S3MessageAdapter.py | 13 +++-- .../onestop/util/SqsConsumer.py | 2 +- .../onestop/util/SqsHandlers.py | 2 +- .../test/unit/test_SqsHandlers.py | 4 +- .../test/unit/util/test_S3MessageAdapter.py | 49 +++++++++++-------- scripts/launch_e2e.py | 10 ++-- scripts/launch_pyconsumer.py | 2 +- 7 files changed, 46 insertions(+), 36 deletions(-) diff --git a/onestop-python-client/onestop/util/S3MessageAdapter.py b/onestop-python-client/onestop/util/S3MessageAdapter.py index dccf8dc..9de20f0 100644 --- a/onestop-python-client/onestop/util/S3MessageAdapter.py +++ b/onestop-python-client/onestop/util/S3MessageAdapter.py @@ -65,21 +65,21 @@ def __init__(self, access_bucket, s3_message_adapter_metadata_type, file_id_pre if wildargs: self.logger.debug("Superfluous parameters in constructor call: " + str(wildargs)) - def transform(self, recs): + def transform(self, rec): """ - Transforms sqs message triggered by s3 event to correct format for publishing to IM registry + Transforms a single sqs message, triggered by a s3 event, to correct format for publishing to IM registry Parameters: ---------- - recs: dict - sqs event message to transform + rec: dict + Single record in a sqs event message to transform :return: ParsedRecord Object The Parsed Record class is an avro schema generated class """ - self.logger.info("Transform!") - rec = recs[0] # This is standard format 1 record per message for now according to AWS docs + self.logger.info("Transforming Record") + self.logger.debug("Record: %s"%rec) s3_bucket = rec['s3']['bucket']['name'] s3_key = rec['s3']['object']['key'] @@ -88,7 +88,6 @@ def transform(self, recs): checkSumAlgorithm = ChecksumAlgorithm(value='MD5') alg_value = rec['s3']['object']['eTag'] checkSum = Checksum(algorithm=checkSumAlgorithm, value=alg_value) - checkSum_dict = checkSum.to_dict() file_name = str(s3_key)[pos:] file_size = rec['s3']['object']['size'] diff --git a/onestop-python-client/onestop/util/SqsConsumer.py b/onestop-python-client/onestop/util/SqsConsumer.py index 4f2b6ac..db1431a 100644 --- a/onestop-python-client/onestop/util/SqsConsumer.py +++ b/onestop-python-client/onestop/util/SqsConsumer.py @@ -55,7 +55,7 @@ def receive_messages(self, sqs_queue, sqs_max_polls, cb): :param cb: function call back function - :return: If the Message has a Records key then the call back function gets called on the Message. + :return: If the Message has a Records key then the call back function gets called on a single message. 
""" self.logger.info("Receive messages") diff --git a/onestop-python-client/onestop/util/SqsHandlers.py b/onestop-python-client/onestop/util/SqsHandlers.py index 9170f8d..6feca23 100644 --- a/onestop-python-client/onestop/util/SqsHandlers.py +++ b/onestop-python-client/onestop/util/SqsHandlers.py @@ -83,7 +83,7 @@ def upload(records, log_level='DEBUG'): object_uuid = s3_utils.add_uuid_metadata(s3_resource, bucket, s3_key) # Convert s3 message to IM message - im_message = s3_message_adapter.transform(records) + im_message = s3_message_adapter.transform(rec) json_payload = json.dumps(im_message.to_dict(), cls=EnumEncoder) logger.debug('transformed message, json_payload: %s'%json_payload) diff --git a/onestop-python-client/test/unit/test_SqsHandlers.py b/onestop-python-client/test/unit/test_SqsHandlers.py index 231e6cf..6e8481b 100644 --- a/onestop-python-client/test/unit/test_SqsHandlers.py +++ b/onestop-python-client/test/unit/test_SqsHandlers.py @@ -251,7 +251,7 @@ def test_upload_handler_happy(self, mock_s3_utils, mock_wp): mock_wp.publish_registry.assert_called_with( 'granule', mock_s3_utils.get_uuid_metadata(), - json.dumps(self.s3_message_adapter.transform(json.loads(message['Message'])['Records']).to_dict(), cls=EnumEncoder), + json.dumps(self.s3_message_adapter.transform(json.loads(message['Message'])['Records'][0]).to_dict(), cls=EnumEncoder), 'POST' ) @@ -312,7 +312,7 @@ def test_upload_handler_bucket_as_backup_PATCH(self, mock_s3_utils, mock_wp): mock_wp.publish_registry.assert_called_with( 'granule', mock_s3_utils.get_uuid_metadata(), - json.dumps(self.s3_message_adapter.transform(json.loads(message['Message'])['Records']).to_dict(), cls=EnumEncoder), + json.dumps(self.s3_message_adapter.transform(json.loads(message['Message'])['Records'][0]).to_dict(), cls=EnumEncoder), 'PATCH' ) diff --git a/onestop-python-client/test/unit/util/test_S3MessageAdapter.py b/onestop-python-client/test/unit/util/test_S3MessageAdapter.py index 66031fe..8dee317 100644 --- a/onestop-python-client/test/unit/util/test_S3MessageAdapter.py +++ b/onestop-python-client/test/unit/util/test_S3MessageAdapter.py @@ -13,26 +13,33 @@ class S3MessageAdapterTest(unittest.TestCase): 'log_level': 'DEBUG' } - recs_minimum_fields = \ - [{ - 'eventVersion': '2.1', - 'eventSource': 'aws:s3', - 'awsRegion': 'us-east-1', - 'eventTime': '2020-11-10T00:44:20.642Z', - 'eventName': 'ObjectCreated:Put', - 'userIdentity': {'principalId': 'AWS:AIDAUDW4MV7I5RW5LQJIO'}, - 'requestParameters': {'sourceIPAddress': '65.113.158.185'}, - 'responseElements': {'x-amz-request-id': '7D394F43C682BB87', 'x-amz-id-2': 'k2Yn5BGg7DM5fIEAnwv5RloBFLYERjGRG3mT+JsPbdX033USr0eNObqkHiw3m3x+BQ17DD4C0ErB/VdhYt2Az01LJ4mQ/aqS'}, - 's3': {'s3SchemaVersion': '1.0', 'configurationId': 'csbS3notification', - 'bucket': {'name': 'nesdis-ncei-csb-dev', - 'ownerIdentity': {'principalId': 'A3PGJENIF5D10L'}, - 'arn': 'arn:aws:s3:::nesdis-ncei-csb-dev'}, - 'object': {'key': 'csv/file1.csv', 'size': 1385, - 'eTag': '44d2452e8bc2c8013e9c673086fbab7a', - 'versionId': 'q6ls_7mhqUbfMsoYiQSiADnHBZQ3Fbzf', - 'sequencer': '005FA9E26498815778'} - } - }] + record = { + 'eventVersion': '2.1', + 'eventSource': 'aws:s3', + 'awsRegion': 'us-east-1', + 'eventTime': '2020-11-10T00:44:20.642Z', + 'eventName': 'ObjectCreated:Put', + 'userIdentity': {'principalId': 'AWS:AIDAUDW4MV7I5RW5LQJIO'}, + 'requestParameters': {'sourceIPAddress': '65.113.158.185'}, + 'responseElements': {'x-amz-request-id': '7D394F43C682BB87', 'x-amz-id-2': 
'k2Yn5BGg7DM5fIEAnwv5RloBFLYERjGRG3mT+JsPbdX033USr0eNObqkHiw3m3x+BQ17DD4C0ErB/VdhYt2Az01LJ4mQ/aqS'}, + 's3': { + 's3SchemaVersion': '1.0', + 'configurationId': 'csbS3notification', + 'bucket': { + 'name': 'nesdis-ncei-csb-dev', + 'ownerIdentity': { + 'principalId': 'A3PGJENIF5D10L' + }, + 'arn': 'arn:aws:s3:::nesdis-ncei-csb-dev' + }, + 'object': { + 'key': 'csv/file1.csv', 'size': 1385, + 'eTag': '44d2452e8bc2c8013e9c673086fbab7a', + 'versionId': 'q6ls_7mhqUbfMsoYiQSiADnHBZQ3Fbzf', + 'sequencer': '005FA9E26498815778' + } + } + } def test_init_metadata_type_valid(self): publisher = S3MessageAdapter(**self.config_dict) @@ -62,7 +69,7 @@ def test_init_extra_parameters_constructor(self): def test_transform_happy(self): s3MA = S3MessageAdapter(**self.config_dict) - payload = s3MA.transform(self.recs_minimum_fields) + payload = s3MA.transform(self.record) self.assertIsNotNone(payload) diff --git a/scripts/launch_e2e.py b/scripts/launch_e2e.py index 820fd22..f43af3c 100644 --- a/scripts/launch_e2e.py +++ b/scripts/launch_e2e.py @@ -31,7 +31,7 @@ def handler(recs, log_level): if recs is None: logger.info('No records retrieved, doing nothing.') return - + rec = recs[0] logger.debug('Record: %s'%rec) bucket = rec['s3']['bucket']['name'] @@ -46,7 +46,7 @@ def handler(recs, log_level): s3_utils.add_uuid_metadata(s3_resource, bucket, s3_key) s3ma = S3MessageAdapter(**config_dict) - im_message = s3ma.transform(recs) + im_message = s3ma.transform(rec) logger.debug('S3MessageAdapter.transform: %s'%im_message) json_payload = json.dumps(im_message.to_dict(), cls=EnumEncoder) logger.debug('S3MessageAdapter.transform.json dump: %s'%json_payload) @@ -61,7 +61,11 @@ def handler(recs, log_level): vault_name = config_dict['vault_name'] resp_dict = s3_utils.upload_archive(glacier, vault_name, file_data) - logger.debug('Upload response: %s'%resp_dict) + logger.debug('Upload to cloud, Response: %s'%resp_dict) + if resp_dict == None: + logger.error('Error uploading to s3 archive, see prior log statements.') + return + logger.info('upload archived location: %s'% resp_dict['location']) logger.info('archiveId: %s'% resp_dict['archiveId']) logger.info('sha256: %s'% resp_dict['checksum']) diff --git a/scripts/launch_pyconsumer.py b/scripts/launch_pyconsumer.py index 5d22317..794ca99 100644 --- a/scripts/launch_pyconsumer.py +++ b/scripts/launch_pyconsumer.py @@ -49,7 +49,7 @@ def handler(recs, log_level): # Convert s3 message to IM message s3ma = S3MessageAdapter(**config_dict) - im_message = s3ma.transform(recs) + im_message = s3ma.transform(rec) logger.debug('S3MessageAdapter.transform: %s'%im_message) json_payload = json.dumps(im_message.to_dict(), cls=EnumEncoder) logger.debug('S3MessageAdapter.transform.json dump: %s'%json_payload) From 578f8a5cb66b6c5bf74860989ece5dc43e021b78 Mon Sep 17 00:00:00 2001 From: Erin Date: Wed, 7 Jul 2021 16:21:57 -0600 Subject: [PATCH 110/129] 1508-Changed SqsHandlers to take a single record, since that's all it is operating on. 
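With this change, callbacks registered with SqsConsumer and the handlers built by SqsHandlers receive a single record dict; a minimal sketch of the new callback shape (field access mirrors the handlers in the diff below):

    def handler(rec, log_level='INFO'):
        # 'rec' is one entry from the message body's 'Records' list, unwrapped by
        # SqsConsumer.receive_messages before the callback is invoked.
        bucket = rec['s3']['bucket']['name']
        s3_key = rec['s3']['object']['key']
        return bucket, s3_key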
--- .../onestop/util/SqsConsumer.py | 7 +++++- .../onestop/util/SqsHandlers.py | 24 +++++++++---------- .../test/unit/util/test_SqsConsumer.py | 4 ++-- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/onestop-python-client/onestop/util/SqsConsumer.py b/onestop-python-client/onestop/util/SqsConsumer.py index db1431a..b0449d0 100644 --- a/onestop-python-client/onestop/util/SqsConsumer.py +++ b/onestop-python-client/onestop/util/SqsConsumer.py @@ -98,7 +98,12 @@ def receive_messages(self, sqs_queue, sqs_max_polls, cb): if 'Records' in message_content: recs = message_content['Records'] self.logger.debug('Message "Records": %s' % recs) - cb(recs, self.log_level) + if len(recs) > 0: + rec = recs[0] + self.logger.debug('Single Message "Record": %s' % rec) + cb(rec, self.log_level) + else: + self.logger.info("Message body's records is empty.") else: self.logger.info("s3 event message without 'Records' content received.") diff --git a/onestop-python-client/onestop/util/SqsHandlers.py b/onestop-python-client/onestop/util/SqsHandlers.py index 6feca23..2a61f08 100644 --- a/onestop-python-client/onestop/util/SqsHandlers.py +++ b/onestop-python-client/onestop/util/SqsHandlers.py @@ -12,25 +12,24 @@ def create_delete_handler(web_publisher): :param: web_publisher: WebPublisher object """ - def delete(records, log_level='INFO'): + def delete(rec, log_level='INFO'): logger = ClientLogger.get_logger('SqsHandlers.create_delete_handler.delete', log_level, False) logger.info("In create_delete_handler.delete() handler") - logger.debug("Records: %s"%records) + logger.debug("Record: %s"%rec) - if not records or records is None: - logger.info("Ending handler, records empty, records=%s"%records) + if not rec or rec is None: + logger.info("Ending handler, record empty, record=%s"%rec) return - record = records[0] - if record['eventName'] != 'ObjectRemoved:Delete': - logger.info("Ending handler, eventName=%s"%record['eventName']) + if rec['eventName'] != 'ObjectRemoved:Delete': + logger.info("Ending handler, eventName=%s"%rec['eventName']) return - logger.info('Attempting to delete record %s'%record) + logger.info('Attempting to delete record %s'%rec) - bucket = record['s3']['bucket']['name'] - s3_key = record['s3']['object']['key'] + bucket = rec['s3']['bucket']['name'] + s3_key = rec['s3']['object']['key'] s3_url = "s3://" + bucket + "/" + s3_key payload = '{"queries":[{"type": "fieldQuery", "field": "links.linkUrl", "value": "' + s3_url + '"}] }' search_response = web_publisher.search_onestop('granule', payload) @@ -59,12 +58,11 @@ def create_upload_handler(web_publisher, s3_utils, s3_message_adapter): :param: s3ma: S3MessageAdapter object """ - def upload(records, log_level='DEBUG'): + def upload(rec, log_level='DEBUG'): logger = ClientLogger.get_logger('SqsHandlers.create_upload_handler.upload', log_level, False) logger.info("In create_upload_handler.upload() handler") - logger.debug("Records: %s"%records) + logger.debug("Records: %s"%rec) - rec = records[0] s3_key = rec['s3']['object']['key'] logger.info("Received message for " + s3_key) logger.info("Event type: " + rec['eventName']) diff --git a/onestop-python-client/test/unit/util/test_SqsConsumer.py b/onestop-python-client/test/unit/util/test_SqsConsumer.py index 03ee897..e3dee71 100644 --- a/onestop-python-client/test/unit/util/test_SqsConsumer.py +++ b/onestop-python-client/test/unit/util/test_SqsConsumer.py @@ -128,7 +128,7 @@ def test_receive_messages_callback_occurs(self): self.sqs_consumer.receive_messages(queue, 1, mock_cb) # Verify 
callback function was called once with expected message attributes - mock_cb.assert_called_with(self.records, ANY) + mock_cb.assert_called_with(self.records[0], ANY) @mock_sqs def test_happy_path(self): @@ -149,7 +149,7 @@ def test_happy_path(self): self.sqs_consumer.receive_messages(queue, 1, mock_cb) # Verify callback function was called once with expected message attributes - mock_cb.assert_called_with(self.records, ANY) + mock_cb.assert_called_with(self.records[0], ANY) # An example using external send/receive methods @unittest.skip From 91fe9bc76d1a68be8d491c862b04ddbb217ac192 Mon Sep 17 00:00:00 2001 From: Erin Date: Thu, 8 Jul 2021 18:26:17 -0600 Subject: [PATCH 111/129] 1508-added avro psi schema unit tests and Enum jsonEncoder tests. --- .../test/unit/schemas/__init__.py | 0 .../unit/schemas/psiSchemaClasses/__init__.py | 0 .../schemas/psiSchemaClasses/org/__init__.py | 0 .../psiSchemaClasses/org/cedar/__init__.py | 0 .../org/cedar/schemas/__init__.py | 0 .../org/cedar/schemas/avro/__init__.py | 0 .../org/cedar/schemas/avro/psi/__init__.py | 0 .../schemas/avro/psi/test_ParsedRecord.py | 70 ++++++++---- .../schemas/psiSchemaClasses/util/__init__.py | 0 .../psiSchemaClasses/util/test_jsonEncoder.py | 105 ++++++++++++++++++ 10 files changed, 156 insertions(+), 19 deletions(-) create mode 100644 onestop-python-client/test/unit/schemas/__init__.py create mode 100644 onestop-python-client/test/unit/schemas/psiSchemaClasses/__init__.py create mode 100644 onestop-python-client/test/unit/schemas/psiSchemaClasses/org/__init__.py create mode 100644 onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/__init__.py create mode 100644 onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/__init__.py create mode 100644 onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/__init__.py create mode 100644 onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/__init__.py create mode 100644 onestop-python-client/test/unit/schemas/psiSchemaClasses/util/__init__.py create mode 100644 onestop-python-client/test/unit/schemas/psiSchemaClasses/util/test_jsonEncoder.py diff --git a/onestop-python-client/test/unit/schemas/__init__.py b/onestop-python-client/test/unit/schemas/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/onestop-python-client/test/unit/schemas/psiSchemaClasses/__init__.py b/onestop-python-client/test/unit/schemas/psiSchemaClasses/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/__init__.py b/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/__init__.py b/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/__init__.py b/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/__init__.py b/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/__init__.py 
b/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/test_ParsedRecord.py b/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/test_ParsedRecord.py index 80aec1d..a53ad88 100644 --- a/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/test_ParsedRecord.py +++ b/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/test_ParsedRecord.py @@ -1,9 +1,44 @@ import unittest from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.parsed_record import ParsedRecord +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.relationship_type import RelationshipType +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.relationship import Relationship + class test_ParsedRecord(unittest.TestCase): + fileLocation_dict = { + "fileLocations":{ + "s3://noaa-goes16/ABI-L1b-RadF/2019/303/09/OR_ABI-L1b-RadF-M6C10_G16_s20193030950389_e20193031000109_c20193031000158.nc":{ + "serviceType":"Amazon:AWS:S3", + "deleted":False, + "restricted":False, + "asynchronous":False, + "locality":"us-east-1", + "lastModified":1572430074000, + #Todo: change this type. + "type": {"type":"ACCESS"}, + "optionalAttributes":{ + }, + "uri":"s3://noaa-goes16/ABI-L1b-RadF/2019/303/09/OR_ABI-L1b-RadF-M6C10_G16_s20193030950389_e20193031000109_c20193031000158.nc" + } + } + } + + relationship_dict = { + "id": "5b58de08-afef-49fb-99a1-9c5d5c003bde", + "type": RelationshipType.collection + } + relationships_dict = { + "relationships":[ + relationship_dict, + { + "id":"6668de08-afef-49fb-99a1-9c5d5c003bde", + "type":{"type":"collection"} + } + ] + } + @unittest.skip def test_type(self): content_dict = { "fileInformation":{ @@ -22,10 +57,11 @@ def test_type(self): } } - parsed_record = ParsedRecord().from_dict(content_dict) + parsed_record = ParsedRecord.from_dict(content_dict) self.assertIsNotNone(parsed_record) + @unittest.skip def test_discovery(self): content_dict = { "discovery":{ @@ -74,13 +110,14 @@ def test_discovery(self): } } - parsed_record = ParsedRecord().from_dict(content_dict) + parsed_record = ParsedRecord.from_dict(content_dict) self.assertIsNotNone(parsed_record) # TODO # def test_analysis(self): + @unittest.skip def test_fileInformation(self): content_dict = { "fileInformation":{ @@ -99,10 +136,11 @@ def test_fileInformation(self): } } - parsed_record = ParsedRecord().from_dict(content_dict) + parsed_record = ParsedRecord.from_dict(content_dict) self.assertIsNotNone(parsed_record) + @unittest.skip def test_fileLocations(self): content_dict = { "fileLocations":{ @@ -121,27 +159,21 @@ def test_fileLocations(self): } } - parsed_record = ParsedRecord().from_dict(content_dict) + parsed_record = ParsedRecord.from_dict(content_dict) print("parsed_record:%s"%parsed_record) self.assertIsNotNone(parsed_record) # TODO # def test_publishing(self): - def test_relationships(self): - content_dict = { - "relationships":[ - { - "id":"5b58de08-afef-49fb-99a1-9c5d5c003bde", - "type":"COLLECTION" - }, - { - "id":"6668de08-afef-49fb-99a1-9c5d5c003bde" - } - ] - } + def test_relationships_all_vars_set(self): + relationship = Relationship(**self.relationship_dict) - # Seems to want: ‘relationships': [{"type": {"type":"COLLECTION"}, "id":'5b58de08-afef-49fb-99a1-9c5d5c003bde'}] - parsed_record = ParsedRecord().from_dict(content_dict) + 
self.assertEqual(relationship.id, self.relationship_dict['id']) + self.assertEqual(relationship.type, self.relationship_dict['type']) - self.assertIsNotNone(parsed_record) + def test_relationships_optionals(self): + id = '12' + relationship = Relationship(id=id, type=None) + + self.assertEqual(relationship.id, id) diff --git a/onestop-python-client/test/unit/schemas/psiSchemaClasses/util/__init__.py b/onestop-python-client/test/unit/schemas/psiSchemaClasses/util/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/onestop-python-client/test/unit/schemas/psiSchemaClasses/util/test_jsonEncoder.py b/onestop-python-client/test/unit/schemas/psiSchemaClasses/util/test_jsonEncoder.py new file mode 100644 index 0000000..9e98860 --- /dev/null +++ b/onestop-python-client/test/unit/schemas/psiSchemaClasses/util/test_jsonEncoder.py @@ -0,0 +1,105 @@ +import json +import unittest + +from onestop.schemas.util.jsonEncoder import EnumEncoder +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.checksum_algorithm import ChecksumAlgorithm +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.relationship_type import RelationshipType +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.record_type import RecordType +from onestop.schemas.psiSchemaClasses.time_range_descriptor import TimeRangeDescriptor +from onestop.schemas.geojsonSchemaClasses.line_string_type import LineStringType +from onestop.schemas.geojsonSchemaClasses.multi_line_string_type import MultiLineStringType +from onestop.schemas.geojsonSchemaClasses.multi_point_type import MultiPointType +from onestop.schemas.geojsonSchemaClasses.multi_polygon_type import MultiPolygonType +from onestop.schemas.geojsonSchemaClasses.point_type import PointType +from onestop.schemas.geojsonSchemaClasses.polygon_type import PolygonType +class jsonEncoderTest(unittest.TestCase): + + def test_checksumalgorithm_enum_class_encodes(self): + type = ChecksumAlgorithm.MD5.value + obj = ChecksumAlgorithm(type) + + jsonStr = json.dumps(obj, + cls=EnumEncoder) + + self.assertEqual(jsonStr, '{"__enum__": "ChecksumAlgorithm.%s"}'%type) + + def test_relationshiptype_enum_class_encodes(self): + type = RelationshipType.collection.value + obj = RelationshipType(type) + + jsonStr = json.dumps(obj, + cls=EnumEncoder) + + self.assertEqual(jsonStr, '{"__enum__": "RelationshipType.%s"}'%type.lower()) + + def test_recordtype_enum_class_encodes(self): + type = RecordType.granule.value + obj = RecordType(type) + + jsonStr = json.dumps(obj, + cls=EnumEncoder) + + self.assertEqual(jsonStr, '{"__enum__": "RecordType.%s"}'%type.lower()) + + def test_timerangedescriptor_enum_class_encodes(self): + type = TimeRangeDescriptor.AMBIGUOUS.value + obj = TimeRangeDescriptor(type) + + jsonStr = json.dumps(obj, + cls=EnumEncoder) + + self.assertEqual(jsonStr, '{"__enum__": "TimeRangeDescriptor.%s"}'%type) + + def test_linestring_enum_class_encodes(self): + type = LineStringType.LineString.value + obj = LineStringType(type) + + jsonStr = json.dumps(obj, + cls=EnumEncoder) + + self.assertEqual(jsonStr, '{"__enum__": "LineStringType.%s"}'%type) + + def test_multilinestringtype_enum_class_encodes(self): + type = MultiLineStringType.MultiLineString.value + obj = MultiLineStringType(type) + + jsonStr = json.dumps(obj, + cls=EnumEncoder) + + self.assertEqual(jsonStr, '{"__enum__": "MultiLineStringType.%s"}'%type) + + def test_multipointtype_enum_class_encodes(self): + type = MultiPointType.MultiPoint.value + obj = MultiPointType(type) + + jsonStr = 
json.dumps(obj, + cls=EnumEncoder) + + self.assertEqual(jsonStr, '{"__enum__": "MultiPointType.%s"}'%type) + + def test_multipolygontype_enum_class_encodes(self): + type = MultiPolygonType.MultiPolygon.value + obj = MultiPolygonType(type) + + jsonStr = json.dumps(obj, + cls=EnumEncoder) + + self.assertEqual(jsonStr, '{"__enum__": "MultiPolygonType.%s"}'%type) + + def test_pointtype_enum_class_encodes(self): + type = PointType.Point.value + obj = PointType(type) + + jsonStr = json.dumps(obj, + cls=EnumEncoder) + + self.assertEqual(jsonStr, '{"__enum__": "PointType.%s"}'%type) + + def test_polygontype_enum_class_encodes(self): + type = PolygonType.Polygon.value + obj = PolygonType(type) + + jsonStr = json.dumps(obj, + cls=EnumEncoder) + + self.assertEqual(jsonStr, '{"__enum__": "PolygonType.%s"}'%type) From 2d6b18becaf72a032ebefc1b55c04b71eadbe87f Mon Sep 17 00:00:00 2001 From: Erin Date: Thu, 8 Jul 2021 20:38:22 -0600 Subject: [PATCH 112/129] 1508-Changed avro schema FileLocationType to Enum --- .../schemas/avro/psi/file_location_type.py | 46 ++++++++----------- .../onestop/schemas/util/jsonEncoder.py | 4 +- .../onestop/util/S3MessageAdapter.py | 2 +- .../psiSchemaClasses/util/test_jsonEncoder.py | 11 +++++ 4 files changed, 35 insertions(+), 28 deletions(-) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/file_location_type.py b/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/file_location_type.py index f3b19a8..3c7aa38 100644 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/file_location_type.py +++ b/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/file_location_type.py @@ -1,29 +1,23 @@ -from dataclasses import asdict, dataclass -from typing import Dict +from enum import Enum +from typing import ClassVar -from undictify import type_checked_constructor +class FileLocationType(Enum): + INGEST = 'INGEST' + ARCHIVE = 'ARCHIVE' + ACCESS = 'ACCESS' + WORKING = 'WORKING' -@type_checked_constructor() -@dataclass -class FileLocationType: - # manually added this - type: str - - def to_dict(self) -> Dict: - """ - Returns a dictionary version of this instance. - """ - return asdict(self) - - @classmethod - def from_dict( - cls, - the_dict: Dict - ) -> 'FileLocationType': - """ - Returns an instance of this class from a dictionary. - - :param the_dict: The dictionary from which to create an instance of this class. - """ - return cls(**the_dict) + #: The Avro Schema associated to this class + _schema: ClassVar[str] = """{ + "type": "enum", + "namespace": "org.cedar.schemas.avro.psi", + "name": "FileLocationType", + "doc": "The type of the file location, e.g. 
an ingest location, access location, etc.", + "symbols": [ + "INGEST", + "ARCHIVE", + "ACCESS", + "WORKING" + ] + }""" diff --git a/onestop-python-client/onestop/schemas/util/jsonEncoder.py b/onestop-python-client/onestop/schemas/util/jsonEncoder.py index e9a02a3..5457902 100644 --- a/onestop-python-client/onestop/schemas/util/jsonEncoder.py +++ b/onestop-python-client/onestop/schemas/util/jsonEncoder.py @@ -4,6 +4,7 @@ from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.relationship_type import RelationshipType from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.record_type import RecordType from onestop.schemas.psiSchemaClasses.time_range_descriptor import TimeRangeDescriptor +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.file_location_type import FileLocationType from onestop.schemas.geojsonSchemaClasses.line_string_type import LineStringType from onestop.schemas.geojsonSchemaClasses.multi_line_string_type import MultiLineStringType from onestop.schemas.geojsonSchemaClasses.multi_point_type import MultiPointType @@ -22,7 +23,8 @@ 'MultiPointType': MultiPointType, 'MultiPolygonType': MultiPolygonType, 'PointType': PointType, - 'PolygonType': PolygonType + 'PolygonType': PolygonType, + 'FileLocationType': FileLocationType } # Used as an argument in json.dumps, transform Enum instance for later use diff --git a/onestop-python-client/onestop/util/S3MessageAdapter.py b/onestop-python-client/onestop/util/S3MessageAdapter.py index 9de20f0..9357b29 100644 --- a/onestop-python-client/onestop/util/S3MessageAdapter.py +++ b/onestop-python-client/onestop/util/S3MessageAdapter.py @@ -97,7 +97,7 @@ def transform(self, rec): relationship = Relationship(id=self.collection_id, type=RelationshipType(self.metadata_type)) # File Location - fileLocationType = FileLocationType(type='ARCHIVE') + fileLocationType = FileLocationType(FileLocationType.ARCHIVE) s3_obj_uri = "s3://" + s3_bucket + "/" + s3_key fileLocation = FileLocation(uri=s3_obj_uri, type=fileLocationType, deleted=False, restricted=True, asynchronous=False, serviceType='Amazon:AWS:S3', optionalAttributes={}) diff --git a/onestop-python-client/test/unit/schemas/psiSchemaClasses/util/test_jsonEncoder.py b/onestop-python-client/test/unit/schemas/psiSchemaClasses/util/test_jsonEncoder.py index 9e98860..bc2b1e4 100644 --- a/onestop-python-client/test/unit/schemas/psiSchemaClasses/util/test_jsonEncoder.py +++ b/onestop-python-client/test/unit/schemas/psiSchemaClasses/util/test_jsonEncoder.py @@ -6,12 +6,14 @@ from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.relationship_type import RelationshipType from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.record_type import RecordType from onestop.schemas.psiSchemaClasses.time_range_descriptor import TimeRangeDescriptor +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.file_location_type import FileLocationType from onestop.schemas.geojsonSchemaClasses.line_string_type import LineStringType from onestop.schemas.geojsonSchemaClasses.multi_line_string_type import MultiLineStringType from onestop.schemas.geojsonSchemaClasses.multi_point_type import MultiPointType from onestop.schemas.geojsonSchemaClasses.multi_polygon_type import MultiPolygonType from onestop.schemas.geojsonSchemaClasses.point_type import PointType from onestop.schemas.geojsonSchemaClasses.polygon_type import PolygonType + class jsonEncoderTest(unittest.TestCase): def test_checksumalgorithm_enum_class_encodes(self): @@ -103,3 +105,12 @@ def 
test_polygontype_enum_class_encodes(self): cls=EnumEncoder) self.assertEqual(jsonStr, '{"__enum__": "PolygonType.%s"}'%type) + + def test_filelocationtype_enum_class_encodes(self): + type = FileLocationType.INGEST.value + obj = FileLocationType(type) + + jsonStr = json.dumps(obj, + cls=EnumEncoder) + + self.assertEqual(jsonStr, '{"__enum__": "FileLocationType.%s"}'%type) From bc561a0415355e2216fb4f0f45df463e5c4e0a5a Mon Sep 17 00:00:00 2001 From: Erin Date: Thu, 8 Jul 2021 21:18:35 -0600 Subject: [PATCH 113/129] 1508-Changed avro schema FileLocations in parsed_record to match schema_registry, which is a Map/Dict {str, FileLocation}. Also fixed sme script for last change of FileLocationType being an enum, not class with var type. --- .../cedar/schemas/avro/psi/parsed_record.py | 6 +---- .../onestop/util/S3MessageAdapter.py | 14 +++++++--- scripts/sme/sme.py | 26 ++++++++++++------- 3 files changed, 29 insertions(+), 17 deletions(-) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/parsed_record.py b/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/parsed_record.py index edc9e46..5402719 100644 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/parsed_record.py +++ b/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/parsed_record.py @@ -26,11 +26,7 @@ class ParsedRecord: fileInformation: Optional[FileInformation] #: A list of location objects describing where the file is located - - # Commenting out for now because causing errors - #fileLocations: Dict[str, FileLocation] - - fileLocations: Optional[FileLocation] + fileLocations: Dict[str, FileLocation] publishing: Optional[Publishing] #: A record of this objects relationships to other objects in the inventory diff --git a/onestop-python-client/onestop/util/S3MessageAdapter.py b/onestop-python-client/onestop/util/S3MessageAdapter.py index 9357b29..593f29e 100644 --- a/onestop-python-client/onestop/util/S3MessageAdapter.py +++ b/onestop-python-client/onestop/util/S3MessageAdapter.py @@ -99,8 +99,14 @@ def transform(self, rec): # File Location fileLocationType = FileLocationType(FileLocationType.ARCHIVE) s3_obj_uri = "s3://" + s3_bucket + "/" + s3_key - fileLocation = FileLocation(uri=s3_obj_uri, type=fileLocationType, deleted=False, restricted=True, - asynchronous=False, serviceType='Amazon:AWS:S3', optionalAttributes={}) + fileLocation = {s3_obj_uri: + FileLocation(uri=s3_obj_uri, + type=fileLocationType, + deleted=False, + restricted=True, + asynchronous=False, + serviceType='Amazon:AWS:S3', + optionalAttributes={})} # Error Event errorEvent = ErrorEvent() @@ -123,7 +129,9 @@ def transform(self, rec): legalConstraints=[], dsmmAccessibility=0, dsmmDataIntegrity=0, dsmmDataQualityAssessment=0, dsmmDataQualityAssurance=0, dsmmDataQualityControlMonitoring=0, dsmmPreservability=0, dsmmProductionSustainability=0, dsmmTransparencyTraceability=0, dsmmUsability=0, dsmmAverage=0.0, services=[]) - +#fileLocations=dict +#fileLocations={'nesdis-ncei-csb-dev/csv/file2.csv': new FileLocation(...)} +#integration/test_WebPublisher parsedRecord = ParsedRecord(fileInformation=fileInformation, fileLocations=fileLocation, relationships=[relationship], errors=[errorEvent], publishing=publishing, discovery=discovery) diff --git a/scripts/sme/sme.py b/scripts/sme/sme.py index 4e341cd..25e68aa 100644 --- a/scripts/sme/sme.py +++ b/scripts/sme/sme.py @@ -37,7 +37,22 @@ def handler(key, value, log_level = 'INFO'): # This is 
an example for testing purposes. value = { "type": "granule", - "content": "{ \"discovery\": {\n \"fileIdentifier\": \"92ade5dc-946d-11ea-abe4-0242ac120004\",\n \"links\": [\n {\n \"linkFunction\": \"download\",\n \"linkName\": \"Amazon S3\",\n \"linkProtocol\": \"HTTPS\",\n \"linkUrl\": \"https://s3.amazonaws.com/nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\"\n }\n ],\n \"parentIdentifier\": \"0fad03df-0805-434a-86a6-7dc42d68480f\",\n \"spatialBounding\": null,\n \"temporalBounding\": {\n \"beginDate\": \"2020-05-12T16:20:15.158Z\", \n \"endDate\": \"2020-05-12T16:21:51.494Z\"\n },\n \"title\": \"HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\"\n },\n \"fileInformation\": {\n \"checksums\": [{\"algorithm\": \"MD5\",\"value\": \"44d2452e8bc2c8013e9c673086fbab7a\"}]\n, \"optionalAttributes\":{}, \"format\": \"HSD\",\n \"name\": \"HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\",\n \"size\": 208918\n },\n \"fileLocations\": {\n \"optionalAttributes\":{}, \"uri\":\"//nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\", \"asynchronous\": false,\n \"deleted\": false,\n \"lastModified\": 1589300890000,\n \"locality\": \"us-east-1\",\n \"restricted\": false,\n \"serviceType\": \"Amazon:AWS:S3\",\n \"type\": {\"type\":\"ACCESS\"},\n \"uri\": \"s3://nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\"\n },\n \"relationships\": [\n {\n \"id\": \"0fad03df-0805-434a-86a6-7dc42d68480f\",\n \"type\": \"COLLECTION\"\n }\n ]\n }", + "content": "{ \"discovery\": {\n \"fileIdentifier\": \"92ade5dc-946d-11ea-abe4-0242ac120004\",\n \"links\": [\n {\n \"linkFunction\": \"download\",\n \"linkName\": \"Amazon S3\",\n \"linkProtocol\": \"HTTPS\",\n \"linkUrl\": \"https://s3.amazonaws.com/nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\"\n }\n ],\n \"parentIdentifier\": \"0fad03df-0805-434a-86a6-7dc42d68480f\",\n \"spatialBounding\": null,\n \"temporalBounding\": {\n \"beginDate\": \"2020-05-12T16:20:15.158Z\", \n \"endDate\": \"2020-05-12T16:21:51.494Z\"\n },\n \"title\": \"HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\"\n },\n \"fileInformation\": {\n \"checksums\": [{\"algorithm\": \"MD5\",\"value\": \"44d2452e8bc2c8013e9c673086fbab7a\"}]\n, \"optionalAttributes\":{}, \"format\": \"HSD\",\n \"name\": \"HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\",\n \"size\": 208918\n },\n " + "\"fileLocations\": {\n " + "\"s3://nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\": {\n" + "\"optionalAttributes\":{}, " + "\"uri\":\"//nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\", " + "\"asynchronous\": false,\n " + "\"deleted\": false,\n " + "\"lastModified\": 1589300890000,\n " + "\"locality\": \"us-east-1\",\n " + "\"restricted\": false,\n " + "\"serviceType\": \"Amazon:AWS:S3\",\n " + "\"type\": {\"type\":\"ACCESS\"},\n " + "\"uri\": \"s3://nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\"\n " + "}\n " + "},\n " + "\"relationships\": [\n {\n \"id\": \"0fad03df-0805-434a-86a6-7dc42d68480f\",\n \"type\": \"COLLECTION\"\n }\n ]\n }", "contentType": "application/json", "method": "PUT", "source": "unknown", @@ -84,15 +99,8 @@ def handler(key, value, log_level = 'INFO'): # update content dict 
parsed_record.type = value['type'] content_dict = parsed_record.to_dict() - # reformat Relationship field -# relationship_type = content_dict['relationships'][0]['type'] -# content_dict['relationships'][0]['type'] = relationship_type - # reformat File Locations - filelocation_type = content_dict['fileLocations']['type']['type'] - content_dict['fileLocations']['type'] = filelocation_type - - # Transform content_dict to appropiate payload + # Transform content_dict to appropriate payload # cls=EnumEncoderValue argument looks for instances of Enum classes and extracts only the value of the Enum content_dict = json.dumps(content_dict, cls=EnumEncoderValue) content_dict = json.loads(content_dict) From ebedebb018e649005b577de388833a0aee47f42c Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 9 Jul 2021 21:09:05 -0600 Subject: [PATCH 114/129] 1508-changed avro ValidDescriptor to an enum, as it is in schema registry. Not a blank class. --- .../schemas/avro/psi/valid_descriptor.py | 41 ++++++++----------- 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/valid_descriptor.py b/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/valid_descriptor.py index 522712c..7a8b3ec 100644 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/valid_descriptor.py +++ b/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/valid_descriptor.py @@ -1,26 +1,21 @@ -from dataclasses import asdict, dataclass -from typing import Dict +from enum import Enum +from typing import ClassVar -from undictify import type_checked_constructor +class ValidDescriptor(Enum): + VALID = 'VALID' + INVALID = 'INVALID' + UNDEFINED = 'UNDEFINED' -@type_checked_constructor() -@dataclass -class ValidDescriptor: - def to_dict(self) -> Dict: - """ - Returns a dictionary version of this instance. - """ - return asdict(self) - - @classmethod - def from_dict( - cls, - the_dict: Dict - ) -> 'ValidDescriptor': - """ - Returns an instance of this class from a dictionary. - - :param the_dict: The dictionary from which to create an instance of this class. - """ - return cls(**the_dict) + #: The Avro Schema associated to this class + _schema: ClassVar[str] = """{ + "name": "ValidDescriptor", + "namespace": "org.cedar.schemas.avro.psi", + "type": "enum", + "doc": "The types of metadata records which can be represented in the PSI system", + "symbols": [ + "VALID", + "INVALID", + "UNDEFINED" + ] + }""" \ No newline at end of file From f16c6ce7e0212e2c4c63c8c9234a770805ab736e Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 9 Jul 2021 21:40:33 -0600 Subject: [PATCH 115/129] 1508-Moved test_jsonEncoded test folder up one directory, is a util of schemas not psiSchemaClasses. 
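Side note on the ValidDescriptor change in the previous patch: as a plain Enum its members are now obtained by value lookup rather than by instantiating an empty class. A quick sketch, assuming the symbols listed in that diff:

    from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.valid_descriptor import ValidDescriptor

    descriptor = ValidDescriptor('VALID')      # lookup by value
    assert descriptor is ValidDescriptor.VALID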
--- .../test/unit/schemas/{psiSchemaClasses => }/util/__init__.py | 0 .../unit/schemas/{psiSchemaClasses => }/util/test_jsonEncoder.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename onestop-python-client/test/unit/schemas/{psiSchemaClasses => }/util/__init__.py (100%) rename onestop-python-client/test/unit/schemas/{psiSchemaClasses => }/util/test_jsonEncoder.py (100%) diff --git a/onestop-python-client/test/unit/schemas/psiSchemaClasses/util/__init__.py b/onestop-python-client/test/unit/schemas/util/__init__.py similarity index 100% rename from onestop-python-client/test/unit/schemas/psiSchemaClasses/util/__init__.py rename to onestop-python-client/test/unit/schemas/util/__init__.py diff --git a/onestop-python-client/test/unit/schemas/psiSchemaClasses/util/test_jsonEncoder.py b/onestop-python-client/test/unit/schemas/util/test_jsonEncoder.py similarity index 100% rename from onestop-python-client/test/unit/schemas/psiSchemaClasses/util/test_jsonEncoder.py rename to onestop-python-client/test/unit/schemas/util/test_jsonEncoder.py From d4ac9c61bfbbb0a3f0ef9a65c6ab687f50c87c2f Mon Sep 17 00:00:00 2001 From: Erin Date: Fri, 9 Jul 2021 21:45:16 -0600 Subject: [PATCH 116/129] 1508-Added ValidDescriptor to jsonEncoder Enum list plus to test_jsonEncoder file. --- .../onestop/schemas/util/jsonEncoder.py | 4 +++- .../test/unit/schemas/util/test_jsonEncoder.py | 10 ++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/onestop-python-client/onestop/schemas/util/jsonEncoder.py b/onestop-python-client/onestop/schemas/util/jsonEncoder.py index 5457902..e4a4c8e 100644 --- a/onestop-python-client/onestop/schemas/util/jsonEncoder.py +++ b/onestop-python-client/onestop/schemas/util/jsonEncoder.py @@ -3,6 +3,7 @@ from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.checksum_algorithm import ChecksumAlgorithm from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.relationship_type import RelationshipType from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.record_type import RecordType +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.valid_descriptor import ValidDescriptor from onestop.schemas.psiSchemaClasses.time_range_descriptor import TimeRangeDescriptor from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.file_location_type import FileLocationType from onestop.schemas.geojsonSchemaClasses.line_string_type import LineStringType @@ -24,7 +25,8 @@ 'MultiPolygonType': MultiPolygonType, 'PointType': PointType, 'PolygonType': PolygonType, - 'FileLocationType': FileLocationType + 'FileLocationType': FileLocationType, + 'ValidDescriptor': ValidDescriptor } # Used as an argument in json.dumps, transform Enum instance for later use diff --git a/onestop-python-client/test/unit/schemas/util/test_jsonEncoder.py b/onestop-python-client/test/unit/schemas/util/test_jsonEncoder.py index bc2b1e4..5d9db01 100644 --- a/onestop-python-client/test/unit/schemas/util/test_jsonEncoder.py +++ b/onestop-python-client/test/unit/schemas/util/test_jsonEncoder.py @@ -5,6 +5,7 @@ from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.checksum_algorithm import ChecksumAlgorithm from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.relationship_type import RelationshipType from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.record_type import RecordType +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.valid_descriptor import ValidDescriptor from 
onestop.schemas.psiSchemaClasses.time_range_descriptor import TimeRangeDescriptor from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.file_location_type import FileLocationType from onestop.schemas.geojsonSchemaClasses.line_string_type import LineStringType @@ -114,3 +115,12 @@ def test_filelocationtype_enum_class_encodes(self): cls=EnumEncoder) self.assertEqual(jsonStr, '{"__enum__": "FileLocationType.%s"}'%type) + + def test_validdescriptor_enum_class_encodes(self): + type = ValidDescriptor.INVALID.value + obj = ValidDescriptor(type) + + jsonStr = json.dumps(obj, + cls=EnumEncoder) + + self.assertEqual(jsonStr, '{"__enum__": "ValidDescriptor.%s"}'%type) From da51dbd4c569d7de35b7b454052d04c987ca2edb Mon Sep 17 00:00:00 2001 From: Erin Date: Mon, 12 Jul 2021 09:57:54 -0600 Subject: [PATCH 117/129] 1508-added ParsedRecord test current progress. --- .../schemas/avro/psi/test_ParsedRecord.py | 494 ++++++++++++++---- 1 file changed, 384 insertions(+), 110 deletions(-) diff --git a/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/test_ParsedRecord.py b/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/test_ParsedRecord.py index a53ad88..fb7f3c9 100644 --- a/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/test_ParsedRecord.py +++ b/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/test_ParsedRecord.py @@ -1,27 +1,50 @@ import unittest from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.parsed_record import ParsedRecord -from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.relationship_type import RelationshipType +from onestop.schemas.psiSchemaClasses.keywords_element import KeywordsElement +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.discovery import Discovery +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.file_location import FileLocation +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.temporal_bounding import TemporalBounding from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.relationship import Relationship +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.relationship_type import RelationshipType +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.record_type import RecordType +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.link import Link +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.responsible_party import ResponsibleParty +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.reference import Reference +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.file_location_type import FileLocationType +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.valid_descriptor import ValidDescriptor +from onestop.schemas.psiSchemaClasses.operation import Operation +from onestop.schemas.psiSchemaClasses.data_format import DataFormat +from onestop.schemas.psiSchemaClasses.platform import Platform +from onestop.schemas.psiSchemaClasses.time_range_descriptor import TimeRangeDescriptor +from onestop.schemas.psiSchemaClasses.instruments import Instruments +from onestop.schemas.geojsonSchemaClasses.line_string_type import LineStringType +from onestop.schemas.geojsonSchemaClasses.multi_line_string_type import MultiLineStringType +from onestop.schemas.geojsonSchemaClasses.multi_point_type import MultiPointType +from 
onestop.schemas.geojsonSchemaClasses.multi_polygon_type import MultiPolygonType +from onestop.schemas.geojsonSchemaClasses.point_type import PointType +from onestop.schemas.geojsonSchemaClasses.polygon_type import PolygonType +from onestop.schemas.geojsonSchemaClasses.org.cedar.schemas.avro.geojson.point import Point +from onestop.schemas.geojsonSchemaClasses.org.cedar.schemas.avro.geojson.multi_point import MultiPoint +from onestop.schemas.geojsonSchemaClasses.org.cedar.schemas.avro.geojson.line_string import LineString +from onestop.schemas.geojsonSchemaClasses.org.cedar.schemas.avro.geojson.multi_line_string import MultiLineString +from onestop.schemas.geojsonSchemaClasses.org.cedar.schemas.avro.geojson.polygon import Polygon +from onestop.schemas.geojsonSchemaClasses.org.cedar.schemas.avro.geojson.multi_polygon import MultiPolygon class test_ParsedRecord(unittest.TestCase): + fileLocation_dict = { - "fileLocations":{ - "s3://noaa-goes16/ABI-L1b-RadF/2019/303/09/OR_ABI-L1b-RadF-M6C10_G16_s20193030950389_e20193031000109_c20193031000158.nc":{ - "serviceType":"Amazon:AWS:S3", - "deleted":False, - "restricted":False, - "asynchronous":False, - "locality":"us-east-1", - "lastModified":1572430074000, - #Todo: change this type. - "type": {"type":"ACCESS"}, - "optionalAttributes":{ - }, - "uri":"s3://noaa-goes16/ABI-L1b-RadF/2019/303/09/OR_ABI-L1b-RadF-M6C10_G16_s20193030950389_e20193031000109_c20193031000158.nc" - } - } + "serviceType":"Amazon:AWS:S3", + "deleted":False, + "restricted":False, + "asynchronous":False, + "locality":"us-east-1", + "lastModified":1572430074000, + "type": FileLocationType.INGEST, + "optionalAttributes":{ + }, + "uri":"s3://noaa-goes16/ABI-L1b-RadF/2019/303/09/OR_ABI-L1b-RadF-M6C10_G16_s20193030950389_e20193031000109_c20193031000158.nc" } relationship_dict = { @@ -38,87 +61,349 @@ class test_ParsedRecord(unittest.TestCase): ] } - @unittest.skip - def test_type(self): - content_dict = { - "fileInformation":{ - "checksums":[ - { - "value":"4809084627a18d54db59659819f8a4b5d2c76367", - "algorithm":"SHA1" - } - ], - "headers":"NetCDF file reader", - "size":22876986, - "name":"OR_ABI-L1b-RadF-M6C10_G16_s20193030950389_e20193031000109_c20193031000158.nc", - "format":"NetCDF", - "optionalAttributes":{ - } + # Discovery Related items + link_dict = { + "linkName": "Amazon S3", + "linkProtocol": "HTTPS", + "linkUrl": "https://s3.amazonaws.com/nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2", + "linkDescription": "who knows", + "linkFunction": "download" + } + keywordsElement_dict = { + 'values': ['value1'], + 'type': 'type1', + 'namespace': 'name space' + } + temporalBounding_dict = { + 'beginDate': 'begin date', + 'beginIndeterminate': 'begin ind', + 'endDate': 'end date', + 'endIndeterminate': 'end ind', + 'instant': 'instant', + 'instantIndeterminate': 'instant ind', + 'description': 'desc' + } + point_dict = { + 'type': PointType.Point, + 'coordinates': [0.0, 1.1, 2.2, 3.3] + } + multiPoint_dict = { + 'type': MultiPointType.MultiPoint, + 'coordinates': [[0.0, 1.0], [2.0, 1.0]] + } + lineString_dict = { + 'type': LineStringType.LineString, + 'coordinates': [[0.0, 1.0], [2.0, 1.0]] + } + multiLineString_dict = { + 'type': MultiLineStringType.MultiLineString, + 'coordinates': [[[0.0, 1.0], [2.0, 1.0]], [[0.0, 2.0], [2.0, 2.0]]] + } + polygon_dict = { + 'type': PolygonType.Polygon, + 'coordinates': [[[0.0, 1.0], [2.0, 1.0]], [[0.0, 2.0], [2.0, 2.0]]] + } + multiPolygon_dict = { + 'type': MultiPolygonType.MultiPolygon, 
+ 'coordinates': [[[[0.0, 1.0], [2.0, 1.0]], [[0.0, 2.0], [2.0, 2.0]]], [[[1.0, 1.0], [2.0, 1.0]], [[0.0, 2.0], [2.0, 2.0]]]] + } + instruments_dict = { + 'instrumentIdentifier': 'ident', + 'instrumentType': 'type', + 'instrumentDescription': 'desc' + } + operation_dict = { + 'operationDescription': 'desc', + 'operationIdentifier': 'iden', + 'operationStatus': 'status', + 'operationType': 'type' + } + platform_dict = { + 'platformIdentifier': 'ident', + 'platformDescription': 'desc', + 'platformSponsor': ['sponsor1'] + } + dateFormat_dict = { + 'name': 'date1', + 'version': 'version1' + } + responsibleParty_dict = { + 'individualName': 'person name', + 'organizationName': 'organization', + 'positionName': 'position name', + 'role': 'role', + 'email': 'email addy', + 'phone': 'phone' + } + reference_dict = { + 'title': 'a title', + 'date': 'date', + 'links': [link_dict] + } + discovery_dict = { + 'fileIdentifier': 'gov.noaa.nodc:NDBC-COOPS', + 'parentIdentifier': 'gov.noaa.nodc:NDBC-COOPS', + 'hierarchyLevelName': '', + 'doi': 'doi', + 'purpose': 'purpose', + 'status': 'status', + 'credit': 'credit', + 'title': 'title', + 'alternateTitle': 'alternate title', + 'description': 'description', + 'keywords': [keywordsElement_dict], + 'topicCategories': ['category1'], + 'temporalBounding': temporalBounding_dict, + 'spatialBounding': None, + 'isGlobal': False, + 'acquisitionInstruments': [instruments_dict], + 'acquisitionOperations': [operation_dict], + 'acquisitionPlatforms': [platform_dict], + 'dataFormats': [dateFormat_dict], + 'links': [link_dict], + 'responsibleParties': [responsibleParty_dict], + 'thumbnail': 'thumbnail', + 'thumbnailDescription': 'thumbnail description', + 'creationDate': 'creation date', + 'revisionDate': 'revision date', + 'publicationDate': 'publicationd date', + 'citeAsStatements': ['cite as statements'], + 'crossReferences': [reference_dict], + 'largerWorks': [reference_dict], + 'useLimitation': 'use limitation', + 'legalConstraints': ['legal constraints'], + 'accessFeeStatement': 'access fee', + 'orderingInstructions': 'no instructions', + 'edition': 'edition1', + 'dsmmAccessibility': -4, + 'dsmmDataIntegrity': -3, + 'dsmmDataQualityAssessment': -2, + 'dsmmDataQualityAssurance': -1, + 'dsmmDataQualityControlMonitoring': 1, + 'dsmmPreservability': 2, + 'dsmmProductionSustainability': 3, + 'dsmmTransparencyTraceability': 4, + 'dsmmUsability': 5, + 'dsmmAverage': 5.0, + 'updateFrequency': 'update freq', + 'presentationForm': 'presentation form' + } + + identificationAnalysis_dict = { + 'fileIdentifierExists': True, + 'fileIdentifierString': 'file iden', + 'doiExists': False, + 'doiString': 'doi', + 'parentIdentifierExists': True, + 'parentIdentifierString': 'parent iden', + 'hierarchyLevelNameExists': False, + 'isGranule': True + } + titleAnalysis_dict = { + 'titleExists': True, + 'titleCharacters': 1, + 'alternateTitleExists': True, + 'alternateTitleCharacters': 2, + 'titleFleschReadingEaseScore': 3.0, + 'alternateTitleFleschReadingEaseScore': 4.0, + 'titleFleschKincaidReadingGradeLevel': 5.0, + 'alternateTitleFleschKincaidReadingGradeLevel': 6.0 + } + descriptionAnalysis_dict = { + 'descriptionExists': True, + 'descriptionCharacters': 3, + 'descriptionFleschReadingEaseScore': 1.0, + 'descriptionFleschKincaidReadingGradeLevel': 2.0 + } + dataAccessAnalysis_dict = { + 'dataAccessExists': False + } + thumbnail_dict = { + 'thumbnailExists': True + } + temporalBoundingAnalysis_dict = { + 'beginDescriptor': ValidDescriptor.VALID, + 'beginPrecision': 'begin prec', + 
'beginIndexable': True, + 'beginZoneSpecified': 'begin zone', + 'beginUtcDateTimeString': 'begin utc', + 'beginYear': 2021, + 'beginDayOfYear': 2, + 'beginDayOfMonth': 2, + 'beginMonth': 2, + 'endDescriptor': ValidDescriptor.INVALID, + 'endPrecision': 'end prec', + 'endIndexable': False, + 'endZoneSpecified': 'end zone', + 'endUtcDateTimeString': 'end utc', + 'endYear': 2025, + 'endDayOfYear': 2, + 'endDayOfMonth': 2, + 'endMonth': 2, + 'instantDescriptor': ValidDescriptor.UNDEFINED, + 'instantPrecision': 'instant prec', + 'instantIndexable': False, + 'instantZoneSpecified': 'instant zone', + 'instantUtcDateTimeString': 'instant utc', + 'instantEndUtcDateTimeString': 'instant end utc', + 'instantYear': 2, + 'instantDayOfYear': 2, + 'instantEndDayOfYear': 2, + 'instantDayOfMonth': 2, + 'instantEndDayOfMonth': 2, + 'instantMonth': 2, + 'instantEndMonth': 2, + 'rangeDescriptor': TimeRangeDescriptor.AMBIGUOUS + } + spatialBounding_dict = { + 'spatialBoundingExists': False, + 'isValid': True, + 'validationError': 'validation' + } + analysis_dict = { + 'identification': identificationAnalysis_dict, + 'titles': titleAnalysis_dict, + 'description': descriptionAnalysis_dict, + 'dataAccess': dataAccessAnalysis_dict, + 'thumbnail': thumbnail_dict, + 'temporalBounding': temporalBoundingAnalysis_dict, + 'spatialBounding': spatialBounding_dict + } + parsedRecord_dict = { + 'type': RecordType.collection, + 'discovery': discovery_dict, + 'analysis': analysis_dict, + 'fileLocations': { + 's3://noaa-goes16/ABI-L1b-RadF/2019/303/09/OR_ABI-L1b-RadF-M6C10_G16_s20193030950389_e20193031000109_c20193031000158.nc': { + **fileLocation_dict } } + } - parsed_record = ParsedRecord.from_dict(content_dict) + # Note: Didn't make unit tests for ENUMS since they don't execute any methods. 
+ def test_parsed_record_all_vars_set(self): + parsedRecord = ParsedRecord(**self.parsedRecord_dict) - self.assertIsNotNone(parsed_record) + self.assertEqual(parsedRecord.type, self.parsedRecord_dict['type']) - @unittest.skip - def test_discovery(self): - content_dict = { - "discovery":{ - "fileIdentifier":"1034194888", - "temporalBounding":{ - "beginDate":"2019-10-30T05:50:39Z", - "endDate":"2019-10-30T06:00:11Z" - }, - "parentIdentifier":"5b58de08-afef-49fb-99a1-9c5d5c003bde", - "links":[ - { - "linkFunction":"download", - "linkUrl":"s3://noaa-goes16/ABI-L1b-RadF/2019/303/09/OR_ABI-L1b-RadF-M6C10_G16_s20193030950389_e20193031000109_c20193031000158.nc", - "linkName":"Amazon S3", - "linkProtocol":"HTTPS" - } - ], - "spatialBounding":{ - "coordinates":[ - [ - [ - -156.2995, - -81.3282 - ], - [ - 6.2995, - -81.3282 - ], - [ - 6.2995, - 81.3282 - ], - [ - -156.2995, - 81.3282 - ], - [ - -156.2995, - -81.3282 - ] - ] - ], - "type":"Polygon" - }, - "title":"OR_ABI-L1b-RadF-M6C10_G16_s20193030950389_e20193031000109_c20193031000158.nc" - } - } + def test_discovery_all_vars_set(self): + discovery = Discovery(**self.discovery_dict) - parsed_record = ParsedRecord.from_dict(content_dict) + self.assertIsNotNone(discovery) - self.assertIsNotNone(parsed_record) + def test_keywords_all_vars_set(self): + keywords = KeywordsElement(**self.keywordsElement_dict) + + self.assertEqual(keywords.values, self.keywordsElement_dict['values']) + self.assertEqual(keywords.type, self.keywordsElement_dict['type']) + self.assertEqual(keywords.namespace, self.keywordsElement_dict['namespace']) + + def test_temporalBounding_all_vars_set(self): + temporal = TemporalBounding(**self.temporalBounding_dict) + + self.assertEqual(temporal.beginDate, self.temporalBounding_dict['beginDate']) + self.assertEqual(temporal.beginIndeterminate, self.temporalBounding_dict['beginIndeterminate']) + self.assertEqual(temporal.endDate, self.temporalBounding_dict['endDate']) + self.assertEqual(temporal.endIndeterminate, self.temporalBounding_dict['endIndeterminate']) + self.assertEqual(temporal.instant, self.temporalBounding_dict['instant']) + self.assertEqual(temporal.instantIndeterminate, self.temporalBounding_dict['instantIndeterminate']) + self.assertEqual(temporal.description, self.temporalBounding_dict['description']) + + def test_point_all_vars_set(self): + point = Point(**self.point_dict) + + self.assertEqual(point.type, self.point_dict['type']) + + def test_multiPoint_all_vars_set(self): + multi_point = MultiPoint(**self.multiPoint_dict) + + self.assertEqual(multi_point.type, self.multiPoint_dict['type']) + self.assertEqual(multi_point.coordinates, self.multiPoint_dict['coordinates']) + + def test_lineString_all_vars_set(self): + line_string = LineString(**self.lineString_dict) + + self.assertEqual(line_string.type, self.lineString_dict['type']) + self.assertEqual(line_string.coordinates, self.lineString_dict['coordinates']) + + def test_multiLineString_all_vars_set(self): + multi_line_string = MultiLineString(**self.multiLineString_dict) + + self.assertEqual(multi_line_string.type, self.multiLineString_dict['type']) + self.assertEqual(multi_line_string.coordinates, self.multiLineString_dict['coordinates']) + + def test_polygon_all_vars_set(self): + polygon = Polygon(**self.polygon_dict) -# TODO -# def test_analysis(self): + self.assertEqual(polygon.type, self.polygon_dict['type']) + self.assertEqual(polygon.coordinates, self.polygon_dict['coordinates']) + + def test_multiPolygon_all_vars_set(self): + multi_polygon = 
MultiPolygon(**self.multiPolygon_dict) + + self.assertEqual(multi_polygon.type, self.multiPolygon_dict['type']) + self.assertEqual(multi_polygon.coordinates, self.multiPolygon_dict['coordinates']) + + def test_instruments_all_vars_set(self): + instruments = Instruments(**self.instruments_dict) + + self.assertEqual(instruments.instrumentIdentifier, self.instruments_dict['instrumentIdentifier']) + self.assertEqual(instruments.instrumentType, self.instruments_dict['instrumentType']) + self.assertEqual(instruments.instrumentDescription, self.instruments_dict['instrumentDescription']) + + def test_operation_all_vars_set(self): + operation = Operation(**self.operation_dict) + + self.assertEqual(operation.operationDescription, self.operation_dict['operationDescription']) + self.assertEqual(operation.operationIdentifier, self.operation_dict['operationIdentifier']) + self.assertEqual(operation.operationStatus, self.operation_dict['operationStatus']) + self.assertEqual(operation.operationType, self.operation_dict['operationType']) + + def test_platform_all_vars_set(self): + platform = Platform(**self.platform_dict) + + self.assertEqual(platform.platformIdentifier, self.platform_dict['platformIdentifier']) + self.assertEqual(platform.platformDescription, self.platform_dict['platformDescription']) + self.assertEqual(platform.platformSponsor, self.platform_dict['platformSponsor']) + + def test_dateFormat_all_vars_set(self): + dateformat = DataFormat(**self.dateFormat_dict) + + self.assertEqual(dateformat.name, self.dateFormat_dict['name']) + self.assertEqual(dateformat.version, self.dateFormat_dict['version']) + + def test_link_all_vars_set(self): + link = Link(**self.link_dict) + + self.assertEqual(link.linkName, self.link_dict['linkName']) + self.assertEqual(link.linkProtocol, self.link_dict['linkProtocol']) + self.assertEqual(link.linkUrl, self.link_dict['linkUrl']) + self.assertEqual(link.linkDescription, self.link_dict['linkDescription']) + self.assertEqual(link.linkFunction, self.link_dict['linkFunction']) + + def test_responsibleParty_all_vars_set(self): + responsibleParty = ResponsibleParty(**self.responsibleParty_dict) + + self.assertEqual(responsibleParty.individualName, self.responsibleParty_dict['individualName']) + self.assertEqual(responsibleParty.organizationName, self.responsibleParty_dict['organizationName']) + self.assertEqual(responsibleParty.positionName, self.responsibleParty_dict['positionName']) + self.assertEqual(responsibleParty.role, self.responsibleParty_dict['role']) + self.assertEqual(responsibleParty.email, self.responsibleParty_dict['email']) + self.assertEqual(responsibleParty.phone, self.responsibleParty_dict['phone']) + + def test_reference_all_vars_set(self): + reference = Reference(**self.reference_dict) + + self.assertEqual(reference.title, self.reference_dict['title']) + self.assertEqual(reference.date, self.reference_dict['date']) + self.assertEqual(reference.links[0].linkName, self.reference_dict['links'][0]['linkName']) + self.assertEqual(reference.links[0].linkProtocol, self.reference_dict['links'][0]['linkProtocol']) + self.assertEqual(reference.links[0].linkUrl, self.reference_dict['links'][0]['linkUrl']) + self.assertEqual(reference.links[0].linkDescription, self.reference_dict['links'][0]['linkDescription']) + self.assertEqual(reference.links[0].linkFunction, self.reference_dict['links'][0]['linkFunction']) @unittest.skip - def test_fileInformation(self): + def test_fileInformation_all_vars_set(self): content_dict = { "fileInformation":{ "checksums":[ @@ 
-140,31 +425,20 @@ def test_fileInformation(self): self.assertIsNotNone(parsed_record) - @unittest.skip - def test_fileLocations(self): - content_dict = { - "fileLocations":{ - "s3://noaa-goes16/ABI-L1b-RadF/2019/303/09/OR_ABI-L1b-RadF-M6C10_G16_s20193030950389_e20193031000109_c20193031000158.nc":{ - "serviceType":"Amazon:AWS:S3", - "deleted":False, - "restricted":False, - "asynchronous":False, - "locality":"us-east-1", - "lastModified":1572430074000, - "type":"ACCESS", - "optionalAttributes":{ - }, - "uri":"s3://noaa-goes16/ABI-L1b-RadF/2019/303/09/OR_ABI-L1b-RadF-M6C10_G16_s20193030950389_e20193031000109_c20193031000158.nc" - } - } - } + fileInformation = FileI - parsed_record = ParsedRecord.from_dict(content_dict) - print("parsed_record:%s"%parsed_record) - self.assertIsNotNone(parsed_record) + def test_fileLocation_all_vars_set(self): + fileLocations = FileLocation(**self.fileLocation_dict) -# TODO -# def test_publishing(self): + self.assertEqual(fileLocations.uri, self.fileLocation_dict['uri']) + self.assertEqual(fileLocations.type, self.fileLocation_dict['type']) + self.assertEqual(fileLocations.deleted, self.fileLocation_dict['deleted']) + self.assertEqual(fileLocations.restricted, self.fileLocation_dict['restricted']) + self.assertEqual(fileLocations.asynchronous, self.fileLocation_dict['asynchronous']) + self.assertEqual(fileLocations.locality, self.fileLocation_dict['locality']) + self.assertEqual(fileLocations.lastModified, self.fileLocation_dict['lastModified']) + self.assertEqual(fileLocations.serviceType, self.fileLocation_dict['serviceType']) + self.assertEqual(fileLocations.optionalAttributes, self.fileLocation_dict['optionalAttributes']) def test_relationships_all_vars_set(self): relationship = Relationship(**self.relationship_dict) From 20b8cdb1feb0a38ada7846c091ebecb332c39527 Mon Sep 17 00:00:00 2001 From: Erin Date: Mon, 12 Jul 2021 12:46:14 -0600 Subject: [PATCH 118/129] 1508-Changed avro schema Method to Enum to match schema registry. Added tests. --- .../org/cedar/schemas/avro/psi/method.py | 56 +++++++++++-------- .../onestop/schemas/util/jsonEncoder.py | 2 + .../unit/schemas/util/test_jsonEncoder.py | 10 ++++ 3 files changed, 45 insertions(+), 23 deletions(-) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/method.py b/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/method.py index a741e7e..3d21d61 100644 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/method.py +++ b/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/method.py @@ -1,26 +1,36 @@ -from dataclasses import asdict, dataclass -from typing import Dict +from enum import Enum +from typing import ClassVar -from undictify import type_checked_constructor +class Method(Enum): + """ + The types of metadata relationships which can be represented in the PSI system + """ + HEAD = 'HEAD' + OPTIONS = 'OPTIONS' + GET = 'GET' + POST = 'POST' + PUT = 'PUT' + PATCH = 'PATCH' + DELETE = 'DELETE' + TRACE = 'TRACE' + CONNECT = 'CONNECT' -@type_checked_constructor() -@dataclass -class Method: - def to_dict(self) -> Dict: - """ - Returns a dictionary version of this instance. - """ - return asdict(self) - - @classmethod - def from_dict( - cls, - the_dict: Dict - ) -> 'Method': - """ - Returns an instance of this class from a dictionary. - - :param the_dict: The dictionary from which to create an instance of this class. 
- """ - return cls(**the_dict) + #: The Avro Schema associated to this class + _schema: ClassVar[str] = """{ + "type": "enum", + "namespace": "org.cedar.schemas.avro.psi", + "name": "Method", + "doc": "An HTTP request method", + "symbols": [ + "HEAD", + "OPTIONS", + "GET", + "POST", + "PUT", + "PATCH", + "DELETE", + "TRACE", + "CONNECT" + ] + }""" diff --git a/onestop-python-client/onestop/schemas/util/jsonEncoder.py b/onestop-python-client/onestop/schemas/util/jsonEncoder.py index e4a4c8e..cf1041c 100644 --- a/onestop-python-client/onestop/schemas/util/jsonEncoder.py +++ b/onestop-python-client/onestop/schemas/util/jsonEncoder.py @@ -6,6 +6,7 @@ from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.valid_descriptor import ValidDescriptor from onestop.schemas.psiSchemaClasses.time_range_descriptor import TimeRangeDescriptor from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.file_location_type import FileLocationType +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.method import Method from onestop.schemas.geojsonSchemaClasses.line_string_type import LineStringType from onestop.schemas.geojsonSchemaClasses.multi_line_string_type import MultiLineStringType from onestop.schemas.geojsonSchemaClasses.multi_point_type import MultiPointType @@ -20,6 +21,7 @@ 'RecordType': RecordType, 'TimeRangeDescriptor': TimeRangeDescriptor, 'LineStringType': LineStringType, + 'Method': Method, 'MultiLineStringType': MultiLineStringType, 'MultiPointType': MultiPointType, 'MultiPolygonType': MultiPolygonType, diff --git a/onestop-python-client/test/unit/schemas/util/test_jsonEncoder.py b/onestop-python-client/test/unit/schemas/util/test_jsonEncoder.py index 5d9db01..78cd6ec 100644 --- a/onestop-python-client/test/unit/schemas/util/test_jsonEncoder.py +++ b/onestop-python-client/test/unit/schemas/util/test_jsonEncoder.py @@ -8,6 +8,7 @@ from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.valid_descriptor import ValidDescriptor from onestop.schemas.psiSchemaClasses.time_range_descriptor import TimeRangeDescriptor from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.file_location_type import FileLocationType +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.method import Method from onestop.schemas.geojsonSchemaClasses.line_string_type import LineStringType from onestop.schemas.geojsonSchemaClasses.multi_line_string_type import MultiLineStringType from onestop.schemas.geojsonSchemaClasses.multi_point_type import MultiPointType @@ -62,6 +63,15 @@ def test_linestring_enum_class_encodes(self): self.assertEqual(jsonStr, '{"__enum__": "LineStringType.%s"}'%type) + def test_method_enum_class_encodes(self): + type = Method.CONNECT + obj = Method(type) + + jsonStr = json.dumps(obj, + cls=EnumEncoder) + + self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) + def test_multilinestringtype_enum_class_encodes(self): type = MultiLineStringType.MultiLineString.value obj = MultiLineStringType(type) From c3a22c8de52cf0aeb622bb6d9cc2f41dedc9ec6f Mon Sep 17 00:00:00 2001 From: Erin Date: Mon, 12 Jul 2021 12:51:13 -0600 Subject: [PATCH 119/129] 1508-Changed test_jsonEncoder tests to not need manually typing the class name in the assertion. 
--- .../unit/schemas/util/test_jsonEncoder.py | 48 +++++++++---------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/onestop-python-client/test/unit/schemas/util/test_jsonEncoder.py b/onestop-python-client/test/unit/schemas/util/test_jsonEncoder.py index 78cd6ec..76ce9d0 100644 --- a/onestop-python-client/test/unit/schemas/util/test_jsonEncoder.py +++ b/onestop-python-client/test/unit/schemas/util/test_jsonEncoder.py @@ -19,49 +19,49 @@ class jsonEncoderTest(unittest.TestCase): def test_checksumalgorithm_enum_class_encodes(self): - type = ChecksumAlgorithm.MD5.value + type = ChecksumAlgorithm.MD5 obj = ChecksumAlgorithm(type) jsonStr = json.dumps(obj, cls=EnumEncoder) - self.assertEqual(jsonStr, '{"__enum__": "ChecksumAlgorithm.%s"}'%type) + self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) def test_relationshiptype_enum_class_encodes(self): - type = RelationshipType.collection.value + type = RelationshipType.collection obj = RelationshipType(type) jsonStr = json.dumps(obj, cls=EnumEncoder) - self.assertEqual(jsonStr, '{"__enum__": "RelationshipType.%s"}'%type.lower()) + self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) def test_recordtype_enum_class_encodes(self): - type = RecordType.granule.value + type = RecordType.granule obj = RecordType(type) jsonStr = json.dumps(obj, cls=EnumEncoder) - self.assertEqual(jsonStr, '{"__enum__": "RecordType.%s"}'%type.lower()) + self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) def test_timerangedescriptor_enum_class_encodes(self): - type = TimeRangeDescriptor.AMBIGUOUS.value + type = TimeRangeDescriptor.AMBIGUOUS obj = TimeRangeDescriptor(type) jsonStr = json.dumps(obj, cls=EnumEncoder) - self.assertEqual(jsonStr, '{"__enum__": "TimeRangeDescriptor.%s"}'%type) + self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) def test_linestring_enum_class_encodes(self): - type = LineStringType.LineString.value + type = LineStringType.LineString obj = LineStringType(type) jsonStr = json.dumps(obj, cls=EnumEncoder) - self.assertEqual(jsonStr, '{"__enum__": "LineStringType.%s"}'%type) + self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) def test_method_enum_class_encodes(self): type = Method.CONNECT @@ -73,64 +73,64 @@ def test_method_enum_class_encodes(self): self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) def test_multilinestringtype_enum_class_encodes(self): - type = MultiLineStringType.MultiLineString.value + type = MultiLineStringType.MultiLineString obj = MultiLineStringType(type) jsonStr = json.dumps(obj, cls=EnumEncoder) - self.assertEqual(jsonStr, '{"__enum__": "MultiLineStringType.%s"}'%type) + self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) def test_multipointtype_enum_class_encodes(self): - type = MultiPointType.MultiPoint.value + type = MultiPointType.MultiPoint obj = MultiPointType(type) jsonStr = json.dumps(obj, cls=EnumEncoder) - self.assertEqual(jsonStr, '{"__enum__": "MultiPointType.%s"}'%type) + self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) def test_multipolygontype_enum_class_encodes(self): - type = MultiPolygonType.MultiPolygon.value + type = MultiPolygonType.MultiPolygon obj = MultiPolygonType(type) jsonStr = json.dumps(obj, cls=EnumEncoder) - self.assertEqual(jsonStr, '{"__enum__": "MultiPolygonType.%s"}'%type) + self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) def test_pointtype_enum_class_encodes(self): - type = PointType.Point.value + type = PointType.Point obj = PointType(type) jsonStr = json.dumps(obj, cls=EnumEncoder) - self.assertEqual(jsonStr, '{"__enum__": "PointType.%s"}'%type) + 
self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) def test_polygontype_enum_class_encodes(self): - type = PolygonType.Polygon.value + type = PolygonType.Polygon obj = PolygonType(type) jsonStr = json.dumps(obj, cls=EnumEncoder) - self.assertEqual(jsonStr, '{"__enum__": "PolygonType.%s"}'%type) + self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) def test_filelocationtype_enum_class_encodes(self): - type = FileLocationType.INGEST.value + type = FileLocationType.INGEST obj = FileLocationType(type) jsonStr = json.dumps(obj, cls=EnumEncoder) - self.assertEqual(jsonStr, '{"__enum__": "FileLocationType.%s"}'%type) + self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) def test_validdescriptor_enum_class_encodes(self): - type = ValidDescriptor.INVALID.value + type = ValidDescriptor.INVALID obj = ValidDescriptor(type) jsonStr = json.dumps(obj, cls=EnumEncoder) - self.assertEqual(jsonStr, '{"__enum__": "ValidDescriptor.%s"}'%type) + self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) From 3f57b3850c3824fb2468e1ab3280ca67953d522a Mon Sep 17 00:00:00 2001 From: Erin Date: Mon, 12 Jul 2021 12:58:01 -0600 Subject: [PATCH 120/129] 1508-Changed avro schema OperationType to Enum to match schema registry. Added tests. --- .../cedar/schemas/avro/psi/operation_type.py | 44 +++++++++---------- .../onestop/schemas/util/jsonEncoder.py | 2 + .../unit/schemas/util/test_jsonEncoder.py | 10 +++++ 3 files changed, 33 insertions(+), 23 deletions(-) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/operation_type.py b/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/operation_type.py index 7ceee49..a83da87 100644 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/operation_type.py +++ b/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/operation_type.py @@ -1,26 +1,24 @@ -from dataclasses import asdict, dataclass -from typing import Dict +from enum import Enum +from typing import ClassVar -from undictify import type_checked_constructor +class OperationType(Enum): + """ + The types of metadata relationships which can be represented in the PSI system + """ + NO_OP = "NO_OP" + ADD = "ADD" + REMOVE = "REMOVE" -@type_checked_constructor() -@dataclass -class OperationType: - def to_dict(self) -> Dict: - """ - Returns a dictionary version of this instance. - """ - return asdict(self) - - @classmethod - def from_dict( - cls, - the_dict: Dict - ) -> 'OperationType': - """ - Returns an instance of this class from a dictionary. - - :param the_dict: The dictionary from which to create an instance of this class. - """ - return cls(**the_dict) + #: The Avro Schema associated to this class + _schema: ClassVar[str] = """{ + "type": "enum", + "namespace": "org.cedar.schemas.avro.psi", + "name": "OperationType", + "doc": "The specific operation to execute, mainly for PATCH-method input messages. 
Use default of NO_OP for when the method is unambiguous on its own", + "symbols": [ + "NO_OP", + "ADD", + "REMOVE" + ] + }""" \ No newline at end of file diff --git a/onestop-python-client/onestop/schemas/util/jsonEncoder.py b/onestop-python-client/onestop/schemas/util/jsonEncoder.py index cf1041c..ba39e9e 100644 --- a/onestop-python-client/onestop/schemas/util/jsonEncoder.py +++ b/onestop-python-client/onestop/schemas/util/jsonEncoder.py @@ -4,6 +4,7 @@ from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.relationship_type import RelationshipType from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.record_type import RecordType from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.valid_descriptor import ValidDescriptor +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.operation_type import OperationType from onestop.schemas.psiSchemaClasses.time_range_descriptor import TimeRangeDescriptor from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.file_location_type import FileLocationType from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.method import Method @@ -25,6 +26,7 @@ 'MultiLineStringType': MultiLineStringType, 'MultiPointType': MultiPointType, 'MultiPolygonType': MultiPolygonType, + 'OperationType': OperationType, 'PointType': PointType, 'PolygonType': PolygonType, 'FileLocationType': FileLocationType, diff --git a/onestop-python-client/test/unit/schemas/util/test_jsonEncoder.py b/onestop-python-client/test/unit/schemas/util/test_jsonEncoder.py index 76ce9d0..b4077fc 100644 --- a/onestop-python-client/test/unit/schemas/util/test_jsonEncoder.py +++ b/onestop-python-client/test/unit/schemas/util/test_jsonEncoder.py @@ -6,6 +6,7 @@ from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.relationship_type import RelationshipType from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.record_type import RecordType from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.valid_descriptor import ValidDescriptor +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.operation_type import OperationType from onestop.schemas.psiSchemaClasses.time_range_descriptor import TimeRangeDescriptor from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.file_location_type import FileLocationType from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.method import Method @@ -99,6 +100,15 @@ def test_multipolygontype_enum_class_encodes(self): self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) + def test_operationtype_enum_class_encodes(self): + type = OperationType.ADD + obj = OperationType(type) + + jsonStr = json.dumps(obj, + cls=EnumEncoder) + + self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) + def test_pointtype_enum_class_encodes(self): type = PointType.Point obj = PointType(type) From 36ae0b60bddde6c038dd3cd338fe269d6f5ee6ca Mon Sep 17 00:00:00 2001 From: Erin Date: Mon, 12 Jul 2021 14:12:46 -0600 Subject: [PATCH 121/129] 1508-Finished test_ParsedRecord for the avro object ParsedRecord. 
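With OperationType now a plain Enum, call sites look members up by attribute or by schema symbol rather than going through to_dict/from_dict. A short illustrative sketch (the class body mirrors the symbols added above; the surrounding assertions are examples only, not project code):

    from enum import Enum

    class OperationType(Enum):
        NO_OP = 'NO_OP'
        ADD = 'ADD'
        REMOVE = 'REMOVE'

    # Attribute access and value lookup resolve to the same member:
    assert OperationType.ADD is OperationType('ADD')
    assert OperationType.ADD.value == 'ADD'

    # Symbols outside the schema now fail fast with ValueError:
    try:
        OperationType('UPSERT')
    except ValueError as err:
        print(err)   # 'UPSERT' is not a valid OperationType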
--- .../schemas/avro/psi/test_ParsedRecord.py | 62 ++++++++++++------- 1 file changed, 38 insertions(+), 24 deletions(-) diff --git a/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/test_ParsedRecord.py b/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/test_ParsedRecord.py index fb7f3c9..f116e59 100644 --- a/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/test_ParsedRecord.py +++ b/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/test_ParsedRecord.py @@ -13,6 +13,9 @@ from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.reference import Reference from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.file_location_type import FileLocationType from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.valid_descriptor import ValidDescriptor +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.analysis import Analysis +from onestop.schemas.psiSchemaClasses.identification_analysis import IdentificationAnalysis +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.checksum_algorithm import ChecksumAlgorithm from onestop.schemas.psiSchemaClasses.operation import Operation from onestop.schemas.psiSchemaClasses.data_format import DataFormat from onestop.schemas.psiSchemaClasses.platform import Platform @@ -269,15 +272,46 @@ class test_ParsedRecord(unittest.TestCase): 'temporalBounding': temporalBoundingAnalysis_dict, 'spatialBounding': spatialBounding_dict } + checksum_dict = { + 'algorithm': ChecksumAlgorithm.MD5, + 'value': 'value1' + } + fileInformation_dict = { + 'name': 'file name', + 'size': 1, + 'checksums': [checksum_dict], + 'format': 'format', + 'headers': 'header', + 'optionalAttributes': {'attr1': 'value1', 'attr2': 'value2'} + } + publishing_dict = { + 'isPrivate': True, + 'until': -1 + } + relationships_dict = { + 'type': RelationshipType.collection, + 'id': 'id1' + } + errorEvent_dict = { + 'title': 'title1', + 'detail': 'detail1', + 'status': 404, + 'code': 500, + 'source': 'source1' + } parsedRecord_dict = { 'type': RecordType.collection, 'discovery': discovery_dict, 'analysis': analysis_dict, + 'fileInformation': fileInformation_dict, 'fileLocations': { 's3://noaa-goes16/ABI-L1b-RadF/2019/303/09/OR_ABI-L1b-RadF-M6C10_G16_s20193030950389_e20193031000109_c20193031000158.nc': { **fileLocation_dict } - } + }, + 'publishing': publishing_dict, + 'relationships': [relationships_dict], + 'errors': [errorEvent_dict] } # Note: Didn't make unit tests for ENUMS since they don't execute any methods. 
@@ -402,30 +436,10 @@ def test_reference_all_vars_set(self): self.assertEqual(reference.links[0].linkDescription, self.reference_dict['links'][0]['linkDescription']) self.assertEqual(reference.links[0].linkFunction, self.reference_dict['links'][0]['linkFunction']) - @unittest.skip - def test_fileInformation_all_vars_set(self): - content_dict = { - "fileInformation":{ - "checksums":[ - { - "value":"4809084627a18d54db59659819f8a4b5d2c76367", - "algorithm":"SHA1" - } - ], - "headers":"NetCDF file reader", - "size":22876986, - "name":"OR_ABI-L1b-RadF-M6C10_G16_s20193030950389_e20193031000109_c20193031000158.nc", - "format":"NetCDF", - "optionalAttributes":{ - } - } - } - - parsed_record = ParsedRecord.from_dict(content_dict) - - self.assertIsNotNone(parsed_record) + def test_analysis_all_vars_set(self): + analysis = Analysis(**self.analysis_dict) - fileInformation = FileI + self.assertEqual(analysis.identification, IdentificationAnalysis(**self.identificationAnalysis_dict)) def test_fileLocation_all_vars_set(self): fileLocations = FileLocation(**self.fileLocation_dict) From 79b8979b6a12d3bd0df0f1770c6df88d208f4f27 Mon Sep 17 00:00:00 2001 From: Erin Date: Mon, 12 Jul 2021 14:46:06 -0600 Subject: [PATCH 122/129] 1508-Changed scripts with callback methods to define their records parameter as a record. This is because our S3MessageAdapter.transform, SqsHandler methods, and SqsConsumer.receive_messages all eventually only operate on 1 record. --- scripts/launch_e2e.py | 11 +++++------ scripts/launch_pyconsumer.py | 9 ++++----- scripts/sqs-to-registry/s3_notification_handler.py | 11 +++++------ 3 files changed, 14 insertions(+), 17 deletions(-) diff --git a/scripts/launch_e2e.py b/scripts/launch_e2e.py index f43af3c..2af68c3 100644 --- a/scripts/launch_e2e.py +++ b/scripts/launch_e2e.py @@ -12,11 +12,11 @@ config_dict = {} -def handler(recs, log_level): +def handler(rec, log_level): ''' Processes metadata information from sqs message triggered by S3 event and uploads to registry through web publisher (https). Also uploads s3 object to glacier. - :param recs: dict + :param rec: dict sqs message triggered by s3 event :return: str @@ -27,12 +27,11 @@ def handler(recs, log_level): logger.info('In Handler') # If record exists try to get object-uuid retrieval - logger.debug('Records:%s'%recs) - if recs is None: - logger.info('No records retrieved, doing nothing.') + logger.debug('Record:%s'%rec) + if rec is None: + logger.info('No record retrieved, doing nothing.') return - rec = recs[0] logger.debug('Record: %s'%rec) bucket = rec['s3']['bucket']['name'] s3_key = rec['s3']['object']['key'] diff --git a/scripts/launch_pyconsumer.py b/scripts/launch_pyconsumer.py index 794ca99..e2b94f2 100644 --- a/scripts/launch_pyconsumer.py +++ b/scripts/launch_pyconsumer.py @@ -12,11 +12,11 @@ config_dict = {} -def handler(recs, log_level): +def handler(rec, log_level): ''' Processes metadata information from sqs message triggered by S3 event and uploads to registry through web publisher (https). Utilizes helm for credentials and conf. 
- :param recs: dict + :param rec: dict sqs message triggered by s3 event :return: str @@ -28,11 +28,10 @@ def handler(recs, log_level): # Now get boto client for object-uuid retrieval object_uuid = None - if recs is None: - logger.info('No records retrieved, doing nothing.') + if rec is None: + logger.info('No record retrieved, doing nothing.') return - rec = recs[0] bucket = rec['s3']['bucket']['name'] s3_key = rec['s3']['object']['key'] diff --git a/scripts/sqs-to-registry/s3_notification_handler.py b/scripts/sqs-to-registry/s3_notification_handler.py index e5e0490..23f4165 100644 --- a/scripts/sqs-to-registry/s3_notification_handler.py +++ b/scripts/sqs-to-registry/s3_notification_handler.py @@ -15,21 +15,20 @@ config_dict = {} -def handler(recs, log_level): +def handler(rec, log_level): logger = ClientLogger.get_logger('s3_notification_handler.handler', log_level, False) logger.info('In Handler') - if recs is None: - logger.info('No records retrieved, doing nothing.') + if rec is None: + logger.info('No record retrieved, doing nothing.') return - rec = recs[0] logger.info('Record:%s'%rec) if 'ObjectRemoved' in rec['eventName']: - delete_handler(recs) + delete_handler(rec) else: - upload_handler(recs) + upload_handler(rec) if __name__ == '__main__': # Example command: python3 archive_client_integration.py -conf /Users/whoever/repo/onestop-clients/scripts/config/combined_template.yml -cred /Users/whoever/repo/onestop-clients/scripts/config/credentials.yml From d8093d4b4768808a58da34765bd841cd983e18ed Mon Sep 17 00:00:00 2001 From: Erin Date: Mon, 12 Jul 2021 14:46:37 -0600 Subject: [PATCH 123/129] 1508-Updated launch_pyconsumer script to have a try/catch around get_uuid_metadata --- scripts/launch_pyconsumer.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/scripts/launch_pyconsumer.py b/scripts/launch_pyconsumer.py index e2b94f2..1ad7282 100644 --- a/scripts/launch_pyconsumer.py +++ b/scripts/launch_pyconsumer.py @@ -9,6 +9,7 @@ from onestop.WebPublisher import WebPublisher from onestop.util.ClientLogger import ClientLogger from onestop.schemas.util.jsonEncoder import EnumEncoder +from botocore.exceptions import ClientError config_dict = {} @@ -34,11 +35,15 @@ def handler(rec, log_level): bucket = rec['s3']['bucket']['name'] s3_key = rec['s3']['object']['key'] - + logger.debug('Rec: %s'%rec) # Fetch the object to get the uuid logger.info("Getting uuid") s3_resource = s3_utils.connect('resource', 's3', None) - object_uuid = s3_utils.get_uuid_metadata(s3_resource, bucket, s3_key) + try: + object_uuid = s3_utils.get_uuid_metadata(s3_resource, bucket, s3_key) + except ClientError as e: + logger.error(e) + return if object_uuid is not None: logger.info('Retrieved object-uuid: %s'% object_uuid) From 0f8b797b51c7e343fa70dd7805ca75ecba325836 Mon Sep 17 00:00:00 2001 From: Erin Date: Mon, 12 Jul 2021 15:38:57 -0600 Subject: [PATCH 124/129] 1508-Changed avro enum class vars to all uppercase as is typical of enum classes. 
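Renaming the members to uppercase changes only how code refers to them; the value strings still match the Avro schema symbols, so anything keyed off .value, or value-based lookup, is unaffected. An illustrative sketch using PointType as patched below (the assertions are examples, not project code):

    from enum import Enum

    class PointType(Enum):
        POINT = 'Point'   # member renamed from Point; value string unchanged

    # Lookup by schema symbol still works exactly as before the rename:
    assert PointType('Point') is PointType.POINT
    assert PointType.POINT.value == 'Point'

    # Only attribute references needed updating, e.g. PointType.Point -> PointType.POINT,
    # which is what the test changes in this patch do.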
--- .../geojsonSchemaClasses/line_string_type.py | 2 +- .../multi_line_string_type.py | 2 +- .../geojsonSchemaClasses/multi_point_type.py | 2 +- .../geojsonSchemaClasses/multi_polygon_type.py | 2 +- .../schemas/geojsonSchemaClasses/point_type.py | 2 +- .../geojsonSchemaClasses/polygon_type.py | 2 +- .../org/cedar/schemas/avro/psi/record_type.py | 4 ++-- .../schemas/avro/psi/relationship_type.py | 2 +- .../schemas/avro/psi/test_ParsedRecord.py | 18 +++++++++--------- .../test/unit/schemas/util/test_jsonEncoder.py | 16 ++++++++-------- 10 files changed, 26 insertions(+), 26 deletions(-) diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses/line_string_type.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses/line_string_type.py index eba35f3..dcf0f49 100644 --- a/onestop-python-client/onestop/schemas/geojsonSchemaClasses/line_string_type.py +++ b/onestop-python-client/onestop/schemas/geojsonSchemaClasses/line_string_type.py @@ -3,7 +3,7 @@ class LineStringType(Enum): - LineString = 'LineString' + LINESTRING = 'LineString' #: The Avro Schema associated to this class _schema: ClassVar[str] = """{ diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses/multi_line_string_type.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses/multi_line_string_type.py index 7e16945..4f47369 100644 --- a/onestop-python-client/onestop/schemas/geojsonSchemaClasses/multi_line_string_type.py +++ b/onestop-python-client/onestop/schemas/geojsonSchemaClasses/multi_line_string_type.py @@ -3,7 +3,7 @@ class MultiLineStringType(Enum): - MultiLineString = 'MultiLineString' + MULTILINESTRING = 'MultiLineString' #: The Avro Schema associated to this class _schema: ClassVar[str] = """{ diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses/multi_point_type.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses/multi_point_type.py index a428813..dcd95a7 100644 --- a/onestop-python-client/onestop/schemas/geojsonSchemaClasses/multi_point_type.py +++ b/onestop-python-client/onestop/schemas/geojsonSchemaClasses/multi_point_type.py @@ -3,7 +3,7 @@ class MultiPointType(Enum): - MultiPoint = 'MultiPoint' + MULTIPOINT = 'MultiPoint' #: The Avro Schema associated to this class _schema: ClassVar[str] = """{ diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses/multi_polygon_type.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses/multi_polygon_type.py index 8c98577..6bb38a1 100644 --- a/onestop-python-client/onestop/schemas/geojsonSchemaClasses/multi_polygon_type.py +++ b/onestop-python-client/onestop/schemas/geojsonSchemaClasses/multi_polygon_type.py @@ -3,7 +3,7 @@ class MultiPolygonType(Enum): - MultiPolygon = 'MultiPolygon' + MULTIPOLYGON = 'MultiPolygon' #: The Avro Schema associated to this class _schema: ClassVar[str] = """{ diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses/point_type.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses/point_type.py index 386124e..08a2043 100644 --- a/onestop-python-client/onestop/schemas/geojsonSchemaClasses/point_type.py +++ b/onestop-python-client/onestop/schemas/geojsonSchemaClasses/point_type.py @@ -3,7 +3,7 @@ class PointType(Enum): - Point = 'Point' + POINT = 'Point' #: The Avro Schema associated to this class _schema: ClassVar[str] = """{ diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses/polygon_type.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses/polygon_type.py index 6e373a3..0c1986b 100644 --- 
a/onestop-python-client/onestop/schemas/geojsonSchemaClasses/polygon_type.py +++ b/onestop-python-client/onestop/schemas/geojsonSchemaClasses/polygon_type.py @@ -3,7 +3,7 @@ class PolygonType(Enum): - Polygon = 'Polygon' + POLYGON = 'Polygon' #: The Avro Schema associated to this class _schema: ClassVar[str] = """{ "name": "PolygonType", diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/record_type.py b/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/record_type.py index 3a3cf09..a74ab03 100644 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/record_type.py +++ b/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/record_type.py @@ -6,8 +6,8 @@ class RecordType(Enum): """ The types of metadata records which can be represented in the PSI system """ - collection = 'collection' - granule = 'granule' + COLLECTION = 'collection' + GRANULE = 'granule' #: The Avro Schema associated to this class _schema: ClassVar[str] = """{ diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/relationship_type.py b/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/relationship_type.py index ba96dee..3c9b354 100644 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/relationship_type.py +++ b/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/relationship_type.py @@ -6,7 +6,7 @@ class RelationshipType(Enum): """ The types of metadata relationships which can be represented in the PSI system """ - collection = 'COLLECTION' + COLLECTION = 'COLLECTION' #: The Avro Schema associated to this class _schema: ClassVar[str] = """{ diff --git a/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/test_ParsedRecord.py b/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/test_ParsedRecord.py index f116e59..162950c 100644 --- a/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/test_ParsedRecord.py +++ b/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/test_ParsedRecord.py @@ -52,7 +52,7 @@ class test_ParsedRecord(unittest.TestCase): relationship_dict = { "id": "5b58de08-afef-49fb-99a1-9c5d5c003bde", - "type": RelationshipType.collection + "type": RelationshipType.COLLECTION } relationships_dict = { "relationships":[ @@ -87,27 +87,27 @@ class test_ParsedRecord(unittest.TestCase): 'description': 'desc' } point_dict = { - 'type': PointType.Point, + 'type': PointType.POINT, 'coordinates': [0.0, 1.1, 2.2, 3.3] } multiPoint_dict = { - 'type': MultiPointType.MultiPoint, + 'type': MultiPointType.MULTIPOINT, 'coordinates': [[0.0, 1.0], [2.0, 1.0]] } lineString_dict = { - 'type': LineStringType.LineString, + 'type': LineStringType.LINESTRING, 'coordinates': [[0.0, 1.0], [2.0, 1.0]] } multiLineString_dict = { - 'type': MultiLineStringType.MultiLineString, + 'type': MultiLineStringType.MULTILINESTRING, 'coordinates': [[[0.0, 1.0], [2.0, 1.0]], [[0.0, 2.0], [2.0, 2.0]]] } polygon_dict = { - 'type': PolygonType.Polygon, + 'type': PolygonType.POLYGON, 'coordinates': [[[0.0, 1.0], [2.0, 1.0]], [[0.0, 2.0], [2.0, 2.0]]] } multiPolygon_dict = { - 'type': MultiPolygonType.MultiPolygon, + 'type': MultiPolygonType.MULTIPOLYGON, 'coordinates': [[[[0.0, 1.0], [2.0, 1.0]], [[0.0, 2.0], [2.0, 2.0]]], [[[1.0, 1.0], [2.0, 1.0]], [[0.0, 
2.0], [2.0, 2.0]]]] } instruments_dict = { @@ -289,7 +289,7 @@ class test_ParsedRecord(unittest.TestCase): 'until': -1 } relationships_dict = { - 'type': RelationshipType.collection, + 'type': RelationshipType.COLLECTION, 'id': 'id1' } errorEvent_dict = { @@ -300,7 +300,7 @@ class test_ParsedRecord(unittest.TestCase): 'source': 'source1' } parsedRecord_dict = { - 'type': RecordType.collection, + 'type': RecordType.COLLECTION, 'discovery': discovery_dict, 'analysis': analysis_dict, 'fileInformation': fileInformation_dict, diff --git a/onestop-python-client/test/unit/schemas/util/test_jsonEncoder.py b/onestop-python-client/test/unit/schemas/util/test_jsonEncoder.py index b4077fc..f6dbcf3 100644 --- a/onestop-python-client/test/unit/schemas/util/test_jsonEncoder.py +++ b/onestop-python-client/test/unit/schemas/util/test_jsonEncoder.py @@ -29,7 +29,7 @@ def test_checksumalgorithm_enum_class_encodes(self): self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) def test_relationshiptype_enum_class_encodes(self): - type = RelationshipType.collection + type = RelationshipType.COLLECTION obj = RelationshipType(type) jsonStr = json.dumps(obj, @@ -38,7 +38,7 @@ def test_relationshiptype_enum_class_encodes(self): self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) def test_recordtype_enum_class_encodes(self): - type = RecordType.granule + type = RecordType.GRANULE obj = RecordType(type) jsonStr = json.dumps(obj, @@ -56,7 +56,7 @@ def test_timerangedescriptor_enum_class_encodes(self): self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) def test_linestring_enum_class_encodes(self): - type = LineStringType.LineString + type = LineStringType.LINESTRING obj = LineStringType(type) jsonStr = json.dumps(obj, @@ -74,7 +74,7 @@ def test_method_enum_class_encodes(self): self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) def test_multilinestringtype_enum_class_encodes(self): - type = MultiLineStringType.MultiLineString + type = MultiLineStringType.MULTILINESTRING obj = MultiLineStringType(type) jsonStr = json.dumps(obj, @@ -83,7 +83,7 @@ def test_multilinestringtype_enum_class_encodes(self): self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) def test_multipointtype_enum_class_encodes(self): - type = MultiPointType.MultiPoint + type = MultiPointType.MULTIPOINT obj = MultiPointType(type) jsonStr = json.dumps(obj, @@ -92,7 +92,7 @@ def test_multipointtype_enum_class_encodes(self): self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) def test_multipolygontype_enum_class_encodes(self): - type = MultiPolygonType.MultiPolygon + type = MultiPolygonType.MULTIPOLYGON obj = MultiPolygonType(type) jsonStr = json.dumps(obj, @@ -110,7 +110,7 @@ def test_operationtype_enum_class_encodes(self): self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) def test_pointtype_enum_class_encodes(self): - type = PointType.Point + type = PointType.POINT obj = PointType(type) jsonStr = json.dumps(obj, @@ -119,7 +119,7 @@ def test_pointtype_enum_class_encodes(self): self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) def test_polygontype_enum_class_encodes(self): - type = PolygonType.Polygon + type = PolygonType.POLYGON obj = PolygonType(type) jsonStr = json.dumps(obj, From 331a0ebbcb4704b69ad915efcae6727ce4721f64 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 13 Jul 2021 15:48:51 -0600 Subject: [PATCH 125/129] 1508-Fixed typo in jsonEncoder ENUMS dict; RelationshipType was all uppercase. 
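The ENUMS key casing matters because, assuming as_enum resolves the "__enum__" tag by splitting on "." and looking the class name up in ENUMS (which is what the decode tests added in the next patch exercise), a mis-cased key surfaces as a KeyError at decode time. A self-contained sketch of that failure mode (the dict is passed in explicitly here to keep the example standalone; the real as_enum reads the module-level ENUMS):

    from enum import Enum

    class RelationshipType(Enum):
        COLLECTION = 'COLLECTION'

    def as_enum(d, enums):
        if '__enum__' in d:
            name, member = d['__enum__'].split('.', 1)
            return getattr(enums[name], member)   # KeyError if the dict key is mis-cased
        return d

    tag = {'__enum__': 'RelationshipType.COLLECTION'}
    print(as_enum(tag, {'RelationshipType': RelationshipType}))   # RelationshipType.COLLECTION
    try:
        as_enum(tag, {'RELATIONSHIPTYPE': RelationshipType})      # the pre-fix key
    except KeyError as err:
        print('decode fails:', err)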
--- onestop-python-client/onestop/schemas/util/jsonEncoder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onestop-python-client/onestop/schemas/util/jsonEncoder.py b/onestop-python-client/onestop/schemas/util/jsonEncoder.py index ba39e9e..e3f13e1 100644 --- a/onestop-python-client/onestop/schemas/util/jsonEncoder.py +++ b/onestop-python-client/onestop/schemas/util/jsonEncoder.py @@ -18,7 +18,7 @@ # Diction of all the Enum Classes ENUMS = { 'ChecksumAlgorithm': ChecksumAlgorithm, - 'RELATIONSHIPTYPE': RelationshipType, + 'RelationshipType': RelationshipType, 'RecordType': RecordType, 'TimeRangeDescriptor': TimeRangeDescriptor, 'LineStringType': LineStringType, From dcc4d523b2213ec8259e43344a22f798ced48090 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 13 Jul 2021 16:21:29 -0600 Subject: [PATCH 126/129] 1508-Added tests to hit json decoding of ENUM classes with json.loads(value['content'], object_hook=as_enum) --- .../unit/schemas/util/test_jsonEncoder.py | 62 ++++++++++++++++++- 1 file changed, 59 insertions(+), 3 deletions(-) diff --git a/onestop-python-client/test/unit/schemas/util/test_jsonEncoder.py b/onestop-python-client/test/unit/schemas/util/test_jsonEncoder.py index f6dbcf3..0da8331 100644 --- a/onestop-python-client/test/unit/schemas/util/test_jsonEncoder.py +++ b/onestop-python-client/test/unit/schemas/util/test_jsonEncoder.py @@ -1,7 +1,7 @@ import json import unittest -from onestop.schemas.util.jsonEncoder import EnumEncoder +from onestop.schemas.util.jsonEncoder import EnumEncoder, as_enum from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.checksum_algorithm import ChecksumAlgorithm from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.relationship_type import RelationshipType from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.record_type import RecordType @@ -28,15 +28,23 @@ def test_checksumalgorithm_enum_class_encodes(self): self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) + def test_checksumalgorithm_json_decode_enum(self): + content = "{\"type\": {\"__enum__\": \"ChecksumAlgorithm.MD5\"}}" + json.loads(content, object_hook=as_enum) + def test_relationshiptype_enum_class_encodes(self): type = RelationshipType.COLLECTION obj = RelationshipType(type) jsonStr = json.dumps(obj, cls=EnumEncoder) - + print("jsonStr:%s"%jsonStr) self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) + def test_relationshiptype_json_decode_enum(self): + content = "{\"type\": {\"__enum__\": \"RelationshipType.COLLECTION\"}}" + json.loads(content, object_hook=as_enum) + def test_recordtype_enum_class_encodes(self): type = RecordType.GRANULE obj = RecordType(type) @@ -46,6 +54,10 @@ def test_recordtype_enum_class_encodes(self): self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) + def test_recordtype_json_decode_enum(self): + content = "{\"type\": {\"__enum__\": \"RecordType.COLLECTION\"}}" + json.loads(content, object_hook=as_enum) + def test_timerangedescriptor_enum_class_encodes(self): type = TimeRangeDescriptor.AMBIGUOUS obj = TimeRangeDescriptor(type) @@ -55,7 +67,11 @@ def test_timerangedescriptor_enum_class_encodes(self): self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) - def test_linestring_enum_class_encodes(self): + def test_timerangedescriptor_json_decode_enum(self): + content = "{\"type\": {\"__enum__\": \"TimeRangeDescriptor.AMBIGUOUS\"}}" + json.loads(content, object_hook=as_enum) + + def test_linestringtype_enum_class_encodes(self): type = LineStringType.LINESTRING obj = LineStringType(type) @@ -64,6 +80,10 @@ def 
test_linestring_enum_class_encodes(self): self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) + def test_linestringtype_json_decode_enum(self): + content = "{\"type\": {\"__enum__\": \"LineStringType.LINESTRING\"}}" + json.loads(content, object_hook=as_enum) + def test_method_enum_class_encodes(self): type = Method.CONNECT obj = Method(type) @@ -73,6 +93,10 @@ def test_method_enum_class_encodes(self): self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) + def test_method_json_decode_enum(self): + content = "{\"type\": {\"__enum__\": \"Method.POST\"}}" + json.loads(content, object_hook=as_enum) + def test_multilinestringtype_enum_class_encodes(self): type = MultiLineStringType.MULTILINESTRING obj = MultiLineStringType(type) @@ -82,6 +106,10 @@ def test_multilinestringtype_enum_class_encodes(self): self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) + def test_mutilinestringtype_json_decode_enum(self): + content = "{\"type\": {\"__enum__\": \"MultiLineStringType.MULTILINESTRING\"}}" + json.loads(content, object_hook=as_enum) + def test_multipointtype_enum_class_encodes(self): type = MultiPointType.MULTIPOINT obj = MultiPointType(type) @@ -91,6 +119,10 @@ def test_multipointtype_enum_class_encodes(self): self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) + def test_multipointtype_json_decode_enum(self): + content = "{\"type\": {\"__enum__\": \"MultiPointType.MULTIPOINT\"}}" + json.loads(content, object_hook=as_enum) + def test_multipolygontype_enum_class_encodes(self): type = MultiPolygonType.MULTIPOLYGON obj = MultiPolygonType(type) @@ -100,6 +132,10 @@ def test_multipolygontype_enum_class_encodes(self): self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) + def test_multipolygontype_json_decode_enum(self): + content = "{\"type\": {\"__enum__\": \"MultiPolygonType.MULTIPOLYGON\"}}" + json.loads(content, object_hook=as_enum) + def test_operationtype_enum_class_encodes(self): type = OperationType.ADD obj = OperationType(type) @@ -109,6 +145,10 @@ def test_operationtype_enum_class_encodes(self): self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) + def test_operationtype_json_decode_enum(self): + content = "{\"type\": {\"__enum__\": \"OperationType.ADD\"}}" + json.loads(content, object_hook=as_enum) + def test_pointtype_enum_class_encodes(self): type = PointType.POINT obj = PointType(type) @@ -118,6 +158,10 @@ def test_pointtype_enum_class_encodes(self): self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) + def test_pointtype_json_decode_enum(self): + content = "{\"type\": {\"__enum__\": \"PointType.POINT\"}}" + json.loads(content, object_hook=as_enum) + def test_polygontype_enum_class_encodes(self): type = PolygonType.POLYGON obj = PolygonType(type) @@ -127,6 +171,10 @@ def test_polygontype_enum_class_encodes(self): self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) + def test_polygontype_json_decode_enum(self): + content = "{\"type\": {\"__enum__\": \"PolygonType.POLYGON\"}}" + json.loads(content, object_hook=as_enum) + def test_filelocationtype_enum_class_encodes(self): type = FileLocationType.INGEST obj = FileLocationType(type) @@ -136,6 +184,10 @@ def test_filelocationtype_enum_class_encodes(self): self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) + def test_filelocationtype_json_decode_enum(self): + content = "{\"type\": {\"__enum__\": \"FileLocationType.INGEST\"}}" + json.loads(content, object_hook=as_enum) + def test_validdescriptor_enum_class_encodes(self): type = ValidDescriptor.INVALID obj = ValidDescriptor(type) @@ -144,3 +196,7 @@ def 
test_validdescriptor_enum_class_encodes(self): cls=EnumEncoder) self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) + + def test_validdescriptor_json_decode_enum(self): + content = "{\"type\": {\"__enum__\": \"ValidDescriptor.VALID\"}}" + json.loads(content, object_hook=as_enum) From 16c3f7210b2b0b31bfb8de026f9bc82e0a718863 Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 13 Jul 2021 17:31:20 -0600 Subject: [PATCH 127/129] 1508-Updated sme script for values that are now an ENUM type. Also changed handler in script to work off the actual value passed in not just the test data in there. --- .../schemas/avro/psi/test_ParsedRecord.py | 62 ++++++++++++++++++- scripts/sme/sme.py | 51 +++++++++++---- 2 files changed, 101 insertions(+), 12 deletions(-) diff --git a/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/test_ParsedRecord.py b/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/test_ParsedRecord.py index 162950c..0f338d4 100644 --- a/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/test_ParsedRecord.py +++ b/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/test_ParsedRecord.py @@ -33,7 +33,8 @@ from onestop.schemas.geojsonSchemaClasses.org.cedar.schemas.avro.geojson.multi_line_string import MultiLineString from onestop.schemas.geojsonSchemaClasses.org.cedar.schemas.avro.geojson.polygon import Polygon from onestop.schemas.geojsonSchemaClasses.org.cedar.schemas.avro.geojson.multi_polygon import MultiPolygon - +from onestop.schemas.util.jsonEncoder import EnumEncoder, as_enum, EnumEncoderValue +import json class test_ParsedRecord(unittest.TestCase): @@ -314,6 +315,65 @@ class test_ParsedRecord(unittest.TestCase): 'errors': [errorEvent_dict] } + def test_parsed_record_corner_case(self): + value = { + "type": "granule", + "content": "{" + "\"discovery\": {\n " + "\"fileIdentifier\": \"92ade5dc-946d-11ea-abe4-0242ac120004\",\n " + "\"links\": [\n {\n " + "\"linkFunction\": \"download\",\n " + "\"linkName\": \"Amazon S3\",\n " + "\"linkProtocol\": \"HTTPS\",\n " + "\"linkUrl\": \"https://s3.amazonaws.com/nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\"\n " + "}\n ],\n " + "\"parentIdentifier\": \"0fad03df-0805-434a-86a6-7dc42d68480f\",\n " + "\"spatialBounding\": null,\n " + "\"temporalBounding\": {\n " + "\"beginDate\": \"2020-05-12T16:20:15.158Z\", \n " + "\"endDate\": \"2020-05-12T16:21:51.494Z\"\n " + "},\n " + "\"title\": \"HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\"\n " + "},\n " + "\"fileInformation\": {\n " + "\"checksums\": [{" + "\"algorithm\": \"MD5\"," + "\"value\": \"44d2452e8bc2c8013e9c673086fbab7a\"" + "}]\n, " + "\"optionalAttributes\":{}, " + "\"format\": \"HSD\",\n " + "\"name\": \"HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\",\n " + "\"size\": 208918\n " + "},\n " + "\"fileLocations\": {\n " + "\"s3://nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\": {\n" + "\"optionalAttributes\":{}, " + "\"uri\":\"//nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\", " + "\"asynchronous\": false,\n " + "\"deleted\": false,\n " + "\"lastModified\": 1589300890000,\n " + "\"locality\": \"us-east-1\",\n " + "\"restricted\": false,\n " + "\"serviceType\": \"Amazon:AWS:S3\",\n " + "\"type\": {\"__enum__\": \"FileLocationType.INGEST\"},\n " + "\"uri\": 
\"s3://nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\"\n " + "}\n " + "},\n " + "\"relationships\": [\n {\n " + "\"id\": \"0fad03df-0805-434a-86a6-7dc42d68480f\",\n " + "\"type\": {\"__enum__\": \"RelationshipType.COLLECTION\"} }\n ]\n " + "}", + "contentType": "application/json", + "method": "PUT", + "source": "unknown", + "operation": "ADD" + } + + content_dict = json.loads(value['content'], object_hook=as_enum) + + ParsedRecord(**content_dict) + ParsedRecord.from_dict(content_dict) + # Note: Didn't make unit tests for ENUMS since they don't execute any methods. def test_parsed_record_all_vars_set(self): parsedRecord = ParsedRecord(**self.parsedRecord_dict) diff --git a/scripts/sme/sme.py b/scripts/sme/sme.py index 25e68aa..9dd92d8 100644 --- a/scripts/sme/sme.py +++ b/scripts/sme/sme.py @@ -10,9 +10,8 @@ from onestop.schemas.geojsonSchemaClasses.org.cedar.schemas.avro.geojson.point import Point from onestop.schemas.geojsonSchemaClasses.point_type import PointType from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.parsed_record import ParsedRecord -#from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.checksum_algorithm import ChecksumAlgorithm from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.temporal_bounding import TemporalBounding -from onestop.schemas.util.jsonEncoder import EnumEncoder, as_enum, EnumEncoderValue +from onestop.schemas.util.jsonEncoder import as_enum, EnumEncoderValue from onestop.KafkaPublisher import KafkaPublisher #from spatial import script_generation, postgres_insert from onestop.util.ClientLogger import ClientLogger @@ -33,12 +32,39 @@ def handler(key, value, log_level = 'INFO'): ''' # Grabs the contents of the message and turns the dict string into a dictionary using json.loads logger = ClientLogger.get_logger('sme.handler', log_level, False) - logger.info('In Handler') + logger.info('In Handler: key=%s value=%s'%(key, value)) + # This is an example for testing purposes. 
- value = { + test_value = { "type": "granule", - "content": "{ \"discovery\": {\n \"fileIdentifier\": \"92ade5dc-946d-11ea-abe4-0242ac120004\",\n \"links\": [\n {\n \"linkFunction\": \"download\",\n \"linkName\": \"Amazon S3\",\n \"linkProtocol\": \"HTTPS\",\n \"linkUrl\": \"https://s3.amazonaws.com/nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\"\n }\n ],\n \"parentIdentifier\": \"0fad03df-0805-434a-86a6-7dc42d68480f\",\n \"spatialBounding\": null,\n \"temporalBounding\": {\n \"beginDate\": \"2020-05-12T16:20:15.158Z\", \n \"endDate\": \"2020-05-12T16:21:51.494Z\"\n },\n \"title\": \"HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\"\n },\n \"fileInformation\": {\n \"checksums\": [{\"algorithm\": \"MD5\",\"value\": \"44d2452e8bc2c8013e9c673086fbab7a\"}]\n, \"optionalAttributes\":{}, \"format\": \"HSD\",\n \"name\": \"HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\",\n \"size\": 208918\n },\n " - "\"fileLocations\": {\n " + "content": "{" + "\"discovery\": {\n " + "\"fileIdentifier\": \"92ade5dc-946d-11ea-abe4-0242ac120004\",\n " + "\"links\": [\n {\n " + "\"linkFunction\": \"download\",\n " + "\"linkName\": \"Amazon S3\",\n " + "\"linkProtocol\": \"HTTPS\",\n " + "\"linkUrl\": \"https://s3.amazonaws.com/nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\"\n " + "}\n ],\n " + "\"parentIdentifier\": \"0fad03df-0805-434a-86a6-7dc42d68480f\",\n " + "\"spatialBounding\": null,\n " + "\"temporalBounding\": {\n " + "\"beginDate\": \"2020-05-12T16:20:15.158Z\", \n " + "\"endDate\": \"2020-05-12T16:21:51.494Z\"\n " + "},\n " + "\"title\": \"HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\"\n " + "},\n " + "\"fileInformation\": {\n " + "\"checksums\": [{" + "\"algorithm\": \"MD5\"," + "\"value\": \"44d2452e8bc2c8013e9c673086fbab7a\"" + "}]\n, " + "\"optionalAttributes\":{}, " + "\"format\": \"HSD\",\n " + "\"name\": \"HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\",\n " + "\"size\": 208918\n " + "},\n " + "\"fileLocations\": {\n " "\"s3://nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\": {\n" "\"optionalAttributes\":{}, " "\"uri\":\"//nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\", " @@ -48,21 +74,23 @@ def handler(key, value, log_level = 'INFO'): "\"locality\": \"us-east-1\",\n " "\"restricted\": false,\n " "\"serviceType\": \"Amazon:AWS:S3\",\n " - "\"type\": {\"type\":\"ACCESS\"},\n " + "\"type\": {\"__enum__\": \"FileLocationType.INGEST\"},\n " "\"uri\": \"s3://nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\"\n " "}\n " "},\n " - "\"relationships\": [\n {\n \"id\": \"0fad03df-0805-434a-86a6-7dc42d68480f\",\n \"type\": \"COLLECTION\"\n }\n ]\n }", + "\"relationships\": [\n {\n " + "\"id\": \"0fad03df-0805-434a-86a6-7dc42d68480f\",\n " + "\"type\": {\"__enum__\": \"RelationshipType.COLLECTION\"} }\n ]\n " + "}", "contentType": "application/json", "method": "PUT", "source": "unknown", "operation": "ADD" } logger.debug('content: %s'%value['content']) - - content_dict = json.loads(value['content'], object_hook=as_enum) + content_dict = json.loads(value['content'], object_hook=as_enum) # this can fail if input values fail to map to avro ENUM values. 
logger.debug('content_dict: %s'%content_dict) - parsed_record = ParsedRecord().from_dict(content_dict) + parsed_record = ParsedRecord.from_dict(content_dict) # or ParsedRecord(**content_dict) # this can fail if input values fail to map to avro class values. # Geospatial Extraction bucket_key = content_dict['discovery']['links'][0]['linkUrl'].split('.com/')[1] @@ -82,6 +110,7 @@ def handler(key, value, log_level = 'INFO'): content_dict['discovery']['spatialBounding']['type'] = pointType.value # Create temp bounding obj + logger.debug('beginDate=%s endDate=%s'%(begin_date, end_date)) tempBounding = TemporalBounding(beginDate=begin_date, endDate=end_date) # Update parsed record object with geospatial data From cd99593d3cd2002e4350fd2a87e665e38ba276b9 Mon Sep 17 00:00:00 2001 From: Erin Date: Thu, 22 Jul 2021 13:28:27 -0600 Subject: [PATCH 128/129] 1509-added discovery spatial bounding test for variations of spatial types. Removed some duplicated tests (ones covered by other tests). --- .../schemas/avro/psi/test_ParsedRecord.py | 124 +++++++++--------- 1 file changed, 63 insertions(+), 61 deletions(-) diff --git a/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/test_ParsedRecord.py b/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/test_ParsedRecord.py index 0f338d4..2a7008e 100644 --- a/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/test_ParsedRecord.py +++ b/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/test_ParsedRecord.py @@ -33,8 +33,6 @@ from onestop.schemas.geojsonSchemaClasses.org.cedar.schemas.avro.geojson.multi_line_string import MultiLineString from onestop.schemas.geojsonSchemaClasses.org.cedar.schemas.avro.geojson.polygon import Polygon from onestop.schemas.geojsonSchemaClasses.org.cedar.schemas.avro.geojson.multi_polygon import MultiPolygon -from onestop.schemas.util.jsonEncoder import EnumEncoder, as_enum, EnumEncoderValue -import json class test_ParsedRecord(unittest.TestCase): @@ -315,65 +313,6 @@ class test_ParsedRecord(unittest.TestCase): 'errors': [errorEvent_dict] } - def test_parsed_record_corner_case(self): - value = { - "type": "granule", - "content": "{" - "\"discovery\": {\n " - "\"fileIdentifier\": \"92ade5dc-946d-11ea-abe4-0242ac120004\",\n " - "\"links\": [\n {\n " - "\"linkFunction\": \"download\",\n " - "\"linkName\": \"Amazon S3\",\n " - "\"linkProtocol\": \"HTTPS\",\n " - "\"linkUrl\": \"https://s3.amazonaws.com/nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\"\n " - "}\n ],\n " - "\"parentIdentifier\": \"0fad03df-0805-434a-86a6-7dc42d68480f\",\n " - "\"spatialBounding\": null,\n " - "\"temporalBounding\": {\n " - "\"beginDate\": \"2020-05-12T16:20:15.158Z\", \n " - "\"endDate\": \"2020-05-12T16:21:51.494Z\"\n " - "},\n " - "\"title\": \"HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\"\n " - "},\n " - "\"fileInformation\": {\n " - "\"checksums\": [{" - "\"algorithm\": \"MD5\"," - "\"value\": \"44d2452e8bc2c8013e9c673086fbab7a\"" - "}]\n, " - "\"optionalAttributes\":{}, " - "\"format\": \"HSD\",\n " - "\"name\": \"HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\",\n " - "\"size\": 208918\n " - "},\n " - "\"fileLocations\": {\n " - "\"s3://nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\": {\n" - "\"optionalAttributes\":{}, " - 
"\"uri\":\"//nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\", " - "\"asynchronous\": false,\n " - "\"deleted\": false,\n " - "\"lastModified\": 1589300890000,\n " - "\"locality\": \"us-east-1\",\n " - "\"restricted\": false,\n " - "\"serviceType\": \"Amazon:AWS:S3\",\n " - "\"type\": {\"__enum__\": \"FileLocationType.INGEST\"},\n " - "\"uri\": \"s3://nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\"\n " - "}\n " - "},\n " - "\"relationships\": [\n {\n " - "\"id\": \"0fad03df-0805-434a-86a6-7dc42d68480f\",\n " - "\"type\": {\"__enum__\": \"RelationshipType.COLLECTION\"} }\n ]\n " - "}", - "contentType": "application/json", - "method": "PUT", - "source": "unknown", - "operation": "ADD" - } - - content_dict = json.loads(value['content'], object_hook=as_enum) - - ParsedRecord(**content_dict) - ParsedRecord.from_dict(content_dict) - # Note: Didn't make unit tests for ENUMS since they don't execute any methods. def test_parsed_record_all_vars_set(self): parsedRecord = ParsedRecord(**self.parsedRecord_dict) @@ -385,6 +324,69 @@ def test_discovery_all_vars_set(self): self.assertIsNotNone(discovery) + def test_discovery_spatial_bounding_supports_none_type(self): + dict = { + 'links': [self.link_dict], + 'spatialBounding': None + } + discovery = Discovery(**dict) + + self.assertEqual(discovery.spatialBounding, dict['spatialBounding']) + + def test_discovery_spatial_bounding_supports_point_type(self): + dict = { + 'links': [self.link_dict], + 'spatialBounding': Point(**self.point_dict) + } + discovery = Discovery(**dict) + + self.assertEqual(discovery.spatialBounding, dict['spatialBounding']) + + def test_discovery_spatial_bounding_supports_multipoint_type(self): + dict = { + 'links': [self.link_dict], + 'spatialBounding': MultiPoint(**self.multiPoint_dict) + } + discovery = Discovery(**dict) + + self.assertEqual(discovery.spatialBounding, dict['spatialBounding']) + + def test_discovery_spatial_bounding_supports_linestring_type(self): + dict = { + 'links': [self.link_dict], + 'spatialBounding': LineString(**self.lineString_dict) + } + discovery = Discovery(**dict) + + self.assertEqual(discovery.spatialBounding, dict['spatialBounding']) + + def test_discovery_spatial_bounding_supports_multilinestring_type(self): + dict = { + 'links': [self.link_dict], + 'spatialBounding': MultiLineString(**self.multiLineString_dict) + } + discovery = Discovery(**dict) + + self.assertEqual(discovery.spatialBounding, dict['spatialBounding']) + + def test_discovery_spatial_bounding_supports_polygon_type(self): + dict = { + 'links': [self.link_dict], + 'spatialBounding': Polygon(**self.polygon_dict) + } + discovery = Discovery(**dict) + + self.assertEqual(discovery.spatialBounding, dict['spatialBounding']) + + def test_discovery_spatial_bounding_supports_multipolygon_type(self): + dict = { + 'links': [self.link_dict], + 'spatialBounding': MultiPolygon(**self.multiPolygon_dict) + } + discovery = Discovery(**dict) + + self.assertEqual(discovery.spatialBounding, dict['spatialBounding']) + def test_keywords_all_vars_set(self): keywords = KeywordsElement(**self.keywordsElement_dict) From 4e356193e9c09a106edb29f8344c44ff9850a4d5 Mon Sep 17 00:00:00 2001 From: Erin Date: Thu, 19 Aug 2021 13:02:56 -0600 Subject: [PATCH 129/129] 1508-Added negative tests for fields that took an Enum type. 
--- .../schemas/avro/psi/test_ParsedRecord.py | 76 ++++++++++++++++++- 1 file changed, 75 insertions(+), 1 deletion(-) diff --git a/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/test_ParsedRecord.py b/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/test_ParsedRecord.py index 2a7008e..ffa1ecd 100644 --- a/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/test_ParsedRecord.py +++ b/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/test_ParsedRecord.py @@ -16,6 +16,8 @@ from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.analysis import Analysis from onestop.schemas.psiSchemaClasses.identification_analysis import IdentificationAnalysis from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.checksum_algorithm import ChecksumAlgorithm +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.checksum import Checksum +from onestop.schemas.psiSchemaClasses.temporal_bounding_analysis import TemporalBoundingAnalysis from onestop.schemas.psiSchemaClasses.operation import Operation from onestop.schemas.psiSchemaClasses.data_format import DataFormat from onestop.schemas.psiSchemaClasses.platform import Platform @@ -313,7 +315,6 @@ class test_ParsedRecord(unittest.TestCase): 'errors': [errorEvent_dict] } - # Note: Didn't make unit tests for ENUMS since they don't execute any methods. def test_parsed_record_all_vars_set(self): parsedRecord = ParsedRecord(**self.parsedRecord_dict) @@ -527,3 +528,76 @@ def test_relationships_optionals(self): relationship = Relationship(id=id, type=None) self.assertEqual(relationship.id, id) + + # Negative Tests + def test_lineString_type_fails_bad_type(self): + local_dict = dict(self.lineString_dict) + local_dict['type'] = 'BadEnumTypeString' + + self.assertRaises(TypeError, LineString, **local_dict) + + def test_multiLineString_type_fails_bad_type(self): + local_dict = dict(self.multiLineString_dict) + local_dict['type'] = 'BadEnumTypeString' + + self.assertRaises(TypeError, MultiLineString, **local_dict) + + def test_multiPoint_type_fails_bad_type(self): + local_dict = dict(self.multiPoint_dict) + local_dict['type'] = 'BadEnumTypeString' + + self.assertRaises(TypeError, MultiPoint, **local_dict) + + def test_multiPolygon_type_fails_bad_type(self): + local_dict = dict(self.multiPolygon_dict) + local_dict['type'] = 'BadEnumTypeString' + + self.assertRaises(TypeError, MultiPolygon, **local_dict) + + def test_point_type_fails_bad_type(self): + local_dict = dict(self.point_dict) + local_dict['type'] = 'BadEnumTypeString' + + self.assertRaises(TypeError, Point, **local_dict) + + def test_polygon_type_fails_bad_type(self): + local_dict = dict(self.polygon_dict) + local_dict['type'] = 'BadEnumTypeString' + + self.assertRaises(TypeError, Polygon, **local_dict) + + def test_temporalBoundingAnalysis_rangeDescriptor_fails_bad_type(self): + local_dict = dict(self.temporalBoundingAnalysis_dict) + local_dict['rangeDescriptor'] = 'BadEnumTypeString' + + self.assertRaises(TypeError, TemporalBoundingAnalysis, **local_dict) + + def test_checksum_algorithm_fails_bad_type(self): + local_dict = dict(self.checksum_dict) + local_dict['algorithm'] = 'BadEnumTypeString' + + self.assertRaises(TypeError, Checksum, **local_dict) + + def test_fileLocation_type_fails_bad_type(self): + local_dict = dict(self.fileLocation_dict) + local_dict['type'] = 'BadEnumTypeString' + + self.assertRaises(TypeError, FileLocation, **local_dict) + 
+ def test_parsedRecord_type_fails_bad_type(self): + local_dict = dict(self.parsedRecord_dict) + local_dict['type'] = 'BadEnumTypeString' + + self.assertRaises(TypeError, ParsedRecord, **local_dict) + + def test_relationship_type_fails_bad_type(self): + local_dict = dict(self.relationship_dict) + local_dict['type'] = 'BadEnumTypeString' + + self.assertRaises(TypeError, Relationship, **local_dict) + + def test_temporalBoundingAnalysis_endDescriptor_fails_bad_type(self): + local_dict = dict(self.temporalBoundingAnalysis_dict) + local_dict['endDescriptor'] = 'BadEnumTypeString' + + self.assertRaises(TypeError, TemporalBoundingAnalysis, **local_dict)
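
A closing note on the {"__enum__": "ClassName.MEMBER"} convention these patches exercise:
EnumEncoder serializes enum members in that wrapped form, and as_enum is the json.loads
object_hook that maps the wrapper back to an enum member, which is what the jsonEncoder
decode tests earlier in this series and the sme handler change rely on. A minimal
round-trip sketch using PointType (the decode tests only assert that the call succeeds,
so the exact decoded representation shown here is an assumption):

    import json
    from onestop.schemas.util.jsonEncoder import EnumEncoder, as_enum
    from onestop.schemas.geojsonSchemaClasses.point_type import PointType

    # Encode: the custom encoder writes enum members as a wrapper dict.
    encoded = json.dumps(PointType.POINT, cls=EnumEncoder)
    print(encoded)  # expected: {"__enum__": "PointType.POINT"}

    # Decode: as_enum replaces the wrapper dict during json.loads.
    decoded = json.loads('{"type": {"__enum__": "PointType.POINT"}}', object_hook=as_enum)
    print(decoded['type'])  # the wrapper should come back as the PointType.POINT member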