diff --git a/.circleci/config.yml b/.circleci/config.yml index 99f7692..c8ea89b 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -55,7 +55,6 @@ jobs: # - slack/status: # fail_only: false -# cli build cli-build: executor: docker/docker steps: @@ -75,30 +74,55 @@ jobs: - slack/status: fail_only: false -# clients build - client-build: + onestop-s3-handler-build: executor: docker/docker steps: - setup_remote_docker - checkout - docker/check - docker/build: - path: onestop-python-client + path: scripts/sqs-to-registry + image: cedardevs/onestop-s3-handler + tag: latest + - docker/push: + image: cedardevs/onestop-s3-handler + tag: latest + - slack/status: + fail_only: false + + onestop-sme-build: + executor: docker/docker + steps: + - setup_remote_docker + - checkout + - docker/check + - docker/build: + path: scripts/sme/ + image: cedardevs/onestop-sme + tag: latest + - docker/push: + image: cedardevs/onestop-sme + tag: latest + - slack/status: + fail_only: false + + onestop-python-client-build: + executor: docker/docker + steps: + - setup_remote_docker + - checkout + - docker/check + - docker/build: + path: ./ image: cedardevs/onestop-python-client - tag: ${CIRCLE_BRANCH}-SNAPSHOT - - run: - name: "What branch am I on now?" - command: echo $CIRCLE_BRANCH -#no need to push this image yet + tag: latest - docker/push: image: cedardevs/onestop-python-client - tag: ${CIRCLE_BRANCH}-SNAPSHOT + tag: latest - slack/status: fail_only: false - # Base test configuration for Go library tests Each distinct version should - # inherit this base, and override (at least) the container image used. - python-client-test: &python-client-test + onestop-python-client-test: &python-client-test executor: python/default steps: &steps - checkout @@ -107,10 +131,16 @@ jobs: app-dir: ./onestop-python-client pkg-manager: pip - run: - name: "Run util tests" + name: "Run unit tests" command: > cd onestop-python-client/; - python -m unittest tests/util/*.py + python -m unittest discover -s test/unit +# This is commented out only because the OneStop we have running on cedardevs doesn't have its registry exposed. You can only reach it via sshing to another machine. +# - run: +# name: "Run integration tests" +# command: > +# cd onestop-python-client/; +# python -m unittest discover -s test/integration orbs: slack: circleci/slack@3.4.2 @@ -121,9 +151,15 @@ version: 2.1 workflows: main: jobs: -# - "latest" # - cli-test # - cli-build -# - client-build - - python-client-test - + - onestop-python-client-test + - onestop-python-client-build: + requires: + - onestop-python-client-test + - onestop-sme-build: + requires: + - onestop-python-client-build + - onestop-s3-handler-build: + requires: + - onestop-python-client-build diff --git a/Dockerfile b/Dockerfile index e5ec186..a906511 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,10 +1,15 @@ FROM python:3.8 + COPY ./onestop-python-client /onestop-python-client COPY ./scripts /scripts + RUN apt-get update RUN pip install --upgrade pip -RUN pip install ./onestop-python-client RUN pip install -r ./onestop-python-client/requirements.txt +# Needed for scripts - do here since directory out of scope when in scripts/* dockerfiles. +# Unsure if possible this isn't latest build, like doing pip install before this is built. 
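+# Note: the COPY of ./onestop-python-client above runs before this install, so pip installs
+# whatever source was baked into this image at build time; it should only be stale if the
+# image itself is not rebuilt after the package source changes.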
+RUN pip install ./onestop-python-client + #Base image stays up for dev access CMD tail -f /dev/null diff --git a/helm/onestop-sqs-consumer/values.yaml b/helm/onestop-sqs-consumer/values.yaml index 20557a0..f5a24fb 100644 --- a/helm/onestop-sqs-consumer/values.yaml +++ b/helm/onestop-sqs-consumer/values.yaml @@ -7,7 +7,7 @@ replicaCount: 1 image: repository: cedardevs/onestop-sme tag: latest - pullPolicy: IfNotPresent + pullPolicy: Always imagePullSecrets: [] nameOverride: "" @@ -57,35 +57,51 @@ config: |- log_level: INFO # AWS config values - sqs_url: https://sqs.us-east-2.amazonaws.com/798276211865/cloud-archive-client-sqs - sqs_max_polls: 100 + sqs_name: cloud-archive-client-sqs s3_region: us-east-2 s3_bucket: archive-testing-demo + sqs_max_polls: 100 #AWS config values for 2nd vault in different region vault_name: archive-vault-new s3_region2: us-east-2 - s3_bucket2: noaa-nccf-dev-archive + s3_bucket2: archive-testing-testing-test #CSB stream config format: csv headers: UNIQUE_ID,FILE_UUID,LON,LAT,DEPTH,TIME,PLATFORM_NAME,PROVIDER - type: COLLECTION collection_id: fdb56230-87f4-49f2-ab83-104cfd073177 access_bucket: https://archive-testing-demo.s3-us-east-2.amazonaws.com #access_bucket: https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com - file_identifier_prefix: "gov.noaa.ncei.csb:" + file_id_prefix: "gov.noaa.ncei.csb:" # COLLECTION or GRANULE - metadata_type: granule - registry_base_url: http://onestop-registry:80 - onestop_base_url: http://onestop-search:8080 + kafka_consumer_metadata_type: GRANULE + kafka_publisher_metadata_type: GRANULE + s3_message_adapter_metadata_type: COLLECTION - security: - enabled: True + registry_base_url: http://os-registry:80 + onestop_base_url: http://os-search:8080 prefixMap: NESDIS/CSB: 'fdb56230-87f4-49f2-ab83-104cfd073177' NESDIS/H8: '0fad03df-0805-434a-86a6-7dc42d68480e' NESDIS/GOES: '11111111-1111-1111-1111-111111111111' - NESDIS/SAB: '98e03b47-069a-4f2c-8071-649e8c4254d6' \ No newline at end of file + NESDIS/SAB: '98e03b47-069a-4f2c-8071-649e8c4254d6' + + # Kafka config values + brokers: cp-cp-kafka:9092 + schema_registry: http://cp-cp-schema-registry:8081 + collection_topic_publish: psi-granules-by-collection + granule_topic_publish: psi-granule-parsed + collection_topic_consume: psi-collection-input-unknown + granule_topic_consume: psi-granule-input-unknown + group_id: sme-test + auto_offset_reset: earliest + security: + # True/False + enabled: False + # If security is enabled then need these: + caLoc: /etc/pki/tls/cert.pem + keyLoc: /etc/pki/tls/private/kafka-user.key + certLoc: /etc/pki/tls/certs/kafka-user.crt \ No newline at end of file diff --git a/helm/sme-chart/values.yaml b/helm/sme-chart/values.yaml index 924f62f..6016adc 100644 --- a/helm/sme-chart/values.yaml +++ b/helm/sme-chart/values.yaml @@ -1,7 +1,7 @@ image: repository: cedardevs/onestop-e2e-demo tag: latest - pullPolicy: IfNotPresent + pullPolicy: Always secret: registry_username: @@ -14,29 +14,52 @@ config: |- log_level: INFO # AWS config values - sqs_url: https://sqs.us-east-2.amazonaws.com/798276211865/cloud-archive-client-sqs - sqs_max_polls: 100 + sqs_name: cloud-archive-client-sqs s3_region: us-east-2 s3_bucket: archive-testing-demo + sqs_max_polls: 100 #AWS config values for 2nd vault in different region vault_name: archive-vault-new s3_region2: us-east-2 - s3_bucket2: noaa-nccf-dev-archive + s3_bucket2: archive-testing-testing-test #CSB stream config format: csv headers: UNIQUE_ID,FILE_UUID,LON,LAT,DEPTH,TIME,PLATFORM_NAME,PROVIDER - type: COLLECTION 
collection_id: fdb56230-87f4-49f2-ab83-104cfd073177 access_bucket: https://archive-testing-demo.s3-us-east-2.amazonaws.com #access_bucket: https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com - file_identifier_prefix: "gov.noaa.ncei.csb:" + file_id_prefix: "gov.noaa.ncei.csb:" # COLLECTION or GRANULE - metadata_type: granule - registry_base_url: http://onestop-registry:80 - onestop_base_url: http://onestop-search:8080 + kafka_consumer_metadata_type: GRANULE + kafka_producer_metadata_type: GRANULE + web_publisher_metadata_type: GRANULE + s3_message_adapter_metadata_type: COLLECTION + + registry_base_url: http://os-registry:80 + onestop_base_url: http://os-search:8080 + + prefixMap: + NESDIS/CSB: 'fdb56230-87f4-49f2-ab83-104cfd073177' + NESDIS/H8: '0fad03df-0805-434a-86a6-7dc42d68480e' + NESDIS/GOES: '11111111-1111-1111-1111-111111111111' + NESDIS/SAB: '98e03b47-069a-4f2c-8071-649e8c4254d6' + # Kafka config values + brokers: cp-cp-kafka:9092 + schema_registry: http://cp-cp-schema-registry:8081 + collection_topic_publish: psi-granules-by-collection + granule_topic_publish: psi-granule-parsed + collection_topic_consume: psi-collection-input-unknown + granule_topic_consume: psi-granule-input-unknown + group_id: sme-test + auto_offset_reset: earliest security: - enabled: True \ No newline at end of file + # True/False + enabled: False + # If security is enabled then need these: + caLoc: /etc/pki/tls/cert.pem + keyLoc: /etc/pki/tls/private/kafka-user.key + certLoc: /etc/pki/tls/certs/kafka-user.crt \ No newline at end of file diff --git a/kubernetes/pyconsumer-pod.yaml b/kubernetes/pyconsumer-pod.yaml index fed2258..e6ac5c5 100644 --- a/kubernetes/pyconsumer-pod.yaml +++ b/kubernetes/pyconsumer-pod.yaml @@ -70,19 +70,21 @@ data: csb: format: csv headers: UNIQUE_ID,FILE_UUID,LON,LAT,DEPTH,TIME,PLATFORM_NAME,PROVIDER - type: COLLECTION collection_id: fdb56230-87f4-49f2-ab83-104cfd073177 - psi_registry_url: https://cedardevs.org/ + registry_base_url: https://cedardevs.org/ access_bucket: https://archive-testing-demo.s3-us-east-2.amazonaws.com #access_bucket: https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com file_identifier_prefix: "gov.noaa.ncei.csb:" # Web Publisher web: - # COLLECTION or GRANULE - metadata_type: granule registry_base_url: https://cedardevs.org/onestop/registry-api onestop_base_url: https://cedardevs.org/onestop/search-api security: - enabled: True \ No newline at end of file + enabled: True + + # COLLECTION or GRANULE + kafka_consumer_metadata_type: GRANULE + kafka_publisher_metadata_type: GRANULE + s3_message_adapter_metadata_type: COLLECTION \ No newline at end of file diff --git a/onestop-python-client/config/aws-util-config-dev.yml b/onestop-python-client/config/aws-util-config-dev.yml index ee1ad95..2fdb5c1 100644 --- a/onestop-python-client/config/aws-util-config-dev.yml +++ b/onestop-python-client/config/aws-util-config-dev.yml @@ -1,11 +1,12 @@ # Example config values for osim client -log_level: INFO # AWS config values sqs_url: https://sqs.us-east-2.amazonaws.com/798276211865/cloud-archive-client-sqs +sqs_name: 'foobar' sqs_max_polls: 2 s3_region: "us-east-2" s3_bucket: archive-testing-demo +s3_key: 'ABI-L1b-RadF/2019/298/15/OR_ABI-L1b-RadF-M6C15_G16_s20192981500369_e20192981510082_c20192981510166.nc' #AWS config values for 2nd vault in different region vault_name: archive-vault-new diff --git a/onestop-python-client/config/credentials-template.yml b/onestop-python-client/config/credentials-template.yml index 006e175..f94c70b 100644 --- 
a/onestop-python-client/config/credentials-template.yml +++ b/onestop-python-client/config/credentials-template.yml @@ -9,5 +9,4 @@ registry: username: rw_user password: rw_user_pwd - - +log_level: INFO \ No newline at end of file diff --git a/onestop-python-client/config/csb-data-stream-config-template.yml b/onestop-python-client/config/csb-data-stream-config-template.yml index 887c9be..07ab823 100644 --- a/onestop-python-client/config/csb-data-stream-config-template.yml +++ b/onestop-python-client/config/csb-data-stream-config-template.yml @@ -1,7 +1,10 @@ -log_level: INFO +# COLLECTION or GRANULE +kafka_consumer_metadata_type: COLLECTION +kafka_publisher_metadata_type: COLLECTION +s3_message_adapter_metadata_type: COLLECTION + format: csv headers: UNIQUE_ID,FILE_UUID,LON,LAT,DEPTH,TIME,PLATFORM_NAME,PROVIDER -type: COLLECTION collection_id: fdb56230-87f4-49f2-ab83-104cfd073177 #registry_base_url: https://internal-a683c98a66fb011eaa4230e0d5e5657f-369075387.us-east-1.elb.amazonaws.com registry_base_url: http://localhost/onestop/api/registry @@ -9,7 +12,7 @@ registry_base_url: http://localhost/onestop/api/registry onestop_base_url: http://localhost/onestop/api/search/search access_bucket: https://archive-testing-demo.s3-us-east-2.amazonaws.com #access_bucket: https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com -file_identifier_prefix: "gov.noaa.ncei.csb:" +file_id_prefix: "gov.noaa.ncei.csb:" prefixMap: NESDIS/CSB: 'fdb56230-87f4-49f2-ab83-104cfd073177' diff --git a/onestop-python-client/onestop/KafkaConsumer.py b/onestop-python-client/onestop/KafkaConsumer.py index e45d6cc..c064dd9 100644 --- a/onestop-python-client/onestop/KafkaConsumer.py +++ b/onestop-python-client/onestop/KafkaConsumer.py @@ -1,11 +1,9 @@ -import logging -import yaml - from confluent_kafka.schema_registry import SchemaRegistryClient from confluent_kafka.error import KafkaError from confluent_kafka import DeserializingConsumer from confluent_kafka.schema_registry.avro import AvroDeserializer from confluent_kafka.serialization import StringDeserializer +from onestop.util.ClientLogger import ClientLogger class KafkaConsumer: """ @@ -13,124 +11,114 @@ class KafkaConsumer: Attributes ---------- - conf: yaml file - kafka-publisher-config-dev.yml - logger: Logger object - utilizes python logger library and creates logging for our specific needs - logger.info: Logger object - logging statement that occurs when the class is instantiated - metadata_type: str - type of metadata (COLLECTION or GRANULE) - brokers: str - brokers (kubernetes service) - group_id: str - Client group id string. All clients sharing the same group.id belong to the same group - auto_offset_reset: str - Action to take when there is no initial offset in offset store or the desired offset is out of range (smallest, earliest, beginning, largest, latest, end, error) - schema_registry: str - schema registry (kubernetes service) - security: boolean - defines if security is in place - collection_topic: str - collection topic you want to consume - granule_topic: str - granule topic you want to consume + metadata_type: str + type of metadata (COLLECTION or GRANULE) + brokers: str + brokers (kubernetes service) + group_id: str + Client group id string. 
All clients sharing the same group.id belong to the same group + auto_offset_reset: str + Action to take when there is no initial offset in offset store or the desired offset is out of range (smallest, earliest, beginning, largest, latest, end, error) + schema_registry: str + schema registry (kubernetes service) + security_enabled: boolean + Whether to use security for the kafka schema registry client. + security_caLoc: str + Kafka schema registry certification authority (CA) file location. + security_keyLoc: str + Kafka schema registry client's private key file location. + security_certLoc: str + Kafka schema registry client's public key file location. + collection_topic_consume: str + collection topic you want to consume + granule_topic_consume: str + granule topic you want to consume + logger: Logger object + utilizes python logger library and creates logging for our specific needs Methods ------- - get_logger(log_name, create_file) - creates logger file - - register_client() - registers to schema registry client based on configs + register_client() + registers to schema registry client based on configs - create_consumer(registry_client) - subscribes to topic defined in configs and creates a consumer to deserialize messages from topic + connect() + utilizes register_client() and create_consumer(registry_client) to connect to schema registry and allow for consumption of topics - connect() - utilizes register_client() and create_consumer(registry_client) to connect to schema registry and allow for consumption of topics + create_consumer(registry_client) + subscribes to topic defined in configs and creates a consumer to deserialize messages from topic - consume(metadata_consumer, handler) - asynchronously polls for messages in the connected topic, results vary depending on the handler function that is passed into it + consume(metadata_consumer, handler) + asynchronously polls for messages in the connected topic, results vary depending on the handler function that is passed into it """ - conf = None - - def __init__(self, conf_loc): - with open(conf_loc) as f: - self.conf = yaml.load(f, Loader=yaml.FullLoader) - - self.logger = self.get_logger(self.__class__.__name__, False) - self.logger.info("Initializing " + self.__class__.__name__) - self.metadata_type = self.conf['metadata_type'] - self.brokers = self.conf['brokers'] - self.group_id = self.conf['group_id'] - self.auto_offset_reset = self.conf['auto_offset_reset'] - self.schema_registry = self.conf['schema_registry'] - self.security = self.conf['security']['enabled'] - - self.collection_topic = self.conf['collection_topic_consume'] - self.granule_topic = self.conf['granule_topic_consume'] - - if self.metadata_type not in ['COLLECTION', 'GRANULE']: - raise ValueError("metadata_type must be 'COLLECTION' or 'GRANULE'") - def get_logger(self, log_name, create_file): + def __init__(self, kafka_consumer_metadata_type, brokers, group_id, auto_offset_reset, schema_registry, security, collection_topic_consume, granule_topic_consume, log_level = 'INFO', **wildargs): """ - Utilizes python logger library and creates logging - - :param log_name: str - name of log to be created - :param create_file: boolean - defines whether of not you want a logger file to be created - - :return: Logger object + Attributes + ---------- + kafka_consumer_metadata_type: str + type of metadata (COLLECTION or GRANULE) + brokers: str + brokers (kubernetes service) + group_id: str + Client group id string. 
All clients sharing the same group.id belong to the same group + auto_offset_reset: str + Action to take when there is no initial offset in offset store or the desired offset is out of range (smallest, earliest, beginning, largest, latest, end, error) + schema_registry: str + schema registry (kubernetes service) URL + security: dict + enabled boolean: Whether to use security for kafka schema registry client. + caLoc str: Kafka schema registry certification authority (CA) file location. + keyLoc str: Kafka schema registry client's private key file location. + certLoc str: Kafka schema registry client's public key file location. + + collection_topic_consume: str + collection topic you want to consume + granule_topic_consume: str + granule topic you want to consume + log_level: str + What log level to use for this class """ - # create logger - log = logging.getLogger() + self.metadata_type = kafka_consumer_metadata_type.upper() + self.brokers = brokers + self.group_id = group_id + self.auto_offset_reset = auto_offset_reset + self.schema_registry = schema_registry + self.security_enabled = security['enabled'] - # create formatter and add it to the handlers - formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + if self.security_enabled: + self.security_caLoc = security['caLoc'] + self.security_keyLoc = security['keyLoc'] + self.security_certLoc = security['certLoc'] - if self.conf['log_level'] == "DEBUG": - log.setLevel(level=logging.DEBUG) - else: - if self.conf['log_level'] == "INFO": - log.setLevel(level=logging.INFO) - else: - log.setLevel(level=logging.ERROR) + self.collection_topic = collection_topic_consume + self.granule_topic = granule_topic_consume - fh = None - if create_file: - # create file handler for logger. - fh = logging.FileHandler(log_name) - fh.setFormatter(formatter) - - # create console handler for logger. - ch = logging.StreamHandler() - ch.setFormatter(formatter) + if self.metadata_type not in ['COLLECTION', 'GRANULE']: + raise ValueError("metadata_type of '%s' must be 'COLLECTION' or 'GRANULE'"%(self.metadata_type)) - # add handlers to logger. 
- if create_file: - log.addHandler(fh) + self.log_level = log_level + self.logger = ClientLogger.get_logger(self.__class__.__name__, log_level, False) + self.logger.info("Initializing " + self.__class__.__name__) - log.addHandler(ch) - return log + if wildargs: + self.logger.debug("Superfluous parameters in constructor call: " + str(wildargs)) def register_client(self): """ Registers to schema registry client based on configs :return: SchemaRegistryClient (confluent kafka library) - """ - reg_conf = {'url': self.schema_registry} + """ + conf = {'url': self.schema_registry} - if self.security: - reg_conf['ssl.ca.location'] = self.conf['security']['caLoc'] - reg_conf['ssl.key.location'] = self.conf['security']['keyLoc'] - reg_conf['ssl.certificate.location'] = self.conf['security']['certLoc'] + if self.security_enabled: + conf['ssl.ca.location'] = self.security_caLoc + conf['ssl.key.location'] = self.security_keyLoc + conf['ssl.certificate.location'] = self.security_certLoc - registry_client = SchemaRegistryClient(reg_conf) + self.logger.info("Creating SchemaRegistryClient with configuration:"+str(conf)) + registry_client = SchemaRegistryClient(conf) return registry_client def connect(self): @@ -152,33 +140,38 @@ def create_consumer(self, registry_client): :return: DeserializingConsumer object """ - metadata_schema = None topic = None if self.metadata_type == "COLLECTION": - metadata_schema = registry_client.get_latest_version(self.collection_topic + '-value').schema.schema_str topic = self.collection_topic if self.metadata_type == "GRANULE": - metadata_schema = registry_client.get_latest_version(self.granule_topic + '-value').schema.schema_str topic = self.granule_topic - metadata_deserializer = AvroDeserializer(metadata_schema, registry_client) + self.logger.debug("topic: "+str(topic)) + + # This topic naming scheme is how OneStop creates the topics. 
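+        # (Confluent's default TopicNameStrategy registers a topic's value schema under the
+        # subject '<topic>-value', which is why '-value' is appended to the topic name here.)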
+ latest_schema = registry_client.get_latest_version(topic + '-value') - consumer_conf = {'bootstrap.servers': self.brokers} + metadata_schema = latest_schema.schema.schema_str + self.logger.debug("metadata_schema: "+metadata_schema) - if self.security: - consumer_conf['security.protocol'] = 'SSL' - consumer_conf['ssl.ca.location'] = self.conf['security']['caLoc'] - consumer_conf['ssl.key.location'] = self.conf['security']['keyLoc'] - consumer_conf['ssl.certificate.location'] = self.conf['security']['certLoc'] + metadata_deserializer = AvroDeserializer(schema_str=metadata_schema, schema_registry_client=registry_client) + conf = { + 'bootstrap.servers': self.brokers, + 'key.deserializer': StringDeserializer('utf-8'), + 'value.deserializer': metadata_deserializer, + 'group.id': self.group_id, + 'auto.offset.reset': self.auto_offset_reset + } - meta_consumer_conf = consumer_conf - meta_consumer_conf['key.deserializer'] = StringDeserializer('utf-8') - meta_consumer_conf['value.deserializer'] = metadata_deserializer - meta_consumer_conf['group.id'] = self.group_id - meta_consumer_conf['auto.offset.reset'] = self.auto_offset_reset + if self.security_enabled: + conf['security.protocol'] = 'SSL' + conf['ssl.ca.location'] = self.security_caLoc + conf['ssl.key.location'] = self.security_keyLoc + conf['ssl.certificate.location'] = self.security_certLoc - metadata_consumer = DeserializingConsumer(meta_consumer_conf) + self.logger.debug("Deserializing conf: "+str(conf)) + metadata_consumer = DeserializingConsumer(conf) metadata_consumer.subscribe([topic]) return metadata_consumer @@ -195,22 +188,19 @@ def consume(self, metadata_consumer, handler): """ self.logger.info('Consuming from topic') while True: - try: - msg = metadata_consumer.poll(10) + msg = metadata_consumer.poll(10) + self.logger.debug("Message received: "+str(msg)) - if msg is None: - print('No Messages') - continue + if msg is None: + self.logger.info('No Messages') + continue - key = msg.key() - value = msg.value() + key = msg.key() + value = msg.value() + self.logger.debug('Message key=%s'%key) + self.logger.debug('Message value=%s'%value) + handler(key, value, self.log_level) - except KafkaError: - raise - try: - handler(key, value) - except Exception as e: - self.logger.error("Message handler failed: {}".format(e)) - break + self.logger.debug("Closing metadata_consumer") metadata_consumer.close() diff --git a/onestop-python-client/onestop/KafkaPublisher.py b/onestop-python-client/onestop/KafkaPublisher.py index d357de8..9206fe0 100644 --- a/onestop-python-client/onestop/KafkaPublisher.py +++ b/onestop-python-client/onestop/KafkaPublisher.py @@ -1,13 +1,11 @@ -import logging -from uuid import UUID import json -import yaml +from uuid import UUID from confluent_kafka.schema_registry import SchemaRegistryClient from confluent_kafka.error import KafkaError from confluent_kafka import SerializingProducer from confluent_kafka.schema_registry.avro import AvroSerializer - +from onestop.util.ClientLogger import ClientLogger class KafkaPublisher: """ @@ -15,114 +13,98 @@ class KafkaPublisher: Attributes ---------- - conf: yaml file - config/kafka-publisher-config-dev.yml - logger: Logger object - utilizes python logger library and creates logging for our specific needs - logger.info: Logger object - logging statement that occurs when the class is instantiated - metadata_type: str - type of metadata (COLLECTION or GRANULE) - brokers: str - brokers (kubernetes service) - schema_registry: str - schema registry (kubernetes service) - security: 
boolean - defines if security is in place - collection_topic: str - collection topic you want to consume - granule_topic: str - granule topic you want to consume + metadata_type: str + type of metadata (COLLECTION or GRANULE) + brokers: str + brokers (kubernetes service) + schema_registry: str + schema registry (kubernetes service) + security_enabled: boolean + defines if security is in place + security_caLoc: str + Kafka schema registry certification authority (CA) file location. + security_keyLoc: str + Kafka schema registry client's private key file location. + security_certLoc: str + Kafka schema registry client's public key file location. + collection_topic: str + collection topic you want to produce to + granule_topic: str + granule topic you want to produce to + logger: Logger object + utilizes python logger library and creates logging for our specific needs Methods ------- - get_logger(log_name, create_file) - creates logger file - - register_client() - registers to schema registry client based on configs + register_client() + registers to schema registry client based on configs - create_producer(registry_client) - creates a SerializingProducer object to produce to kafka topic + create_producer(registry_client) + creates a SerializingProducer object to produce to kafka topic - connect() - utilizes register_client() and create_producer(registry_client) to connect to schema registry and allow for producing to kafka topics + connect() + utilizes register_client() and create_producer(registry_client) to connect to schema registry and allow for producing to kafka topics - publish_collection(collection_producer, collection_uuid, content_dict, method) - Publish collection to collection topic + publish_collection(collection_producer, collection_uuid, content_dict, method) + Publish collection to collection topic - publish_granule(granule_producer, record_uuid, collection_uuid, content_dict) - Publish granule to granule topic + publish_granule(granule_producer, collection_uuid, content_dict) + Publish granule to granule topic """ - conf = None - - def __init__(self, conf_loc): - - with open(conf_loc) as f: - self.conf = yaml.load(f, Loader=yaml.FullLoader) - - self.logger = self.get_logger(self.__class__.__name__, False) - self.logger.info("Initializing " + self.__class__.__name__) - self.metadata_type = self.conf['metadata_type'] - self.brokers = self.conf['brokers'] - self.schema_registry = self.conf['schema_registry'] - self.security = self.conf['security']['enabled'] - self.collection_topic = self.conf['collection_topic_produce'] - self.granule_topic = self.conf['granule_topic_produce'] - - if self.metadata_type not in ['COLLECTION', 'GRANULE']: - raise ValueError("metadata_type must be 'COLLECTION' or 'GRANULE'") - - def get_logger(self, log_name, create_file): + def __init__(self, kafka_publisher_metadata_type, brokers, schema_registry, security, collection_topic_publish, granule_topic_publish, log_level='INFO', **wildargs): """ - Utilizes python logger library and creates logging - - :param log_name: str - name of log to be created - :param create_file: boolean - defines whether of not you want a logger file to be created - - :return: Logger object + Attributes + ---------- + kafka_publisher_metadata_type: str + type of metadata (COLLECTION or GRANULE) + brokers: str + brokers (kubernetes service) + group_id: str + Client group id string. 
All clients sharing the same group.id belong to the same group + auto_offset_reset: str + Action to take when there is no initial offset in offset store or the desired offset is out of range (smallest, earliest, beginning, largest, latest, end, error) + schema_registry: str + schema registry (kubernetes service) URL + security: dict + enabled boolean: Whether to use security for kafka schema registry client. + caLoc str: Kafka schema registry certification authority (CA) file location. + keyLoc str: Kafka schema registry client's private key file location. + certLoc str: Kafka schema registry client's public key file location. + + collection_topic: str + collection topic you want to produce to + granule_topic: str + granule topic you want to produce to """ + self.metadata_type = kafka_publisher_metadata_type.upper() + self.brokers = brokers + self.schema_registry = schema_registry + self.security_enabled = security['enabled'] - # create logger - log = logging.getLogger() - - # create formatter and add it to the handlers - formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') - - if self.conf['log_level'] == "DEBUG": - log.setLevel(level=logging.DEBUG) - else: - if self.conf['log_level'] == "INFO": - log.setLevel(level=logging.INFO) - else: - log.setLevel(level=logging.ERROR) + if self.security_enabled: + self.security_caLoc = security['caLoc'] + self.security_keyLoc = security['keyLoc'] + self.security_certLoc = security['certLoc'] - fh = None - if create_file: - # create file handler for logger. - fh = logging.FileHandler(log_name) - fh.setFormatter(formatter) + self.collection_topic = collection_topic_publish + self.granule_topic = granule_topic_publish - # create console handler for logger. - ch = logging.StreamHandler() - ch.setFormatter(formatter) + if self.metadata_type not in ['COLLECTION', 'GRANULE']: + raise ValueError("metadata_type of '%s' must be 'COLLECTION' or 'GRANULE'"%(self.metadata_type)) - # add handlers to logger. 
- if create_file: - log.addHandler(fh) + self.logger = ClientLogger.get_logger(self.__class__.__name__, log_level, False) + self.logger.info("Initializing " + self.__class__.__name__) - log.addHandler(ch) - return log + if wildargs: + self.logger.debug("Superfluous parameters in constructor call: " + str(wildargs)) def connect(self): """ Utilizes register_client() and create_producer(registry_client) to connect to schema registry and allow for producing to kafka topics :return: SerializingProducer Object - based on config values + based on initial constructor values """ registry_client = self.register_client() metadata_producer = self.create_producer(registry_client) @@ -137,10 +119,10 @@ def register_client(self): reg_conf = {'url': self.schema_registry} - if self.security: - reg_conf['ssl.ca.location'] = self.conf['security']['caLoc'] - reg_conf['ssl.key.location'] = self.conf['security']['keyLoc'] - reg_conf['ssl.certificate.location'] = self.conf['security']['certLoc'] + if self.security_enabled: + reg_conf['ssl.ca.location'] = self.security_caLoc + reg_conf['ssl.key.location'] = self.security_keyLoc + reg_conf['ssl.certificate.location'] = self.security_certLoc registry_client = SchemaRegistryClient(reg_conf) return registry_client @@ -153,34 +135,38 @@ def create_producer(self, registry_client): get this from register_client() :return: SerializingProducer Object - based on config values + based on initial constructor values """ - metadata_schema = None + topic = None if self.metadata_type == "COLLECTION": - metadata_schema = registry_client.get_latest_version(self.collection_topic + '-value').schema.schema_str + topic = self.collection_topic if self.metadata_type == "GRANULE": - metadata_schema = registry_client.get_latest_version(self.granule_topic + '-value').schema.schema_str + topic = self.granule_topic + self.logger.debug("topic: "+str(topic)) - metadata_serializer = AvroSerializer(metadata_schema, registry_client) - producer_conf = {'bootstrap.servers': self.brokers} + metadata_schema = registry_client.get_latest_version(topic + '-value').schema.schema_str + self.logger.debug("metadata_schema: "+metadata_schema) - if self.security: - producer_conf['security.protocol'] = 'SSL' - producer_conf['ssl.ca.location'] = self.conf['security']['caLoc'] - producer_conf['ssl.key.location'] = self.conf['security']['keyLoc'] - producer_conf['ssl.certificate.location'] = self.conf['security']['certLoc'] + metadata_serializer = AvroSerializer(schema_str=metadata_schema, schema_registry_client=registry_client) + conf = { + 'bootstrap.servers': self.brokers, + 'value.serializer': metadata_serializer} - meta_producer_conf = producer_conf - meta_producer_conf['value.serializer'] = metadata_serializer + if self.security_enabled: + conf['security.protocol'] = 'SSL' + conf['ssl.ca.location'] = self.security_caLoc + conf['ssl.key.location'] = self.security_keyLoc + conf['ssl.certificate.location'] = self.security_certLoc - metadata_producer = SerializingProducer(meta_producer_conf) + self.logger.debug("Serializing conf: "+str(conf)) + metadata_producer = SerializingProducer(conf) return metadata_producer def delivery_report(self, err, msg): """ - Called once for each message produced to indicate delivery result. Triggered by poll() or flush(). + Called once for each message produced to indicate delivery of message. Triggered by poll() or flush(). 
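+        Registered via the on_delivery argument passed to produce() elsewhere in this class.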
:param err: str err produced after publishing, if there is one @@ -190,16 +176,29 @@ def delivery_report(self, err, msg): if err is not None: self.logger.error('Message delivery failed: {}'.format(err)) else: - self.logger.error('Message delivered to {} [{}]'.format(msg.topic(), msg.partition())) + self.logger.info('Message delivered to {} [{}]'.format(msg.topic(), msg.partition())) + + @staticmethod + def get_collection_key_from_uuid(collection_uuid): + """ + Create a key to use in a kafka message from the given string representation of the collection UUID. + :param collection_uuid: str + collection string to turn into a key. + :return: + """ + if type(collection_uuid) == bytes: + return str(UUID(bytes=collection_uuid)) + else: + return str(UUID(hex=collection_uuid)) def publish_collection(self, collection_producer, collection_uuid, content_dict, method): """ - Publish collection to collection topic + Publish a collection to the collection topic :param collection_producer: SerializingProducer use connect() :param collection_uuid: str - collection uuid that you want colelction to have + collection uuid that you want the collection to have :param content_dict: dict dictionary containing information you want to publish :param method: str @@ -208,11 +207,9 @@ def publish_collection(self, collection_producer, collection_uuid, content_dict, :return: str returns msg if publish is successful, kafka error if it wasn't successful """ - self.logger.info('Publish collection') - if type(collection_uuid) == bytes: - key = str(UUID(bytes=collection_uuid)) - else: - key = str(UUID(hex=collection_uuid)) + self.logger.info('Publishing collection') + + key = self.get_collection_key_from_uuid(collection_uuid) value_dict = { 'type': 'collection', @@ -221,21 +218,20 @@ def publish_collection(self, collection_producer, collection_uuid, content_dict, 'method': method, 'source': 'unknown', } - try: - collection_producer.produce(topic=self.collection_topic, value=value_dict, key=key, - on_delivery=self.delivery_report) - except KafkaError: - raise + self.logger.debug('Publishing collection with topic='+self.collection_topic+' key='+key+' value='+str(value_dict)) + collection_producer.produce( + topic=self.collection_topic, + value=value_dict, + key=key, + on_delivery=self.delivery_report) collection_producer.poll() - def publish_granule(self, granule_producer, record_uuid, collection_uuid, content_dict): + def publish_granule(self, granule_producer, collection_uuid, content_dict): """ - Publishes granule to granule topic + Publish a granule to the granule topic :param granule_producer: SerializingProducer use connect() - :param record_uuid: str - record uuid associated with the granule :param collection_uuid: str collection uuid associated with the granule :param content_dict: dict @@ -246,10 +242,8 @@ def publish_granule(self, granule_producer, record_uuid, collection_uuid, conten """ self.logger.info('Publish granule') - if type(record_uuid) == bytes: - key = str(UUID(bytes=collection_uuid)) - else: - key = str(UUID(hex=collection_uuid)) + key = self.get_collection_key_from_uuid(collection_uuid) + """ if type(collection_uuid) == bytes: content_dict['relationships'] = [{"type": "COLLECTION", "id": collection_uuid.hex()}] @@ -281,9 +275,11 @@ def publish_granule(self, granule_producer, record_uuid, collection_uuid, conten 'discovery': content_dict['discovery'] } - try: - granule_producer.produce(topic=self.granule_topic, value=value_dict, key=key, - on_delivery=self.delivery_report) - except KafkaError: - raise + 
self.logger.debug('Publishing granule with topic='+self.granule_topic+' key='+key+' value='+str(value_dict)) + granule_producer.produce( + topic=self.granule_topic, + value=value_dict, + key=key, + on_delivery=self.delivery_report) + granule_producer.poll() diff --git a/onestop-python-client/onestop/WebPublisher.py b/onestop-python-client/onestop/WebPublisher.py index 55ca06c..47c3bd3 100644 --- a/onestop-python-client/onestop/WebPublisher.py +++ b/onestop-python-client/onestop/WebPublisher.py @@ -7,31 +7,30 @@ class WebPublisher: Attributes ---------- - registry_base_url: str - url for registry endpoint - registry_username: str - username for posting metadata to registry - registry_password: str - password for posting metadata to registry - onestop_base_url: str - url for onestop endpoint - logger.info: str - logging level + registry_base_url: str + URL for registry endpoint + registry_username: str + Registry username where credentials needed + registry_password: str + Registry password where credentials needed + onestop_base_url: str + URL for OneStop endpoint + logger.info: str + logging level Methods ------- - publish_registry(metadata_type, uuid, payload, method) - Publish to registry with either POST,PUT, OR PATCH methods - delete_registry(metadata_type, uuid) - Deletes item from registry - search_registry(metadata_type, uuid) - Searches for an item in registry given its metadata type and uuid - search_onestop(metadata_type, payload) - Acquires the item, collection or granule, from OneStop - get_granules_onestop(self, uuid) - Acquires granules from OneStop given the uuid + publish_registry(metadata_type, uuid, payload, method) + Publish an item to registry with either POST, PUT, OR PATCH methods + delete_registry(metadata_type, uuid) + Delete an item from registry + search_registry(metadata_type, uuid) + Search for an item in registry given its metadata type and uuid + search_onestop(metadata_type, payload) + Search for an item in OneStop given its metadata type and payload search criteria + get_granules_onestop(self, uuid) + Search for a granule in OneStop given its uuid """ - conf = None def __init__(self, registry_base_url, registry_username, registry_password, onestop_base_url, log_level="INFO", **kwargs): self.registry_base_url = registry_base_url @@ -43,7 +42,7 @@ def __init__(self, registry_base_url, registry_username, registry_password, ones self.logger.info("Initializing " + self.__class__.__name__) if kwargs: - self.logger.info("There were extra constructor arguments: " + str(kwargs)) + self.logger.debug("Superfluous parameters in constructor call: " + str(kwargs)) def publish_registry(self, metadata_type, uuid, payload, method): """ @@ -84,12 +83,12 @@ def publish_registry(self, metadata_type, uuid, payload, method): def delete_registry(self, metadata_type, uuid): """ - Deletes item from registry + Delete an item from registry :param metadata_type: str metadata type (GRANULE/COLLECTION) :param uuid: str - uuid you want to publish with + uuid you want to delete :return: str response message indicating if delete was successful @@ -105,7 +104,7 @@ def delete_registry(self, metadata_type, uuid): def search_registry(self, metadata_type, uuid): """ - Searches for an item in registry given its metadata type and uuid + Search for an item in registry given its metadata type and uuid :param metadata_type: str metadata type (GRANULE/COLLECTION) @@ -126,7 +125,7 @@ def search_registry(self, metadata_type, uuid): def search_onestop(self, metadata_type, payload): """ - Searches for an 
item in OneStop given its metadata type and payload search criteria. + Search for an item in OneStop given its metadata type and payload search criteria. :param metadata_type: str metadata type (GRANULE/COLLECTION) @@ -147,7 +146,7 @@ def search_onestop(self, metadata_type, payload): def get_granules_onestop(self, uuid): """ - Searches for a granule in OneStop given its uuid + Search for a granule in OneStop given its uuid :param uuid: str uuid you want search for diff --git a/onestop-python-client/onestop/extract/CsbExtractor.py b/onestop-python-client/onestop/extract/CsbExtractor.py index e79cddc..b1006cb 100644 --- a/onestop-python-client/onestop/extract/CsbExtractor.py +++ b/onestop-python-client/onestop/extract/CsbExtractor.py @@ -2,61 +2,33 @@ from datetime import datetime class CsbExtractor: + """ A class used to extract geospatial data from csv files in an s3 bucket - Attributes - ---------- - su : S3 Utils object - an instance of the s3 utils class used to connect to the corresponding s3 bucket to get access to the csv file for extraction - boto_client: boto3 client - specific boto3 client type (s3, s3_resource, glacier, session) used to access aws resources - bucket: str - the name of the s3 bucket in which you want to access - key: str - the name of key path for the specific item you want to access in the bucket - - Methods ------- is_csv(file_name) - checks to see if the given file is of type csv + Verifies a file name ends with '.csv' get_spatial_temporal_bounds(lon_column_name, lat_column_name, date_column_name) - extracts min/max longitude and latitude values as well as beginning and ending dates from specified csv file + Gets the spacial bounding box for the open file. This seeks to the start of the file at start and the end. extract_coords(max_lon, max_lat, min_lon, min_lat) - extracts specific coordinates corresponding to min/max longitude and latitude values given from get_spatial_temporal_bounds(....) method + Given the max/min lon and lat, the function will parse the csv file to extract the coordinates within the given bounding box. """ - def __init__(self, su, key): - """ - :param su: S3 Utils object - an instance of the s3 utils class used to connect to the corresponding s3 bucket to get access to the csv file for extraction - :param key: str - the name of key path for the specific item you want to access in the bucket - - Other Attributes - ________________ - boto_client: boto3 client - specific boto3 client type (s3, s3_resource, glacier, session) used to access aws resources - bucket: str - the name of the s3 bucket in which you want to access + @staticmethod + def is_csv(file_name): """ - self.su = su - boto_client = self.su.connect("session", None) - bucket = self.su.conf['s3_bucket'] - self.key = key - - def is_csv(self, file_name): - """ - Checks to see if the given file is of type csv + Verifies a file name ends with '.csv' :param file_name: str - the name of the file in the s3 bucket i.e. file1.csv + File name with extension on the end. 
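+            (e.g. 'file1.csv' returns True, 'file1.txt' returns False)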
- :return: boolean - True if the file name contains .csv and False otherwise + :return: str + True if ends with csv + False if doesn't end with csv """ csv_str = '.csv' if file_name.endswith(csv_str): @@ -64,28 +36,22 @@ def is_csv(self, file_name): return False - # def smart_open_read(self, key): - # boto_client = self.su.connect("session", None) - # bucket = self.su.conf['s3_bucket'] - # self.su.read_csv_s3(boto_client, bucket, key) - - - def get_spatial_temporal_bounds(self, lon_column_name, lat_column_name, date_column_name): + @staticmethod + def get_spatial_temporal_bounds(sm_open_file, lon_column_name, lat_column_name, date_column_name): """ - Extracts min/max longitude and latitude values as well as beginning and ending dates from specified csv file + Gets the spacial bounding box for the open file. This seeks to the start of the file at start and the end. + :param sm_open_file: file-like object + A file-like object that is open, say from smart_open's sm_open. :param lon_column_name: str - name of longitude column in the csv file + Longitude column name :param lat_column_name: str - name of the latitude column in the csv file + Latitude column name :param date_column_name: str - name of the date column in the csv file + Date column name :return: dict - Key : Value - geospatial (str) -> List[float] containing min/max longitude and latitude values - temporal (str) -> List[str] containing beginning and end dates - + geospatial and temporal fields of the bounding box for given constraints. """ lon_min_val = None lon_max_val = None @@ -99,9 +65,7 @@ def get_spatial_temporal_bounds(self, lon_column_name, lat_column_name, date_col # variable to be returned in string format begin_date_str = '' - boto_client = self.su.connect("session", None) - bucket = self.su.conf['s3_bucket'] - sm_open_file = self.su.get_csv_s3(boto_client, bucket, self.key) + sm_open_file.seek(0) csv_reader = csv.DictReader(sm_open_file) for row in csv_reader: @@ -151,43 +115,40 @@ def get_spatial_temporal_bounds(self, lon_column_name, lat_column_name, date_col "temporal": [begin_date_str, end_date_str] } + sm_open_file.seek(0) return geospatial_temporal_bounds - - def extract_coords(self, max_lon, max_lat, min_lon, min_lat): + @staticmethod + def extract_coords(sm_open_file, max_lon, max_lat, min_lon, min_lat): """ - Extracts specific coordinates corresponding to min/max longitude and latitude values given from get_spatial_temporal_bounds(....) method - - :param max_lon: float - maximum longitude value - :param max_lat: float - maximum latitude value - :param min_lon: float - minimum longitude value - :param min_lat: float - minimum latitude value - - :return: List[ List[Float] ] - Returns a list of lists. Each list contains floats (longitude and latitude ) value pairs corresponding to - one of the min/max latitude and longitude values that were extracted previously from get_spatial_temporal_bounds (...) + Given the max/min lon and lat, the function will parse the csv file to extract the coordinates within the given bounding box. + + :param sm_open_file: file-like object + A file-like object that is open, say from smart_open's sm_open. + :param max_lon: str + Maximum longitude + :param max_lat: str + Maximum latitude + :param min_lon: str + Minimum longitude + :param min_lat: str + Minimum latitude + + :return: list + List of the the coordinates (no duplicates) within the file that are within the given bounding box. 
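+
+        Example (hypothetical values; csv_file is any open file-like object whose rows have LON and LAT columns):
+            coords = CsbExtractor.extract_coords(csv_file, max_lon=-60.0, max_lat=45.0,
+                                                 min_lon=-80.0, min_lat=30.0)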
""" - # Keeps track of all coordinates that needs to be added to json payload coords = [] - boto_client = self.su.connect("session", None) - bucket = self.su.conf['s3_bucket'] - sm_open_file = self.su.get_csv_s3(boto_client, bucket, self.key) + sm_open_file.seek(0) csv_reader = csv.DictReader(sm_open_file) - for row in csv_reader: - if float( row['LAT'] ) == min_lat or float( row['LAT'] ) == max_lat or float( - row['LON'] ) == min_lon or float( row['LON'] ) == max_lon: + if float( row['LAT'] ) == min_lat or float( row['LAT'] ) == max_lat or \ + float( row['LON'] ) == min_lon or float( row['LON'] ) == max_lon: coord = [float( row['LON'] ), float( row['LAT'] )] - - # check to see if that coordinate has already been appended to the list that is keeping track of our coordinates + # if this coordinate has already been appended to the list to return (no duplicates) if coord not in coords: coords.append( coord ) + sm_open_file.seek(0) return coords - diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses/line_string_type.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses/line_string_type.py index eba35f3..dcf0f49 100644 --- a/onestop-python-client/onestop/schemas/geojsonSchemaClasses/line_string_type.py +++ b/onestop-python-client/onestop/schemas/geojsonSchemaClasses/line_string_type.py @@ -3,7 +3,7 @@ class LineStringType(Enum): - LineString = 'LineString' + LINESTRING = 'LineString' #: The Avro Schema associated to this class _schema: ClassVar[str] = """{ diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses/multi_line_string_type.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses/multi_line_string_type.py index 7e16945..4f47369 100644 --- a/onestop-python-client/onestop/schemas/geojsonSchemaClasses/multi_line_string_type.py +++ b/onestop-python-client/onestop/schemas/geojsonSchemaClasses/multi_line_string_type.py @@ -3,7 +3,7 @@ class MultiLineStringType(Enum): - MultiLineString = 'MultiLineString' + MULTILINESTRING = 'MultiLineString' #: The Avro Schema associated to this class _schema: ClassVar[str] = """{ diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses/multi_point_type.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses/multi_point_type.py index a428813..dcd95a7 100644 --- a/onestop-python-client/onestop/schemas/geojsonSchemaClasses/multi_point_type.py +++ b/onestop-python-client/onestop/schemas/geojsonSchemaClasses/multi_point_type.py @@ -3,7 +3,7 @@ class MultiPointType(Enum): - MultiPoint = 'MultiPoint' + MULTIPOINT = 'MultiPoint' #: The Avro Schema associated to this class _schema: ClassVar[str] = """{ diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses/multi_polygon_type.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses/multi_polygon_type.py index 8c98577..6bb38a1 100644 --- a/onestop-python-client/onestop/schemas/geojsonSchemaClasses/multi_polygon_type.py +++ b/onestop-python-client/onestop/schemas/geojsonSchemaClasses/multi_polygon_type.py @@ -3,7 +3,7 @@ class MultiPolygonType(Enum): - MultiPolygon = 'MultiPolygon' + MULTIPOLYGON = 'MultiPolygon' #: The Avro Schema associated to this class _schema: ClassVar[str] = """{ diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses/point_type.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses/point_type.py index 386124e..08a2043 100644 --- a/onestop-python-client/onestop/schemas/geojsonSchemaClasses/point_type.py +++ 
b/onestop-python-client/onestop/schemas/geojsonSchemaClasses/point_type.py @@ -3,7 +3,7 @@ class PointType(Enum): - Point = 'Point' + POINT = 'Point' #: The Avro Schema associated to this class _schema: ClassVar[str] = """{ diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses/polygon_type.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses/polygon_type.py index 6e373a3..0c1986b 100644 --- a/onestop-python-client/onestop/schemas/geojsonSchemaClasses/polygon_type.py +++ b/onestop-python-client/onestop/schemas/geojsonSchemaClasses/polygon_type.py @@ -3,7 +3,7 @@ class PolygonType(Enum): - Polygon = 'Polygon' + POLYGON = 'Polygon' #: The Avro Schema associated to this class _schema: ClassVar[str] = """{ "name": "PolygonType", diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/__init__.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/__init__.py deleted file mode 100644 index 3862fe7..0000000 --- a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -import faker - -from pyavro_gen.enum_with_schema_provider import EnumWithSchemaProvider -from .testing_classes import test_classes - -fake = faker.Faker() -fake.add_provider(EnumWithSchemaProvider) diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/line_string_type_factory.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/line_string_type_factory.py deleted file mode 100644 index 2f69d1d..0000000 --- a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/line_string_type_factory.py +++ /dev/null @@ -1,9 +0,0 @@ -from factory import Factory, lazy_attribute -from onestop.schemaTest2.line_string_type import LineStringType -from onestop.schemaTest2_test import fake - - -class LineStringTypeFactory(Factory): - class Meta: - model = LineStringType - value = lazy_attribute(lambda x: fake.enum_with_schema(LineStringType)) diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/multi_line_string_type_factory.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/multi_line_string_type_factory.py deleted file mode 100644 index 3c3ef42..0000000 --- a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/multi_line_string_type_factory.py +++ /dev/null @@ -1,10 +0,0 @@ -from factory import Factory, lazy_attribute - -from onestop.schemaTest2.multi_line_string_type import MultiLineStringType -from onestop.schemaTest2_test import fake - - -class MultiLineStringTypeFactory(Factory): - class Meta: - model = MultiLineStringType - value = lazy_attribute(lambda x: fake.enum_with_schema(MultiLineStringType)) diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/multi_point_type_factory.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/multi_point_type_factory.py deleted file mode 100644 index 90b4b6a..0000000 --- a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/multi_point_type_factory.py +++ /dev/null @@ -1,10 +0,0 @@ -from factory import Factory, lazy_attribute - -from onestop.schemaTest2.multi_point_type import MultiPointType -from onestop.schemaTest2_test import fake - - -class MultiPointTypeFactory(Factory): - class Meta: - model = MultiPointType - value = lazy_attribute(lambda x: fake.enum_with_schema(MultiPointType)) diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/multi_polygon_type_factory.py 
b/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/multi_polygon_type_factory.py deleted file mode 100644 index a417b9f..0000000 --- a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/multi_polygon_type_factory.py +++ /dev/null @@ -1,10 +0,0 @@ -from factory import Factory, lazy_attribute - -from onestop.schemaTest2.multi_polygon_type import MultiPolygonType -from onestop.schemaTest2_test import fake - - -class MultiPolygonTypeFactory(Factory): - class Meta: - model = MultiPolygonType - value = lazy_attribute(lambda x: fake.enum_with_schema(MultiPolygonType)) diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/__init__.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/__init__.py deleted file mode 100644 index 832bb72..0000000 --- a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from onestop.schemaTest2_test.org.cedar.schemas.avro.geojson.line_string_factory import LineStringFactory -from onestop.schemaTest2_test.org.cedar.schemas.avro.geojson.polygon_factory import PolygonFactory -from onestop.schemaTest2_test.org.cedar.schemas.avro.geojson.multi_line_string_factory import MultiLineStringFactory -from onestop.schemaTest2_test.org.cedar.schemas.avro.geojson.point_factory import PointFactory -from onestop.schemaTest2_test.org.cedar.schemas.avro.geojson.multi_point_factory import MultiPointFactory -from onestop.schemaTest2_test.org.cedar.schemas.avro.geojson.multi_polygon_factory import MultiPolygonFactory diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/line_string_factory.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/line_string_factory.py deleted file mode 100644 index 85213c3..0000000 --- a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/line_string_factory.py +++ /dev/null @@ -1,14 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from onestop.schemaTest2.org.cedar.schemas.avro.geojson import LineString -from onestop.schemaTest2_test import fake -from onestop.schemaTest2_test.line_string_type_factory import LineStringTypeFactory - - -class LineStringFactory(Factory): - class Meta: - model = LineString - type = lazy_attribute(lambda x: LineStringTypeFactory()) - coordinates = lazy_attribute(lambda x: [[[fake.pyfloat() for _ in range(randint(1, 5))]][randint(0, 0)] for _ in range(randint(1, 5))]) diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_line_string_factory.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_line_string_factory.py deleted file mode 100644 index 227621f..0000000 --- a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_line_string_factory.py +++ /dev/null @@ -1,14 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from onestop.schemaTest2.org.cedar.schemas.avro.geojson import MultiLineString -from onestop.schemaTest2_test import fake -from onestop.schemaTest2_test.multi_line_string_type_factory import MultiLineStringTypeFactory - - -class MultiLineStringFactory(Factory): - class Meta: - model = MultiLineString - type = lazy_attribute(lambda x: 
MultiLineStringTypeFactory()) - coordinates = lazy_attribute(lambda x: [[[[[fake.pyfloat() for _ in range(randint(1, 5))]][randint(0, 0)] for _ in range(randint(1, 5))]][randint(0, 0)] for _ in range(randint(1, 5))]) diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_point_factory.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_point_factory.py deleted file mode 100644 index 5d55bbd..0000000 --- a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_point_factory.py +++ /dev/null @@ -1,14 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from onestop.schemaTest2.org.cedar.schemas.avro.geojson import MultiPoint -from onestop.schemaTest2_test import fake -from onestop.schemaTest2_test.multi_point_type_factory import MultiPointTypeFactory - - -class MultiPointFactory(Factory): - class Meta: - model = MultiPoint - type = lazy_attribute(lambda x: MultiPointTypeFactory()) - coordinates = lazy_attribute(lambda x: [[[fake.pyfloat() for _ in range(randint(1, 5))]][randint(0, 0)] for _ in range(randint(1, 5))]) diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_polygon_factory.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_polygon_factory.py deleted file mode 100644 index 2d716ab..0000000 --- a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_polygon_factory.py +++ /dev/null @@ -1,14 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from onestop.schemaTest2.org.cedar.schemas.avro.geojson import MultiPolygon -from onestop.schemaTest2_test import fake -from onestop.schemaTest2_test.multi_polygon_type_factory import MultiPolygonTypeFactory - - -class MultiPolygonFactory(Factory): - class Meta: - model = MultiPolygon - type = lazy_attribute(lambda x: MultiPolygonTypeFactory()) - coordinates = lazy_attribute(lambda x: [[[[[[[fake.pyfloat() for _ in range(randint(1, 5))]][randint(0, 0)] for _ in range(randint(1, 5))]][randint(0, 0)] for _ in range(randint(1, 5))]][randint(0, 0)] for _ in range(randint(1, 5))]) diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/point_factory.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/point_factory.py deleted file mode 100644 index 05ad1e8..0000000 --- a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/point_factory.py +++ /dev/null @@ -1,14 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from onestop.schemaTest2.org.cedar.schemas.avro.geojson import Point -from onestop.schemaTest2_test import fake -from onestop.schemaTest2_test.point_type_factory import PointTypeFactory - - -class PointFactory(Factory): - class Meta: - model = Point - type = lazy_attribute(lambda x: PointTypeFactory()) - coordinates = lazy_attribute(lambda x: [fake.pyfloat() for _ in range(randint(1, 5))]) diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/polygon_factory.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/polygon_factory.py deleted file mode 100644 index 1274b26..0000000 --- 
a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/geojson/polygon_factory.py +++ /dev/null @@ -1,14 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from onestop.schemaTest2.org.cedar.schemas.avro.geojson import Polygon -from onestop.schemaTest2_test import fake -from onestop.schemaTest2_test.polygon_type_factory import PolygonTypeFactory - - -class PolygonFactory(Factory): - class Meta: - model = Polygon - type = lazy_attribute(lambda x: PolygonTypeFactory()) - coordinates = lazy_attribute(lambda x: [[[[[fake.pyfloat() for _ in range(randint(1, 5))]][randint(0, 0)] for _ in range(randint(1, 5))]][randint(0, 0)] for _ in range(randint(1, 5))]) diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/point_type_factory.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/point_type_factory.py deleted file mode 100644 index 542a39d..0000000 --- a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/point_type_factory.py +++ /dev/null @@ -1,10 +0,0 @@ -from factory import Factory, lazy_attribute - -from onestop.schemaTest2.point_type import PointType -from onestop.schemaTest2_test import fake - - -class PointTypeFactory(Factory): - class Meta: - model = PointType - value = lazy_attribute(lambda x: fake.enum_with_schema(PointType)) diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/polygon_type_factory.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/polygon_type_factory.py deleted file mode 100644 index 01ca0e3..0000000 --- a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/polygon_type_factory.py +++ /dev/null @@ -1,10 +0,0 @@ -from factory import Factory, lazy_attribute - -from onestop.schemaTest2.polygon_type import PolygonType -from onestop.schemaTest2_test import fake - - -class PolygonTypeFactory(Factory): - class Meta: - model = PolygonType - value = lazy_attribute(lambda x: fake.enum_with_schema(PolygonType)) diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/testing_classes.py b/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/testing_classes.py deleted file mode 100644 index d2fa1a0..0000000 --- a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/testing_classes.py +++ /dev/null @@ -1,16 +0,0 @@ -from pyavro_gen.codewriters.namespace import ClassItem - -test_classes = [ - ClassItem('schemaTest2.org.cedar.schemas.avro.geojson', 'LineString', 'schemaTest2_test.org.cedar.schemas.avro.geojson', 'LineStringFactory'), - ClassItem('schemaTest2.org.cedar.schemas.avro.geojson', 'Polygon', 'schemaTest2_test.org.cedar.schemas.avro.geojson', 'PolygonFactory'), - ClassItem('schemaTest2.org.cedar.schemas.avro.geojson', 'MultiLineString', 'schemaTest2_test.org.cedar.schemas.avro.geojson', 'MultiLineStringFactory'), - ClassItem('schemaTest2.org.cedar.schemas.avro.geojson', 'Point', 'schemaTest2_test.org.cedar.schemas.avro.geojson', 'PointFactory'), - ClassItem('schemaTest2.org.cedar.schemas.avro.geojson', 'MultiPoint', 'schemaTest2_test.org.cedar.schemas.avro.geojson', 'MultiPointFactory'), - ClassItem('schemaTest2.org.cedar.schemas.avro.geojson', 'MultiPolygon', 'schemaTest2_test.org.cedar.schemas.avro.geojson', 'MultiPolygonFactory'), - ClassItem('schemaTest2.', 'LineStringType', 'schemaTest2_test.', 'LineStringTypeFactory'), - ClassItem('schemaTest2.', 'PolygonType', 'schemaTest2_test.', 'PolygonTypeFactory'), - ClassItem('schemaTest2.', 'MultiLineStringType', 
'schemaTest2_test.', 'MultiLineStringTypeFactory'), - ClassItem('schemaTest2.', 'PointType', 'schemaTest2_test.', 'PointTypeFactory'), - ClassItem('schemaTest2.', 'MultiPointType', 'schemaTest2_test.', 'MultiPointTypeFactory'), - ClassItem('schemaTest2.', 'MultiPolygonType', 'schemaTest2_test.', 'MultiPolygonTypeFactory'), -] diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/file_location_type.py b/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/file_location_type.py index f3b19a8..3c7aa38 100644 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/file_location_type.py +++ b/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/file_location_type.py @@ -1,29 +1,23 @@ -from dataclasses import asdict, dataclass -from typing import Dict +from enum import Enum +from typing import ClassVar -from undictify import type_checked_constructor +class FileLocationType(Enum): + INGEST = 'INGEST' + ARCHIVE = 'ARCHIVE' + ACCESS = 'ACCESS' + WORKING = 'WORKING' -@type_checked_constructor() -@dataclass -class FileLocationType: - # manually added this - type: str - - def to_dict(self) -> Dict: - """ - Returns a dictionary version of this instance. - """ - return asdict(self) - - @classmethod - def from_dict( - cls, - the_dict: Dict - ) -> 'FileLocationType': - """ - Returns an instance of this class from a dictionary. - - :param the_dict: The dictionary from which to create an instance of this class. - """ - return cls(**the_dict) + #: The Avro Schema associated to this class + _schema: ClassVar[str] = """{ + "type": "enum", + "namespace": "org.cedar.schemas.avro.psi", + "name": "FileLocationType", + "doc": "The type of the file location, e.g. an ingest location, access location, etc.", + "symbols": [ + "INGEST", + "ARCHIVE", + "ACCESS", + "WORKING" + ] + }""" diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/method.py b/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/method.py index a741e7e..3d21d61 100644 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/method.py +++ b/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/method.py @@ -1,26 +1,36 @@ -from dataclasses import asdict, dataclass -from typing import Dict +from enum import Enum +from typing import ClassVar +class Method(Enum): + """ + An HTTP request method + """ + HEAD = 'HEAD' + OPTIONS = 'OPTIONS' + GET = 'GET' + POST = 'POST' + PUT = 'PUT' + PATCH = 'PATCH' + DELETE = 'DELETE' + TRACE = 'TRACE' + CONNECT = 'CONNECT' -from undictify import type_checked_constructor -@type_checked_constructor() -@dataclass -class Method: - def to_dict(self) -> Dict: - """ - Returns a dictionary version of this instance. - """ - return asdict(self) - - @classmethod - def from_dict( - cls, - the_dict: Dict - ) -> 'Method': - """ - Returns an instance of this class from a dictionary. - - :param the_dict: The dictionary from which to create an instance of this class.
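
FileLocationType above is converted from an undictify dataclass wrapper to a plain Enum whose members mirror the Avro symbols, with the Avro definition kept alongside as a _schema string; Method, OperationType, RelationshipType, and ValidDescriptor get the same treatment in the hunks that follow. A minimal usage sketch of the new pattern, assuming the installed onestop package exposes the module path implied by the file path (that layout is an assumption, not something this diff pins down):

    # Illustrative only: FileLocationType comes from this diff; the import path is assumed.
    from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.file_location_type import FileLocationType

    loc = FileLocationType.ARCHIVE              # access a member by name
    assert loc.value == 'ARCHIVE'               # .value is the Avro symbol string
    assert FileLocationType('ARCHIVE') is loc   # look a member up by its symbol

Member names are also uppercased across the regenerated classes (PointType.POINT, RecordType.GRANULE, and so on), so callers still using the old lowercase names need to be updated.
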
- """ - return cls(**the_dict) + #: The Avro Schema associated to this class + _schema: ClassVar[str] = """{ + "type": "enum", + "namespace": "org.cedar.schemas.avro.psi", + "name": "Method", + "doc": "An HTTP request method", + "symbols": [ + "HEAD", + "OPTIONS", + "GET", + "POST", + "PUT", + "PATCH", + "DELETE", + "TRACE", + "CONNECT" + ] + }""" diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/operation_type.py b/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/operation_type.py index 7ceee49..a83da87 100644 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/operation_type.py +++ b/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/operation_type.py @@ -1,26 +1,24 @@ -from dataclasses import asdict, dataclass -from typing import Dict +from enum import Enum +from typing import ClassVar -from undictify import type_checked_constructor +class OperationType(Enum): + """ + The types of metadata relationships which can be represented in the PSI system + """ + NO_OP = "NO_OP" + ADD = "ADD" + REMOVE = "REMOVE" -@type_checked_constructor() -@dataclass -class OperationType: - def to_dict(self) -> Dict: - """ - Returns a dictionary version of this instance. - """ - return asdict(self) - - @classmethod - def from_dict( - cls, - the_dict: Dict - ) -> 'OperationType': - """ - Returns an instance of this class from a dictionary. - - :param the_dict: The dictionary from which to create an instance of this class. - """ - return cls(**the_dict) + #: The Avro Schema associated to this class + _schema: ClassVar[str] = """{ + "type": "enum", + "namespace": "org.cedar.schemas.avro.psi", + "name": "OperationType", + "doc": "The specific operation to execute, mainly for PATCH-method input messages. 
Use default of NO_OP for when the method is unambiguous on its own", + "symbols": [ + "NO_OP", + "ADD", + "REMOVE" + ] + }""" \ No newline at end of file diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/parsed_record.py b/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/parsed_record.py index edc9e46..5402719 100644 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/parsed_record.py +++ b/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/parsed_record.py @@ -26,11 +26,7 @@ class ParsedRecord: fileInformation: Optional[FileInformation] #: A list of location objects describing where the file is located - - # Commenting out for now because causing errors - #fileLocations: Dict[str, FileLocation] - - fileLocations: Optional[FileLocation] + fileLocations: Dict[str, FileLocation] publishing: Optional[Publishing] #: A record of this objects relationships to other objects in the inventory diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/record_type.py b/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/record_type.py index 3a3cf09..a74ab03 100644 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/record_type.py +++ b/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/record_type.py @@ -6,8 +6,8 @@ class RecordType(Enum): """ The types of metadata records which can be represented in the PSI system """ - collection = 'collection' - granule = 'granule' + COLLECTION = 'collection' + GRANULE = 'granule' #: The Avro Schema associated to this class _schema: ClassVar[str] = """{ diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/relationship.py b/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/relationship.py index fa4d92e..498f093 100644 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/relationship.py +++ b/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/relationship.py @@ -2,10 +2,8 @@ from typing import ClassVar, Dict, Optional from undictify import type_checked_constructor - from .relationship_type import RelationshipType - @type_checked_constructor() @dataclass class Relationship: diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/relationship_type.py b/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/relationship_type.py index c227f89..3c9b354 100644 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/relationship_type.py +++ b/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/relationship_type.py @@ -1,28 +1,20 @@ -from dataclasses import asdict, dataclass -from typing import Dict +from enum import Enum +from typing import ClassVar -from undictify import type_checked_constructor +class RelationshipType(Enum): + """ + The types of metadata relationships which can be represented in the PSI system + """ + COLLECTION = 'COLLECTION' -@type_checked_constructor() -@dataclass -class RelationshipType: - type: str - - def to_dict(self) -> Dict: - """ - Returns a dictionary version of this instance. 
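
With fileLocations on ParsedRecord restored to Dict[str, FileLocation], callers that treated it as a single optional object should iterate the map instead. A hedged sketch, assuming the map is keyed by location URI and that FileLocation.type holds a FileLocationType member (the field names come from the FileLocationFactory removed further down; the module paths and exact field types are assumptions, not guaranteed by this diff):

    from typing import Dict, List

    from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.file_location import FileLocation
    from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.file_location_type import FileLocationType

    def archive_uris(file_locations: Dict[str, FileLocation]) -> List[str]:
        # Collect the URIs of every ARCHIVE location on a ParsedRecord.
        return [uri for uri, loc in file_locations.items()
                if loc.type == FileLocationType.ARCHIVE]
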
- """ - return asdict(self) - - @classmethod - def from_dict( - cls, - the_dict: Dict - ) -> 'RelationshipType': - """ - Returns an instance of this class from a dictionary. - - :param the_dict: The dictionary from which to create an instance of this class. - """ - return cls(**the_dict) + #: The Avro Schema associated to this class + _schema: ClassVar[str] = """{ + "name": "RelationshipType", + "namespace": "org.cedar.schemas.avro.psi", + "type": "enum", + "doc": " + "symbols": [ + "COLLECTION" + ] + }""" diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/valid_descriptor.py b/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/valid_descriptor.py index 522712c..7a8b3ec 100644 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/valid_descriptor.py +++ b/onestop-python-client/onestop/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/valid_descriptor.py @@ -1,26 +1,21 @@ -from dataclasses import asdict, dataclass -from typing import Dict +from enum import Enum +from typing import ClassVar -from undictify import type_checked_constructor +class ValidDescriptor(Enum): + VALID = 'VALID' + INVALID = 'INVALID' + UNDEFINED = 'UNDEFINED' -@type_checked_constructor() -@dataclass -class ValidDescriptor: - def to_dict(self) -> Dict: - """ - Returns a dictionary version of this instance. - """ - return asdict(self) - - @classmethod - def from_dict( - cls, - the_dict: Dict - ) -> 'ValidDescriptor': - """ - Returns an instance of this class from a dictionary. - - :param the_dict: The dictionary from which to create an instance of this class. - """ - return cls(**the_dict) + #: The Avro Schema associated to this class + _schema: ClassVar[str] = """{ + "name": "ValidDescriptor", + "namespace": "org.cedar.schemas.avro.psi", + "type": "enum", + "doc": "The types of metadata records which can be represented in the PSI system", + "symbols": [ + "VALID", + "INVALID", + "UNDEFINED" + ] + }""" \ No newline at end of file diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/__init__.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/__init__.py deleted file mode 100644 index 3862fe7..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -import faker - -from pyavro_gen.enum_with_schema_provider import EnumWithSchemaProvider -from .testing_classes import test_classes - -fake = faker.Faker() -fake.add_provider(EnumWithSchemaProvider) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/data_access_analysis_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/data_access_analysis_factory.py deleted file mode 100644 index d666bfa..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/data_access_analysis_factory.py +++ /dev/null @@ -1,12 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses. 
import DataAccessAnalysis -from psiSchemaClasses_test import fake - - -class DataAccessAnalysisFactory(Factory): - class Meta: - model = DataAccessAnalysis - dataAccessExists = lazy_attribute(lambda x: [fake.pybool(), None][randint(0, 1)]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/data_format_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/data_format_factory.py deleted file mode 100644 index 83f05c0..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/data_format_factory.py +++ /dev/null @@ -1,13 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses. import DataFormat -from psiSchemaClasses_test import fake - - -class DataFormatFactory(Factory): - class Meta: - model = DataFormat - name = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - version = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/description_analysis_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/description_analysis_factory.py deleted file mode 100644 index 8a75307..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/description_analysis_factory.py +++ /dev/null @@ -1,15 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses. import DescriptionAnalysis -from psiSchemaClasses_test import fake - - -class DescriptionAnalysisFactory(Factory): - class Meta: - model = DescriptionAnalysis - descriptionExists = lazy_attribute(lambda x: [fake.pybool(), None][randint(0, 1)]) - descriptionCharacters = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - descriptionFleschReadingEaseScore = lazy_attribute(lambda x: [fake.pyfloat(), None][randint(0, 1)]) - descriptionFleschKincaidReadingGradeLevel = lazy_attribute(lambda x: [fake.pyfloat(), None][randint(0, 1)]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/identification_analysis_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/identification_analysis_factory.py deleted file mode 100644 index 07f7019..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/identification_analysis_factory.py +++ /dev/null @@ -1,19 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses. 
import IdentificationAnalysis -from psiSchemaClasses_test import fake - - -class IdentificationAnalysisFactory(Factory): - class Meta: - model = IdentificationAnalysis - fileIdentifierExists = lazy_attribute(lambda x: [fake.pybool(), None][randint(0, 1)]) - fileIdentifierString = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - doiExists = lazy_attribute(lambda x: [fake.pybool(), None][randint(0, 1)]) - doiString = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - parentIdentifierExists = lazy_attribute(lambda x: [fake.pybool(), None][randint(0, 1)]) - parentIdentifierString = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - hierarchyLevelNameExists = lazy_attribute(lambda x: [fake.pybool(), None][randint(0, 1)]) - isGranule = lazy_attribute(lambda x: [fake.pybool(), None][randint(0, 1)]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/instruments_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/instruments_factory.py deleted file mode 100644 index 10b8367..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/instruments_factory.py +++ /dev/null @@ -1,14 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses. import Instruments -from psiSchemaClasses_test import fake - - -class InstrumentsFactory(Factory): - class Meta: - model = Instruments - instrumentIdentifier = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - instrumentType = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - instrumentDescription = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/keywords_element_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/keywords_element_factory.py deleted file mode 100644 index 29a6565..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/keywords_element_factory.py +++ /dev/null @@ -1,14 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses. import KeywordsElement -from psiSchemaClasses_test import fake - - -class KeywordsElementFactory(Factory): - class Meta: - model = KeywordsElement - values = lazy_attribute(lambda x: [fake.pystr() for _ in range(randint(1, 5))]) - type = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - namespace = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/operation_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/operation_factory.py deleted file mode 100644 index b6ff961..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/operation_factory.py +++ /dev/null @@ -1,15 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses. 
import Operation -from psiSchemaClasses_test import fake - - -class OperationFactory(Factory): - class Meta: - model = Operation - operationDescription = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - operationIdentifier = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - operationStatus = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - operationType = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/__init__.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/__init__.py deleted file mode 100644 index 5606a48..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from psiSchemaClasses_test.org.cedar.schemas.avro.geojson.point_factory import PointFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.geojson.multi_point_factory import MultiPointFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.geojson.line_string_factory import LineStringFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.geojson.multi_line_string_factory import MultiLineStringFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.geojson.polygon_factory import PolygonFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.geojson.multi_polygon_factory import MultiPolygonFactory diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/line_string_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/line_string_factory.py deleted file mode 100644 index b7a55b0..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/line_string_factory.py +++ /dev/null @@ -1,10 +0,0 @@ -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.geojson import LineString - - -class LineStringFactory(Factory): - class Meta: - model = LineString - - pass diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_line_string_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_line_string_factory.py deleted file mode 100644 index c4ea8c3..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_line_string_factory.py +++ /dev/null @@ -1,10 +0,0 @@ -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.geojson import MultiLineString - - -class MultiLineStringFactory(Factory): - class Meta: - model = MultiLineString - - pass diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_point_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_point_factory.py deleted file mode 100644 index 89f6621..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_point_factory.py +++ /dev/null @@ -1,10 +0,0 @@ -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.geojson import MultiPoint - - -class MultiPointFactory(Factory): - class Meta: - model = MultiPoint - - pass diff --git 
a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_polygon_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_polygon_factory.py deleted file mode 100644 index aa653d0..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/multi_polygon_factory.py +++ /dev/null @@ -1,10 +0,0 @@ -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.geojson import MultiPolygon - - -class MultiPolygonFactory(Factory): - class Meta: - model = MultiPolygon - - pass diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/point_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/point_factory.py deleted file mode 100644 index 3dde1b1..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/point_factory.py +++ /dev/null @@ -1,10 +0,0 @@ -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.geojson import Point - - -class PointFactory(Factory): - class Meta: - model = Point - - pass diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/polygon_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/polygon_factory.py deleted file mode 100644 index e481e48..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/geojson/polygon_factory.py +++ /dev/null @@ -1,10 +0,0 @@ -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.geojson import Polygon - - -class PolygonFactory(Factory): - class Meta: - model = Polygon - - pass diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/__init__.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/__init__.py deleted file mode 100644 index 2bc3169..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/__init__.py +++ /dev/null @@ -1,24 +0,0 @@ -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.temporal_bounding_factory import TemporalBoundingFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.link_factory import LinkFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.responsible_party_factory import ResponsiblePartyFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.reference_factory import ReferenceFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.record_type_factory import RecordTypeFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.file_information_factory import FileInformationFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.publishing_factory import PublishingFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.error_event_factory import ErrorEventFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.valid_descriptor_factory import ValidDescriptorFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.method_factory import MethodFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.operation_type_factory import OperationTypeFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.file_location_type_factory import FileLocationTypeFactory -from 
psiSchemaClasses_test.org.cedar.schemas.avro.psi.checksum_factory import ChecksumFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.checksum_algorithm_factory import ChecksumAlgorithmFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.relationship_type_factory import RelationshipTypeFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.parsed_record_with_id_factory import ParsedRecordWithIdFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.file_location_factory import FileLocationFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.relationship_factory import RelationshipFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.input_event_factory import InputEventFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.input_factory import InputFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.aggregated_input_factory import AggregatedInputFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.analysis_factory import AnalysisFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.discovery_factory import DiscoveryFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi.parsed_record_factory import ParsedRecordFactory diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/aggregated_input_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/aggregated_input_factory.py deleted file mode 100644 index df261eb..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/aggregated_input_factory.py +++ /dev/null @@ -1,26 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import AggregatedInput -from psiSchemaClasses_test import fake -from psiSchemaClasses_test.org.cedar.schemas.avro.psi import ( - ErrorEventFactory, FileInformationFactory, FileLocationFactory, - InputEventFactory, PublishingFactory, RecordTypeFactory, - RelationshipFactory) - - -class AggregatedInputFactory(Factory): - class Meta: - model = AggregatedInput - rawJson = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - rawXml = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - initialSource = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - type = lazy_attribute(lambda x: [RecordTypeFactory(), None][randint(0, 1)]) - fileInformation = lazy_attribute(lambda x: [FileInformationFactory(), None][randint(0, 1)]) - fileLocations = lazy_attribute(lambda x: {fake.pystr(): FileLocationFactory() for _ in range(randint(3, 10))}) - publishing = lazy_attribute(lambda x: [PublishingFactory(), None][randint(0, 1)]) - relationships = lazy_attribute(lambda x: [RelationshipFactory() for _ in range(randint(1, 5))]) - deleted = lazy_attribute(lambda x: fake.pybool()) - events = lazy_attribute(lambda x: [InputEventFactory() for _ in range(randint(1, 5))]) - errors = lazy_attribute(lambda x: [ErrorEventFactory() for _ in range(randint(1, 5))]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/analysis_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/analysis_factory.py deleted file mode 100644 index 00a3199..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/analysis_factory.py +++ /dev/null @@ -1,24 +0,0 @@ -from random import randint - -from factory 
import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import Analysis -from psiSchemaClasses_test. import (DataAccessAnalysisFactory, - DescriptionAnalysisFactory, - IdentificationAnalysisFactory, - SpatialBoundingAnalysisFactory, - TemporalBoundingAnalysisFactory, - ThumbnailAnalysisFactory, - TitleAnalysisFactory) - - -class AnalysisFactory(Factory): - class Meta: - model = Analysis - identification = lazy_attribute(lambda x: [IdentificationAnalysisFactory(), None][randint(0, 1)]) - titles = lazy_attribute(lambda x: [TitleAnalysisFactory(), None][randint(0, 1)]) - description = lazy_attribute(lambda x: [DescriptionAnalysisFactory(), None][randint(0, 1)]) - dataAccess = lazy_attribute(lambda x: [DataAccessAnalysisFactory(), None][randint(0, 1)]) - thumbnail = lazy_attribute(lambda x: [ThumbnailAnalysisFactory(), None][randint(0, 1)]) - temporalBounding = lazy_attribute(lambda x: [TemporalBoundingAnalysisFactory(), None][randint(0, 1)]) - spatialBounding = lazy_attribute(lambda x: [SpatialBoundingAnalysisFactory(), None][randint(0, 1)]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/checksum_algorithm_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/checksum_algorithm_factory.py deleted file mode 100644 index 745054c..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/checksum_algorithm_factory.py +++ /dev/null @@ -1,10 +0,0 @@ -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import ChecksumAlgorithm -from psiSchemaClasses_test import fake - - -class ChecksumAlgorithmFactory(Factory): - class Meta: - model = ChecksumAlgorithm - value = lazy_attribute(lambda x: fake.enum_with_schema(ChecksumAlgorithm)) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/checksum_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/checksum_factory.py deleted file mode 100644 index a5bba5a..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/checksum_factory.py +++ /dev/null @@ -1,13 +0,0 @@ -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import Checksum -from psiSchemaClasses_test import fake -from psiSchemaClasses_test.org.cedar.schemas.avro.psi import \ - ChecksumAlgorithmFactory - - -class ChecksumFactory(Factory): - class Meta: - model = Checksum - algorithm = lazy_attribute(lambda x: ChecksumAlgorithmFactory()) - value = lazy_attribute(lambda x: fake.pystr()) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/discovery_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/discovery_factory.py deleted file mode 100644 index 6ac992b..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/discovery_factory.py +++ /dev/null @@ -1,67 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import Discovery -from psiSchemaClasses_test import fake -from psiSchemaClasses_test. 
import (DataFormatFactory, InstrumentsFactory, - KeywordsElementFactory, OperationFactory, - PlatformFactory, ServiceFactory) -from psiSchemaClasses_test.org.cedar.schemas.avro.geojson import ( - LineStringFactory, MultiLineStringFactory, MultiPointFactory, - MultiPolygonFactory, PointFactory, PolygonFactory) -from psiSchemaClasses_test.org.cedar.schemas.avro.psi import ( - LinkFactory, ReferenceFactory, ResponsiblePartyFactory, - TemporalBoundingFactory) - - -class DiscoveryFactory(Factory): - class Meta: - model = Discovery - fileIdentifier = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - parentIdentifier = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - hierarchyLevelName = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - doi = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - purpose = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - status = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - credit = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - title = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - alternateTitle = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - description = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - keywords = lazy_attribute(lambda x: [KeywordsElementFactory() for _ in range(randint(1, 5))]) - topicCategories = lazy_attribute(lambda x: [fake.pystr() for _ in range(randint(1, 5))]) - temporalBounding = lazy_attribute(lambda x: [TemporalBoundingFactory(), None][randint(0, 1)]) - spatialBounding = lazy_attribute(lambda x: [None, PointFactory(), MultiPointFactory(), LineStringFactory(), MultiLineStringFactory(), PolygonFactory(), MultiPolygonFactory()][randint(0, 6)]) - isGlobal = lazy_attribute(lambda x: [fake.pybool(), None][randint(0, 1)]) - acquisitionInstruments = lazy_attribute(lambda x: [InstrumentsFactory() for _ in range(randint(1, 5))]) - acquisitionOperations = lazy_attribute(lambda x: [OperationFactory() for _ in range(randint(1, 5))]) - acquisitionPlatforms = lazy_attribute(lambda x: [PlatformFactory() for _ in range(randint(1, 5))]) - dataFormats = lazy_attribute(lambda x: [DataFormatFactory() for _ in range(randint(1, 5))]) - links = lazy_attribute(lambda x: [LinkFactory() for _ in range(randint(1, 5))]) - responsibleParties = lazy_attribute(lambda x: [ResponsiblePartyFactory() for _ in range(randint(1, 5))]) - thumbnail = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - thumbnailDescription = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - creationDate = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - revisionDate = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - publicationDate = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - citeAsStatements = lazy_attribute(lambda x: [fake.pystr() for _ in range(randint(1, 5))]) - crossReferences = lazy_attribute(lambda x: [ReferenceFactory() for _ in range(randint(1, 5))]) - largerWorks = lazy_attribute(lambda x: [ReferenceFactory() for _ in range(randint(1, 5))]) - useLimitation = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - legalConstraints = lazy_attribute(lambda x: [fake.pystr() for _ in range(randint(1, 5))]) - accessFeeStatement = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - orderingInstructions = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - edition = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - 
dsmmAccessibility = lazy_attribute(lambda x: fake.pyint()) - dsmmDataIntegrity = lazy_attribute(lambda x: fake.pyint()) - dsmmDataQualityAssessment = lazy_attribute(lambda x: fake.pyint()) - dsmmDataQualityAssurance = lazy_attribute(lambda x: fake.pyint()) - dsmmDataQualityControlMonitoring = lazy_attribute(lambda x: fake.pyint()) - dsmmPreservability = lazy_attribute(lambda x: fake.pyint()) - dsmmProductionSustainability = lazy_attribute(lambda x: fake.pyint()) - dsmmTransparencyTraceability = lazy_attribute(lambda x: fake.pyint()) - dsmmUsability = lazy_attribute(lambda x: fake.pyint()) - dsmmAverage = lazy_attribute(lambda x: fake.pyfloat()) - updateFrequency = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - presentationForm = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - services = lazy_attribute(lambda x: [ServiceFactory() for _ in range(randint(1, 5))]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/error_event_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/error_event_factory.py deleted file mode 100644 index dbf6280..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/error_event_factory.py +++ /dev/null @@ -1,16 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import ErrorEvent -from psiSchemaClasses_test import fake - - -class ErrorEventFactory(Factory): - class Meta: - model = ErrorEvent - title = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - detail = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - status = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - code = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - source = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/file_information_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/file_information_factory.py deleted file mode 100644 index 1da43dd..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/file_information_factory.py +++ /dev/null @@ -1,18 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import FileInformation -from psiSchemaClasses_test import fake -from psiSchemaClasses_test.org.cedar.schemas.avro.psi import ChecksumFactory - - -class FileInformationFactory(Factory): - class Meta: - model = FileInformation - name = lazy_attribute(lambda x: fake.pystr()) - size = lazy_attribute(lambda x: fake.pyint()) - checksums = lazy_attribute(lambda x: [ChecksumFactory() for _ in range(randint(1, 5))]) - format = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - headers = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - optionalAttributes = lazy_attribute(lambda x: {fake.pystr(): fake.pystr() for _ in range(randint(3, 10))}) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/file_location_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/file_location_factory.py deleted file mode 100644 index 9298690..0000000 --- 
a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/file_location_factory.py +++ /dev/null @@ -1,22 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import FileLocation -from psiSchemaClasses_test import fake -from psiSchemaClasses_test.org.cedar.schemas.avro.psi import \ - FileLocationTypeFactory - - -class FileLocationFactory(Factory): - class Meta: - model = FileLocation - uri = lazy_attribute(lambda x: fake.pystr()) - type = lazy_attribute(lambda x: [FileLocationTypeFactory(), None][randint(0, 1)]) - deleted = lazy_attribute(lambda x: fake.pybool()) - restricted = lazy_attribute(lambda x: fake.pybool()) - asynchronous = lazy_attribute(lambda x: fake.pybool()) - locality = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - lastModified = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - serviceType = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - optionalAttributes = lazy_attribute(lambda x: {fake.pystr(): fake.pystr() for _ in range(randint(3, 10))}) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/file_location_type_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/file_location_type_factory.py deleted file mode 100644 index fa9352f..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/file_location_type_factory.py +++ /dev/null @@ -1,10 +0,0 @@ -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import FileLocationType - - -class FileLocationTypeFactory(Factory): - class Meta: - model = FileLocationType - - pass diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/input_event_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/input_event_factory.py deleted file mode 100644 index cc3c6a4..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/input_event_factory.py +++ /dev/null @@ -1,18 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import InputEvent -from psiSchemaClasses_test import fake -from psiSchemaClasses_test.org.cedar.schemas.avro.psi import ( - MethodFactory, OperationTypeFactory) - - -class InputEventFactory(Factory): - class Meta: - model = InputEvent - timestamp = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - method = lazy_attribute(lambda x: [MethodFactory(), None][randint(0, 1)]) - source = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - operation = lazy_attribute(lambda x: [OperationTypeFactory(), None][randint(0, 1)]) - failedState = lazy_attribute(lambda x: fake.pybool()) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/input_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/input_factory.py deleted file mode 100644 index d855377..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/input_factory.py +++ /dev/null @@ -1,19 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import Input -from psiSchemaClasses_test import fake 
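
The *_test packages deleted in these hunks are pyavro-gen output: one factory_boy Factory per schema class, with Faker filling each field and the [value, None][randint(0, 1)] idiom randomly exercising optional fields. A stripped-down sketch of that pattern, using a hypothetical stand-in model rather than one of the real schema classes:

    from random import randint

    import factory  # factory_boy, as used by the deleted factories
    import faker

    fake = faker.Faker()

    class Publishing:
        # Hypothetical stand-in for the generated schema class of the same name.
        def __init__(self, isPrivate, until):
            self.isPrivate = isPrivate
            self.until = until

    class PublishingFactory(factory.Factory):
        class Meta:
            model = Publishing
        isPrivate = factory.lazy_attribute(lambda x: fake.pybool())
        # Optional field: randomly a real value or None, mirroring the generated code.
        until = factory.lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)])

    sample = PublishingFactory()  # a Publishing with randomized field values

As the deleted __init__.py files show, these packages imported faker and pyavro_gen at import time, so removing them also drops those test-only imports from the onestop.schemas package tree.
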
-from psiSchemaClasses_test.org.cedar.schemas.avro.psi import ( - MethodFactory, OperationTypeFactory, RecordTypeFactory) - - -class InputFactory(Factory): - class Meta: - model = Input - type = lazy_attribute(lambda x: [RecordTypeFactory(), None][randint(0, 1)]) - content = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - contentType = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - method = lazy_attribute(lambda x: [MethodFactory(), None][randint(0, 1)]) - source = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - operation = lazy_attribute(lambda x: [OperationTypeFactory(), None][randint(0, 1)]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/link_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/link_factory.py deleted file mode 100644 index fc9057e..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/link_factory.py +++ /dev/null @@ -1,16 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import Link -from psiSchemaClasses_test import fake - - -class LinkFactory(Factory): - class Meta: - model = Link - linkName = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - linkProtocol = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - linkUrl = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - linkDescription = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - linkFunction = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/method_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/method_factory.py deleted file mode 100644 index 14e2eb1..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/method_factory.py +++ /dev/null @@ -1,10 +0,0 @@ -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import Method - - -class MethodFactory(Factory): - class Meta: - model = Method - - pass diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/operation_type_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/operation_type_factory.py deleted file mode 100644 index d5e5116..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/operation_type_factory.py +++ /dev/null @@ -1,10 +0,0 @@ -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import OperationType - - -class OperationTypeFactory(Factory): - class Meta: - model = OperationType - - pass diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/parsed_record_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/parsed_record_factory.py deleted file mode 100644 index 47b0d9a..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/parsed_record_factory.py +++ /dev/null @@ -1,22 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import ParsedRecord -from 
psiSchemaClasses_test.org.cedar.schemas.avro.psi import ( - AnalysisFactory, DiscoveryFactory, ErrorEventFactory, - FileInformationFactory, FileLocationFactory, PublishingFactory, - RecordTypeFactory, RelationshipFactory) - - -class ParsedRecordFactory(Factory): - class Meta: - model = ParsedRecord - type = lazy_attribute(lambda x: [RecordTypeFactory(), None][randint(0, 1)]) - discovery = lazy_attribute(lambda x: [DiscoveryFactory(), None][randint(0, 1)]) - analysis = lazy_attribute(lambda x: [AnalysisFactory(), None][randint(0, 1)]) - fileInformation = lazy_attribute(lambda x: [FileInformationFactory(), None][randint(0, 1)]) - fileLocations = lazy_attribute(lambda x: {fake.pystr(): FileLocationFactory() for _ in range(randint(3, 10))}) - publishing = lazy_attribute(lambda x: PublishingFactory()) - relationships = lazy_attribute(lambda x: [RelationshipFactory() for _ in range(randint(1, 5))]) - errors = lazy_attribute(lambda x: [ErrorEventFactory() for _ in range(randint(1, 5))]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/parsed_record_with_id_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/parsed_record_with_id_factory.py deleted file mode 100644 index 9d95eb9..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/parsed_record_with_id_factory.py +++ /dev/null @@ -1,13 +0,0 @@ -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import ParsedRecordWithId -from psiSchemaClasses_test import fake -from psiSchemaClasses_test.org.cedar.schemas.avro.psi import \ - ParsedRecordFactory - - -class ParsedRecordWithIdFactory(Factory): - class Meta: - model = ParsedRecordWithId - id = lazy_attribute(lambda x: fake.pystr()) - record = lazy_attribute(lambda x: ParsedRecordFactory()) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/publishing_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/publishing_factory.py deleted file mode 100644 index dfeb3bd..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/publishing_factory.py +++ /dev/null @@ -1,13 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import Publishing -from psiSchemaClasses_test import fake - - -class PublishingFactory(Factory): - class Meta: - model = Publishing - isPrivate = lazy_attribute(lambda x: fake.pybool()) - until = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/record_type_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/record_type_factory.py deleted file mode 100644 index 012f335..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/record_type_factory.py +++ /dev/null @@ -1,10 +0,0 @@ -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import RecordType -from psiSchemaClasses_test import fake - - -class RecordTypeFactory(Factory): - class Meta: - model = RecordType - value = lazy_attribute(lambda x: fake.enum_with_schema(RecordType)) diff --git 
a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/reference_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/reference_factory.py deleted file mode 100644 index ce55525..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/reference_factory.py +++ /dev/null @@ -1,15 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import Reference -from psiSchemaClasses_test import fake -from psiSchemaClasses_test.org.cedar.schemas.avro.psi import LinkFactory - - -class ReferenceFactory(Factory): - class Meta: - model = Reference - title = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - date = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - links = lazy_attribute(lambda x: [LinkFactory() for _ in range(randint(1, 5))]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/relationship_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/relationship_factory.py deleted file mode 100644 index 4a910f0..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/relationship_factory.py +++ /dev/null @@ -1,15 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import Relationship -from psiSchemaClasses_test import fake -from psiSchemaClasses_test.org.cedar.schemas.avro.psi import \ - RelationshipTypeFactory - - -class RelationshipFactory(Factory): - class Meta: - model = Relationship - type = lazy_attribute(lambda x: [RelationshipTypeFactory(), None][randint(0, 1)]) - id = lazy_attribute(lambda x: fake.pystr()) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/relationship_type_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/relationship_type_factory.py deleted file mode 100644 index 8f10e24..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/relationship_type_factory.py +++ /dev/null @@ -1,10 +0,0 @@ -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import RelationshipType - - -class RelationshipTypeFactory(Factory): - class Meta: - model = RelationshipType - - pass diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/responsible_party_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/responsible_party_factory.py deleted file mode 100644 index 12bc55a..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/responsible_party_factory.py +++ /dev/null @@ -1,17 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import ResponsibleParty -from psiSchemaClasses_test import fake - - -class ResponsiblePartyFactory(Factory): - class Meta: - model = ResponsibleParty - individualName = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - organizationName = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - positionName = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - role = lazy_attribute(lambda x: 
[fake.pystr(), None][randint(0, 1)]) - email = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - phone = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/temporal_bounding_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/temporal_bounding_factory.py deleted file mode 100644 index 1bc7b7b..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/temporal_bounding_factory.py +++ /dev/null @@ -1,18 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import TemporalBounding -from psiSchemaClasses_test import fake - - -class TemporalBoundingFactory(Factory): - class Meta: - model = TemporalBounding - beginDate = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - beginIndeterminate = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - endDate = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - endIndeterminate = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - instant = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - instantIndeterminate = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - description = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/valid_descriptor_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/valid_descriptor_factory.py deleted file mode 100644 index 0152b7d..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/psi/valid_descriptor_factory.py +++ /dev/null @@ -1,10 +0,0 @@ -from factory import Factory, lazy_attribute - -from psiSchemaClasses.org.cedar.schemas.avro.psi import ValidDescriptor - - -class ValidDescriptorFactory(Factory): - class Meta: - model = ValidDescriptor - - pass diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/platform_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/platform_factory.py deleted file mode 100644 index ada2e39..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/platform_factory.py +++ /dev/null @@ -1,14 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses. import Platform -from psiSchemaClasses_test import fake - - -class PlatformFactory(Factory): - class Meta: - model = Platform - platformIdentifier = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - platformDescription = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - platformSponsor = lazy_attribute(lambda x: [fake.pystr() for _ in range(randint(1, 5))]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/service_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/service_factory.py deleted file mode 100644 index bb81b12..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/service_factory.py +++ /dev/null @@ -1,20 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses. 
import Service -from psiSchemaClasses_test import fake -from psiSchemaClasses_test.org.cedar.schemas.avro.psi import ( - LinkFactory, ResponsiblePartyFactory) - - -class ServiceFactory(Factory): - class Meta: - model = Service - title = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - alternateTitle = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - description = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - date = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - dateType = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - pointOfContact = lazy_attribute(lambda x: [ResponsiblePartyFactory(), None][randint(0, 1)]) - operations = lazy_attribute(lambda x: [LinkFactory() for _ in range(randint(1, 5))]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/spatial_bounding_analysis_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/spatial_bounding_analysis_factory.py deleted file mode 100644 index 45fe832..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/spatial_bounding_analysis_factory.py +++ /dev/null @@ -1,14 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses. import SpatialBoundingAnalysis -from psiSchemaClasses_test import fake - - -class SpatialBoundingAnalysisFactory(Factory): - class Meta: - model = SpatialBoundingAnalysis - spatialBoundingExists = lazy_attribute(lambda x: [fake.pybool(), None][randint(0, 1)]) - isValid = lazy_attribute(lambda x: [fake.pybool(), None][randint(0, 1)]) - validationError = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/temporal_bounding_analysis_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/temporal_bounding_analysis_factory.py deleted file mode 100644 index bdb6c93..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/temporal_bounding_analysis_factory.py +++ /dev/null @@ -1,46 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses. import TemporalBoundingAnalysis -from psiSchemaClasses_test import fake -from psiSchemaClasses_test. 
import TimeRangeDescriptorFactory -from psiSchemaClasses_test.org.cedar.schemas.avro.psi import \ - ValidDescriptorFactory - - -class TemporalBoundingAnalysisFactory(Factory): - class Meta: - model = TemporalBoundingAnalysis - beginDescriptor = lazy_attribute(lambda x: [ValidDescriptorFactory(), None][randint(0, 1)]) - beginPrecision = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - beginIndexable = lazy_attribute(lambda x: [fake.pybool(), None][randint(0, 1)]) - beginZoneSpecified = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - beginUtcDateTimeString = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - beginYear = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - beginDayOfYear = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - beginDayOfMonth = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - beginMonth = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - endDescriptor = lazy_attribute(lambda x: [ValidDescriptorFactory(), None][randint(0, 1)]) - endPrecision = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - endIndexable = lazy_attribute(lambda x: [fake.pybool(), None][randint(0, 1)]) - endZoneSpecified = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - endUtcDateTimeString = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - endYear = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - endDayOfYear = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - endDayOfMonth = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - endMonth = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - instantDescriptor = lazy_attribute(lambda x: [ValidDescriptorFactory(), None][randint(0, 1)]) - instantPrecision = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - instantIndexable = lazy_attribute(lambda x: [fake.pybool(), None][randint(0, 1)]) - instantZoneSpecified = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - instantUtcDateTimeString = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - instantEndUtcDateTimeString = lazy_attribute(lambda x: [fake.pystr(), None][randint(0, 1)]) - instantYear = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - instantDayOfYear = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - instantEndDayOfYear = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - instantDayOfMonth = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - instantEndDayOfMonth = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - instantMonth = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - instantEndMonth = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - rangeDescriptor = lazy_attribute(lambda x: [TimeRangeDescriptorFactory(), None][randint(0, 1)]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/testing_classes.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/testing_classes.py deleted file mode 100644 index 1532f68..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/testing_classes.py +++ /dev/null @@ -1,48 +0,0 @@ -from pyavro_gen.codewriters.namespace import ClassItem - -test_classes = [ - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'TemporalBounding', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'TemporalBoundingFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'Link', 
'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'LinkFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'ResponsibleParty', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'ResponsiblePartyFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'Reference', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'ReferenceFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'RecordType', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'RecordTypeFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'FileInformation', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'FileInformationFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'Publishing', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'PublishingFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'ErrorEvent', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'ErrorEventFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'ValidDescriptor', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'ValidDescriptorFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'Method', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'MethodFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'OperationType', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'OperationTypeFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'FileLocationType', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'FileLocationTypeFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'Checksum', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'ChecksumFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'ChecksumAlgorithm', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'ChecksumAlgorithmFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'RelationshipType', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'RelationshipTypeFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'ParsedRecordWithId', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'ParsedRecordWithIdFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'FileLocation', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'FileLocationFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'Relationship', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'RelationshipFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'InputEvent', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'InputEventFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'Input', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'InputFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'AggregatedInput', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'AggregatedInputFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'Analysis', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'AnalysisFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'Discovery', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'DiscoveryFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.psi', 'ParsedRecord', 'psiSchemaClasses_test.org.cedar.schemas.avro.psi', 'ParsedRecordFactory'), - ClassItem('psiSchemaClasses.', 'KeywordsElement', 'psiSchemaClasses_test.', 'KeywordsElementFactory'), - ClassItem('psiSchemaClasses.', 'Instruments', 
'psiSchemaClasses_test.', 'InstrumentsFactory'), - ClassItem('psiSchemaClasses.', 'Operation', 'psiSchemaClasses_test.', 'OperationFactory'), - ClassItem('psiSchemaClasses.', 'Platform', 'psiSchemaClasses_test.', 'PlatformFactory'), - ClassItem('psiSchemaClasses.', 'DataFormat', 'psiSchemaClasses_test.', 'DataFormatFactory'), - ClassItem('psiSchemaClasses.', 'IdentificationAnalysis', 'psiSchemaClasses_test.', 'IdentificationAnalysisFactory'), - ClassItem('psiSchemaClasses.', 'TitleAnalysis', 'psiSchemaClasses_test.', 'TitleAnalysisFactory'), - ClassItem('psiSchemaClasses.', 'DescriptionAnalysis', 'psiSchemaClasses_test.', 'DescriptionAnalysisFactory'), - ClassItem('psiSchemaClasses.', 'DataAccessAnalysis', 'psiSchemaClasses_test.', 'DataAccessAnalysisFactory'), - ClassItem('psiSchemaClasses.', 'ThumbnailAnalysis', 'psiSchemaClasses_test.', 'ThumbnailAnalysisFactory'), - ClassItem('psiSchemaClasses.', 'SpatialBoundingAnalysis', 'psiSchemaClasses_test.', 'SpatialBoundingAnalysisFactory'), - ClassItem('psiSchemaClasses.', 'TimeRangeDescriptor', 'psiSchemaClasses_test.', 'TimeRangeDescriptorFactory'), - ClassItem('psiSchemaClasses.', 'Service', 'psiSchemaClasses_test.', 'ServiceFactory'), - ClassItem('psiSchemaClasses.', 'TemporalBoundingAnalysis', 'psiSchemaClasses_test.', 'TemporalBoundingAnalysisFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.geojson', 'Point', 'psiSchemaClasses_test.org.cedar.schemas.avro.geojson', 'PointFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.geojson', 'MultiPoint', 'psiSchemaClasses_test.org.cedar.schemas.avro.geojson', 'MultiPointFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.geojson', 'LineString', 'psiSchemaClasses_test.org.cedar.schemas.avro.geojson', 'LineStringFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.geojson', 'MultiLineString', 'psiSchemaClasses_test.org.cedar.schemas.avro.geojson', 'MultiLineStringFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.geojson', 'Polygon', 'psiSchemaClasses_test.org.cedar.schemas.avro.geojson', 'PolygonFactory'), - ClassItem('psiSchemaClasses.org.cedar.schemas.avro.geojson', 'MultiPolygon', 'psiSchemaClasses_test.org.cedar.schemas.avro.geojson', 'MultiPolygonFactory'), -] diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/thumbnail_analysis_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/thumbnail_analysis_factory.py deleted file mode 100644 index 7d94baf..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/thumbnail_analysis_factory.py +++ /dev/null @@ -1,12 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses. import ThumbnailAnalysis -from psiSchemaClasses_test import fake - - -class ThumbnailAnalysisFactory(Factory): - class Meta: - model = ThumbnailAnalysis - thumbnailExists = lazy_attribute(lambda x: [fake.pybool(), None][randint(0, 1)]) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/time_range_descriptor_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/time_range_descriptor_factory.py deleted file mode 100644 index e1c1574..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/time_range_descriptor_factory.py +++ /dev/null @@ -1,10 +0,0 @@ -from factory import Factory, lazy_attribute - -from psiSchemaClasses. 
import TimeRangeDescriptor -from psiSchemaClasses_test import fake - - -class TimeRangeDescriptorFactory(Factory): - class Meta: - model = TimeRangeDescriptor - value = lazy_attribute(lambda x: fake.enum_with_schema(TimeRangeDescriptor)) diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/title_analysis_factory.py b/onestop-python-client/onestop/schemas/psiSchemaClasses_test/title_analysis_factory.py deleted file mode 100644 index e6f5a1f..0000000 --- a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/title_analysis_factory.py +++ /dev/null @@ -1,19 +0,0 @@ -from random import randint - -from factory import Factory, lazy_attribute - -from psiSchemaClasses. import TitleAnalysis -from psiSchemaClasses_test import fake - - -class TitleAnalysisFactory(Factory): - class Meta: - model = TitleAnalysis - titleExists = lazy_attribute(lambda x: [fake.pybool(), None][randint(0, 1)]) - titleCharacters = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - alternateTitleExists = lazy_attribute(lambda x: [fake.pybool(), None][randint(0, 1)]) - alternateTitleCharacters = lazy_attribute(lambda x: [fake.pyint(), None][randint(0, 1)]) - titleFleschReadingEaseScore = lazy_attribute(lambda x: [fake.pyfloat(), None][randint(0, 1)]) - alternateTitleFleschReadingEaseScore = lazy_attribute(lambda x: [fake.pyfloat(), None][randint(0, 1)]) - titleFleschKincaidReadingGradeLevel = lazy_attribute(lambda x: [fake.pyfloat(), None][randint(0, 1)]) - alternateTitleFleschKincaidReadingGradeLevel = lazy_attribute(lambda x: [fake.pyfloat(), None][randint(0, 1)]) diff --git a/onestop-python-client/onestop/schemas/util/jsonEncoder.py b/onestop-python-client/onestop/schemas/util/jsonEncoder.py index d324b88..e3f13e1 100644 --- a/onestop-python-client/onestop/schemas/util/jsonEncoder.py +++ b/onestop-python-client/onestop/schemas/util/jsonEncoder.py @@ -1,11 +1,36 @@ import json -from enum import Enum + from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.checksum_algorithm import ChecksumAlgorithm +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.relationship_type import RelationshipType +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.record_type import RecordType +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.valid_descriptor import ValidDescriptor +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.operation_type import OperationType +from onestop.schemas.psiSchemaClasses.time_range_descriptor import TimeRangeDescriptor +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.file_location_type import FileLocationType +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.method import Method +from onestop.schemas.geojsonSchemaClasses.line_string_type import LineStringType +from onestop.schemas.geojsonSchemaClasses.multi_line_string_type import MultiLineStringType +from onestop.schemas.geojsonSchemaClasses.multi_point_type import MultiPointType +from onestop.schemas.geojsonSchemaClasses.multi_polygon_type import MultiPolygonType +from onestop.schemas.geojsonSchemaClasses.point_type import PointType +from onestop.schemas.geojsonSchemaClasses.polygon_type import PolygonType # Diction of all the Enum Classes ENUMS = { 'ChecksumAlgorithm': ChecksumAlgorithm, - + 'RelationshipType': RelationshipType, + 'RecordType': RecordType, + 'TimeRangeDescriptor': TimeRangeDescriptor, + 'LineStringType': LineStringType, + 'Method': Method, + 'MultiLineStringType': MultiLineStringType, + 
'MultiPointType': MultiPointType, + 'MultiPolygonType': MultiPolygonType, + 'OperationType': OperationType, + 'PointType': PointType, + 'PolygonType': PolygonType, + 'FileLocationType': FileLocationType, + 'ValidDescriptor': ValidDescriptor } # Used as an argument in json.dumps, transform Enum instance for later use diff --git a/onestop-python-client/onestop/util/S3MessageAdapter.py b/onestop-python-client/onestop/util/S3MessageAdapter.py index d640b77..593f29e 100644 --- a/onestop-python-client/onestop/util/S3MessageAdapter.py +++ b/onestop-python-client/onestop/util/S3MessageAdapter.py @@ -1,101 +1,85 @@ -import yaml from onestop.util.ClientLogger import ClientLogger -""" -from onestop.info.ImMessage import ImMessage -from onestop.info.FileMessage import FileMessage -from onestop.info.Link import Link -""" from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.parsed_record import ParsedRecord, Publishing, ErrorEvent from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.file_location import FileLocation,FileLocationType from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.file_information import FileInformation from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.checksum import Checksum, ChecksumAlgorithm -from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.relationship import Relationship, RelationshipType +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.relationship import Relationship +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.relationship_type import RelationshipType from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.discovery import Discovery, Link - class S3MessageAdapter: """ A class used to extract information from sqs messages that have been triggered by s3 events and transform it into correct format for publishing to IM Registry Attributes ---------- - conf: yaml file - csb-data-stream-config.yml - s3_utils: S3Utils object - used to access objects inside of s3 buckets - logger: ClientLogger object - utilizes python logger library and creates logging for our specific needs - logger.info: ClientLogger object - logging statement that occurs when the class is instantiated - prefix_mapping: Dict - contains mapping of various line offices and their associated collection id + access_bucket: str + Cloud bucket to put in the links field when transformed. + type: str + COLLECTION or GRANULE + file_id_prefix: str + File prefix returned as fileIdentifier + collection_id: str + Collection this data belongs to. Returned as parent identifier. 
+ log_level: str + The log level to use for this class (Defaults to 'INFO') - Methods - ------- - collection_id_map(s3_key) - given an s3 key that contains one of the NESDIS line offices in its path, it will provide the corresponding collection id - - transform(recs) - transforms sqs message triggered by s3 event to correct format for publishing to IM registry - """ - def __init__(self, conf_loc, s3_utils): - """ - - :param conf_loc: yaml file - csb-data-stream-config.yml - :param s3_utils: S3Utils object - used to access objects inside of s3 buckets - - Other Attributes - ---------------- logger: ClientLogger object utilizes python logger library and creates logging for our specific needs logger.info: ClientLogger object logging statement that occurs when the class is instantiated - prefix_mapping: Dict - contains mapping of various line offices and their associated collection id + Methods + ------- + transform(recs) + transforms sqs message triggered by s3 event to correct format for publishing to IM registry + """ + def __init__(self, access_bucket, s3_message_adapter_metadata_type, file_id_prefix, collection_id, log_level = 'INFO', **wildargs): """ - with open(conf_loc) as f: - self.conf = yaml.load(f, Loader=yaml.FullLoader) - - self.logger = ClientLogger.get_logger(self.__class__.__name__, self.conf['log_level'], False) - self.logger.info("Initializing " + self.__class__.__name__) - self.s3_utils = s3_utils - - self.prefix_mapping = self.conf['prefixMap'] + Parameters + ---------- + access_bucket: str + access bucket to put in the links field when transformed. + s3_message_adapter_metadata_type: str + COLLECTION or GRANULE + file_id_prefix: str + File prefix returned as fileIdentifier + collection_id: str + Collection this data belongs to. Returned as parent identifier. + log_level: str + Log level for when logging in class. 
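To make the keyword-driven construction concrete, a rough usage sketch follows; the bucket, prefix, and collection UUID are placeholders, the record carries only the S3 event fields transform() actually reads, and the last two lines mirror how create_upload_handler serializes the result with the expanded EnumEncoder:

    import json

    from onestop.util.S3MessageAdapter import S3MessageAdapter
    from onestop.schemas.util.jsonEncoder import EnumEncoder

    s3ma = S3MessageAdapter(
        access_bucket='https://example-bucket.s3-us-east-2.amazonaws.com',  # placeholder
        s3_message_adapter_metadata_type='COLLECTION',
        file_id_prefix='gov.example.prefix:',                               # placeholder
        collection_id='00000000-0000-0000-0000-000000000000',               # placeholder
        log_level='DEBUG')

    # One record from an SQS-delivered S3 event; only the fields transform() uses are shown.
    rec = {'s3': {'bucket': {'name': 'example-bucket'},
                  'object': {'key': 'csv/file1.csv', 'eTag': '44d2452e8bc2c8013e9c673086fbab7a', 'size': 1385}}}

    parsed_record = s3ma.transform(rec)
    json_payload = json.dumps(parsed_record.to_dict(), cls=EnumEncoder)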
- def collection_id_map(self, s3_key): """ - Given an s3 key that contains one of the NESDIS line offices in its path, it will provide the corresponding collection id - - :param s3_key: str - key path of object in s3 bucket + self.access_bucket = access_bucket + self.metadata_type = s3_message_adapter_metadata_type.upper() + self.file_id_prefix = file_id_prefix + self.collection_id = collection_id + self.logger = ClientLogger.get_logger(self.__class__.__name__, log_level, False) + self.logger.info("Initializing " + self.__class__.__name__) - :return: str - associated line office collection id - """ - # Looks through our prefix map and returns appropriate collection id - for key in self.prefix_mapping: - if key in s3_key: - return self.prefix_mapping[key] + if self.metadata_type not in ['COLLECTION', 'GRANULE']: + raise ValueError("metadata_type of '%s' must be 'COLLECTION' or 'GRANULE'"%(self.metadata_type)) + if wildargs: + self.logger.debug("Superfluous parameters in constructor call: " + str(wildargs)) - def transform(self, recs): + def transform(self, rec): """ - Transforms sqs message triggered by s3 event to correct format for publishing to IM registry + Transforms a single sqs message, triggered by a s3 event, to correct format for publishing to IM registry - :param recs: dict - sqs event message + Parameters: + ---------- + rec: dict + Single record in a sqs event message to transform :return: ParsedRecord Object The Parsed Record class is an avro schema generated class """ - self.logger.info("Transform!") - rec = recs[0] # This is standard format 1 record per message for now according to AWS docs + self.logger.info("Transforming Record") + self.logger.debug("Record: %s"%rec) s3_bucket = rec['s3']['bucket']['name'] s3_key = rec['s3']['object']['key'] @@ -104,21 +88,25 @@ def transform(self, recs): checkSumAlgorithm = ChecksumAlgorithm(value='MD5') alg_value = rec['s3']['object']['eTag'] checkSum = Checksum(algorithm=checkSumAlgorithm, value=alg_value) - checkSum_dict = checkSum.to_dict() file_name = str(s3_key)[pos:] file_size = rec['s3']['object']['size'] fileInformation = FileInformation(name=file_name, size=file_size, checksums=[checkSum], optionalAttributes={}) # Relationship - relationshipType = RelationshipType(type=self.conf['type']) - relationship = Relationship(id=self.conf['collection_id'], type=relationshipType) + relationship = Relationship(id=self.collection_id, type=RelationshipType(self.metadata_type)) # File Location - fileLocationType = FileLocationType(type='ARCHIVE') + fileLocationType = FileLocationType(FileLocationType.ARCHIVE) s3_obj_uri = "s3://" + s3_bucket + "/" + s3_key - fileLocation = FileLocation(uri=s3_obj_uri, type=fileLocationType, deleted=False, restricted=True, - asynchronous=False, serviceType='Amazon:AWS:S3', optionalAttributes={}) + fileLocation = {s3_obj_uri: + FileLocation(uri=s3_obj_uri, + type=fileLocationType, + deleted=False, + restricted=True, + asynchronous=False, + serviceType='Amazon:AWS:S3', + optionalAttributes={})} # Error Event errorEvent = ErrorEvent() @@ -127,12 +115,12 @@ def transform(self, recs): publishing = Publishing(isPrivate=True) # Discovery - access_obj_uri = self.conf['access_bucket'] + "/" + s3_key + access_obj_uri = self.access_bucket + "/" + s3_key link1 = Link(linkName="Amazon S3", linkUrl=access_obj_uri, linkProtocol="HTTPS", linkFunction="download") link2 = Link(linkName="Amazon S3", linkUrl=s3_obj_uri, linkProtocol="Amazon:AWS:S3", linkFunction="download") # To Change? 
Come back to this later - parent_identifier = self.conf['collection_id'] - file_identifier = self.conf['file_identifier_prefix'] + file_name[:-4] + parent_identifier = self.collection_id + file_identifier = self.file_id_prefix + file_name[:-4] # Initializing most fields to their default values in the avro schema so that it doesn't cause an error in Kafka discovery = Discovery(links=[link1, link2], title=file_name, parentIdentifier=parent_identifier, @@ -141,7 +129,9 @@ def transform(self, recs): legalConstraints=[], dsmmAccessibility=0, dsmmDataIntegrity=0, dsmmDataQualityAssessment=0, dsmmDataQualityAssurance=0, dsmmDataQualityControlMonitoring=0, dsmmPreservability=0, dsmmProductionSustainability=0, dsmmTransparencyTraceability=0, dsmmUsability=0, dsmmAverage=0.0, services=[]) - +#fileLocations=dict +#fileLocations={'nesdis-ncei-csb-dev/csv/file2.csv': new FileLocation(...)} +#integration/test_WebPublisher parsedRecord = ParsedRecord(fileInformation=fileInformation, fileLocations=fileLocation, relationships=[relationship], errors=[errorEvent], publishing=publishing, discovery=discovery) diff --git a/onestop-python-client/onestop/util/S3Utils.py b/onestop-python-client/onestop/util/S3Utils.py index 7bb0fbe..e654df9 100644 --- a/onestop-python-client/onestop/util/S3Utils.py +++ b/onestop-python-client/onestop/util/S3Utils.py @@ -1,5 +1,5 @@ import logging -import yaml + import uuid import boto3 import botocore @@ -15,100 +15,106 @@ class S3Utils: Attributes ---------- - conf: yaml file - aws-util-config-dev.yml - cred: yaml file - credentials.yml - logger: ClientLogger object - utilizes python logger library and creates logging for our specific needs - logger.info: ClientLogger object - logging statement that occurs when the class is instantiated + access_key: str + Cloud access key - Methods - ------- - connect(client_type, region) - connects to a boto3 client + secret_key: str + Cloud secret key - objectkey_exists(bucket, s3_key) - checks to see if a s3 key path exists in a particular bucket + log_level: str + The log level to use for this class (Defaults to 'INFO') - get_uuid_metadata(boto_client, bucket, s3_key) - returns metadata uuid of an s3 object if it has one, otherwise prints that one does not exist + logger: ClientLogger object + Creates logging for us to log to. - add_uuid_metadata(boto_client, bucket, s3_key) - adds metadata uuid to an s3 object + Methods + ------- + connect(client_type, region) + connects to a boto3 service - upload_s3(boto_client, local_file, bucket, s3_key, overwrite) - uploads a file to s3 bucket + objectkey_exists(bucket, s3_key) + checks to see if a s3 key path exists in a particular bucket - get_csv_s3(boto_client, bucket, key) - gets a csv file from s3 bucket using smart open library + get_uuid_metadata(boto_client, bucket, s3_key) + returns metadata uuid of an s3 object if it has one, otherwise prints that one does not exist - read_bytes_s3(boto_client, bucket, key) - returns raw information of s3 object + add_uuid_metadata(boto_client, bucket, s3_key) + adds metadata uuid to an s3 object - upload_archive(boto_client, vault_name, src_data) - Add an archive to an Amazon S3 Glacier vault. The upload occurs synchronously. + upload_s3(boto_client, local_file, bucket, s3_key, overwrite) + uploads a file to s3 bucket - s3_to_glacier(boto_client, bucket_name, key) - Changes storage class of s3 object from s3 -> glacier. 
Utilizes s3 client type + get_csv_s3(boto_client, bucket, key) + gets a csv file from s3 bucket using smart open library - s3_to_glacier_object_lock(boto_client, bucket_name, key, object_lock_mode, object_lock_retention) - Changes storage class of s3 object from s3 -> glacier and places it in object lock mode. Utilizes s3 client type + read_bytes_s3(boto_client, bucket, key) + returns raw information of s3 object - s3_restore(boto_client, bucket_name, key, days) - Restores an object in S3 glacier back to S3 for specified amount of days + upload_archive(boto_client, vault_name, src_data) + Add an archive to an Amazon S3 Glacier vault. The upload occurs synchronously. - retrieve_inventory(boto_client, vault_name) - Initiate an Amazon Glacier inventory-retrieval job + s3_to_glacier(boto_client, bucket_name, key) + Changes storage class of s3 object from s3 -> glacier. Utilizes s3 client type - retrieve_inventory_results(vault_name, boto_client, job_id) - Retrieve the results of an Amazon Glacier inventory-retrieval job - """ - conf = None + s3_to_glacier_object_lock(boto_client, bucket_name, key, object_lock_mode, object_lock_retention) + Changes storage class of s3 object from s3 -> glacier and places it in object lock mode. Utilizes s3 client type - def __init__(self, conf_loc, cred_loc): + s3_restore(boto_client, bucket_name, key, days) + Restores an object in S3 glacier back to S3 for specified amount of days - with open(conf_loc) as f: - self.conf = yaml.load(f, Loader=yaml.FullLoader) + retrieve_inventory(boto_client, vault_name) + Initiate an Amazon Glacier inventory-retrieval job - with open(cred_loc) as f: - self.cred = yaml.load(f, Loader=yaml.FullLoader) + retrieve_inventory_results(vault_name, boto_client, job_id) + Retrieve the results of an Amazon Glacier inventory-retrieval job + """ - self.logger = ClientLogger.get_logger(self.__class__.__name__, self.conf['log_level'], False) + def __init__(self, access_key, secret_key, log_level = 'INFO', **wildargs): + self.access_key = access_key + self.secret_key = secret_key + self.logger = ClientLogger.get_logger(self.__class__.__name__, log_level, False) self.logger.info("Initializing " + self.__class__.__name__) - def connect(self, client_type, region): + if wildargs: + self.logger.debug("Superfluous parameters in constructor call: " + str(wildargs)) + + def connect(self, type, service_name, region): """ - Connects to a boto3 client + Connects to a boto3 of specified type using the credentials provided in the constructor. - :param client_type: str - boto client type in which you want to access + :param type: str + boto object type to return, see return type. + :param service_name: str + (Optional for session type) boto service name in which you want to access :param region: str - name of aws region you want to access + (Optional for session type) name of aws region you want to access - :return: boto3 client - dependent on the client_type parameter + :return: boto3 connection object + A boto3 connection object; Client, Session, or Resource. 
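As a quick illustration of the connect() contract described above (the credentials and region are placeholders), a single S3Utils instance can hand back a client, a resource, or a session for any boto3 service name:

    from onestop.util.S3Utils import S3Utils

    s3_utils = S3Utils(access_key='placeholder-access-key',
                       secret_key='placeholder-secret-key',
                       log_level='DEBUG')

    s3_client = s3_utils.connect('client', 's3', 'us-east-2')       # boto3 S3 client
    glacier_client = s3_utils.connect('client', 'glacier', 'us-east-2')
    s3_resource = s3_utils.connect('resource', 's3', 'us-east-2')   # boto3 S3 resource
    session = s3_utils.connect('session', None, 'us-east-2')        # service name is ignored for sessions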
""" - - if client_type == "s3": - boto = boto3.client("s3", aws_access_key_id=self.cred['sandbox']['access_key'], - aws_secret_access_key=self.cred['sandbox']['secret_key'], region_name=region) - - if client_type == "s3_resource": - boto = boto3.resource("s3", region_name=region, aws_access_key_id=self.cred['sandbox']['access_key'], - aws_secret_access_key=self.cred['sandbox']['secret_key'] ) - - if client_type == "glacier": - boto = boto3.client("glacier", region_name=region, aws_access_key_id=self.cred['sandbox']['access_key'], - aws_secret_access_key=self.cred['sandbox']['secret_key']) - - if client_type == "session": - boto = boto3.Session( - aws_access_key_id=self.cred['sandbox']['access_key'], - aws_secret_access_key=self.cred['sandbox']['secret_key'], + type = type.lower() + if type == 'session': + return boto3.Session( + aws_access_key_id=self.access_key, + aws_secret_access_key=self.secret_key, + region_name=region + ) + elif type == 'client': + return boto3.client( + service_name, + aws_access_key_id=self.access_key, + aws_secret_access_key=self.secret_key, + region_name=region + ) + elif type == 'resource': + return boto3.resource( + service_name, + region_name=region, + aws_access_key_id=self.access_key, + aws_secret_access_key=self.secret_key ) - return boto + else: + raise Exception('Unknown boto3 type of "%s"'%(type)) def objectkey_exists(self, bucket, s3_key): """ @@ -214,7 +220,7 @@ def upload_s3(self, boto_client, local_file, bucket, s3_key, overwrite): obj_uuid = str(uuid.uuid4()) if not overwrite: - key_exists = self.objectkey_exists(boto_client, bucket, s3_key) + key_exists = self.objectkey_exists(bucket, s3_key) if (not key_exists) or (key_exists and overwrite): try: @@ -226,11 +232,11 @@ def upload_s3(self, boto_client, local_file, bucket, s3_key, overwrite): self.logger.error("File to upload was not found. 
Path: "+local_file) return False - def get_csv_s3(self, boto_client, bucket, key): + def get_csv_s3(self, boto_session, bucket, key): """ gets a csv file from s3 bucket using smart open library - :param boto_client: session + :param boto_session: session utilizes boto session type :param bucket: str name of bucket @@ -240,7 +246,7 @@ def get_csv_s3(self, boto_client, bucket, key): :return: smart open file """ url = "s3://" + bucket + "/" + key - sm_open_file = sm_open(url, 'r', transport_params={'session': boto_client}) + sm_open_file = sm_open(url, 'r', transport_params={'session': boto_session}) return sm_open_file def read_bytes_s3(self, boto_client, bucket, key): @@ -386,7 +392,6 @@ def s3_restore(self, boto_client, bucket_name, key, days): # returns status of object retrieval return obj.restore - def retrieve_inventory(self, boto_client, vault_name): """ Initiate an Amazon Glacier inventory-retrieval job diff --git a/onestop-python-client/onestop/util/SqsConsumer.py b/onestop-python-client/onestop/util/SqsConsumer.py index f782cc5..b0449d0 100644 --- a/onestop-python-client/onestop/util/SqsConsumer.py +++ b/onestop-python-client/onestop/util/SqsConsumer.py @@ -1,10 +1,7 @@ -import logging -from datetime import datetime, timezone -import yaml -import boto3 import json -from onestop.util.ClientLogger import ClientLogger +from datetime import datetime, timezone +from onestop.util.ClientLogger import ClientLogger class SqsConsumer: """ @@ -12,116 +9,107 @@ class SqsConsumer: Attributes ---------- - conf: yaml file - aws-util-config-dev.yml - cred: yaml file - credentials.yml - logger: ClientLogger object - utilizes python logger library and creates logging for our specific needs - logger.info: ClientLogger object - logging statement that occurs when the class is instantiated + logger: ClientLogger object + utilizes python logger library and creates logging for our specific needs Methods ------- - connect() - connects a boto sqs instance based on configurations in conf and cred yml files - - receive_messages(queue, sqs_max_polls, cb) - polls for messages in the queue + receive_messages(sqs_client, sqs_queue_name, sqs_max_polls, cb) + polls for messages in the queue """ - conf = None - def __init__(self, conf_loc, cred_loc): + def __init__(self, log_level = 'INFO', **wildargs): """ - - :param conf_loc: yaml file - aws-util-config-dev.yml - :param cred_loc: yaml file - credentials.yml - - Other Attributes - ---------------- - logger: ClientLogger object - utilizes python logger library and creates logging for our specific needs - logger.info: ClientLogger object - logging statement that occurs when the class is instantiated - + Attributes + ---------- + log_level: str + The log level to use for this class (Defaults to 'INFO') """ - with open(conf_loc) as f: - self.conf = yaml.load(f, Loader=yaml.FullLoader) - - with open(cred_loc) as f: - self.cred = yaml.load(f, Loader=yaml.FullLoader) - - self.logger = ClientLogger.get_logger(self.__class__.__name__, self.conf['log_level'], False) + self.log_level = log_level + self.logger = ClientLogger.get_logger(self.__class__.__name__, log_level, False) self.logger.info("Initializing " + self.__class__.__name__) - def connect(self): - """ - Connects a boto sqs instance based on configurations in conf and cred yml files + if wildargs: + self.logger.debug("Superfluous parameters in constructor call: " + str(wildargs)) - :return: boto sqs - returns instance of boto sqs resource + def connect(self, sqs_resource, sqs_queue_name): + """ + Gets a boto 
SQS.Queue resource. + :param sqs_resource: boto SQS.Resource + SQS resource to create the queue from. + :param sqs_queue_name: str + SQS queue name to create and return a boto SQS.Queue object to. + :return: SQS.Queue + An SQS.Queue resource to use for Queue operations. """ - boto_session = boto3.Session(aws_access_key_id=self.cred['sandbox']['access_key'], - aws_secret_access_key=self.cred['sandbox']['secret_key']) - # Get the queue. This returns an SQS.Queue instance - sqs_session = boto_session.resource('sqs', region_name=self.conf['s3_region']) - sqs_queue = sqs_session.Queue(self.conf['sqs_url']) - self.logger.info("Connecting to " + self.conf['sqs_url']) - return sqs_queue - - def receive_messages(self, queue, sqs_max_polls, cb): + return sqs_resource.create_queue(QueueName=sqs_queue_name) + + def receive_messages(self, sqs_queue, sqs_max_polls, cb): """ - Polls for messages in the queue + Polls for messages from an sqs queue - :param queue: boto sqs resource - instance of boto sqs resource given from connect() + :param sqs_queue: boto SQS.Queue object + boto SQS Queue object. Can be generated by the method in this class. :param sqs_max_polls: int number of polls :param cb: function call back function - :return: Dependent on the call back function + :return: If the Message has a Records key then the call back function gets called on a single message. """ self.logger.info("Receive messages") + self.logger.info("Polling %d time(s) for SQS messages" % sqs_max_polls) + + if sqs_max_polls < 1: + raise ValueError('Max polling value should be greater than 0.') - i = 1 - while i <= sqs_max_polls: + for i in range(1, sqs_max_polls+1): self.logger.info("Polling attempt: " + str(i)) - i = i + 1 - sqs_messages = queue.receive_messages(MaxNumberOfMessages=10, WaitTimeSeconds=10) + # boto3 SQS.Queue appears to have a subset of SQS.Client methods plus a few management queue ones. + # The ones they do share seem to have different return types. + # The message method names are different and return types different: + # Client.send_message and Queue.send_message and Queue.send_messages + # Client.receive_message and Queue.receive_messages + sqs_messages = sqs_queue.receive_messages( + MaxNumberOfMessages=10, + WaitTimeSeconds=10 + ) self.logger.info("Received %d messages." % len(sqs_messages)) + self.logger.debug("Messages: %s" % sqs_messages) for sqs_message in sqs_messages: - try: - # Log start time - dt_start = datetime.now(tz=timezone.utc) - self.logger.info("Started processing message") + # Log start time + dt_start = datetime.now(tz=timezone.utc) + self.logger.info("Starting message processing") + self.logger.debug("Message: %s" % sqs_message) + self.logger.debug("Message body: %s" % sqs_message.body) + try: message_body = json.loads(sqs_message.body) + self.logger.debug("Message body message: %s" % message_body['Message']) message_content = json.loads(message_body['Message']) - - if 'Records' in message_content: - recs = message_content['Records'] - self.logger.info("Received message") - self.logger.debug('Records: ' + str(recs)) - else: - self.logger.info("s3 event without records content received.") - - sqs_message.delete() - - self.logger.info("The SQS message has been deleted.") - - dt_end = datetime.now(tz=timezone.utc) - processing_time = dt_end - dt_start - - self.logger.info("Completed processing message (s):" + str(processing_time.microseconds * 1000)) - cb(recs) - except: self.logger.exception( "An exception was thrown while processing a message, but this program will continue. 
The " - "message will not be deleted from the SQS queue. The message was: %s" % sqs_message.body) + "message will not be deleted from the SQS queue. The message was: %s" % sqs_message) + + if 'Records' in message_content: + recs = message_content['Records'] + self.logger.debug('Message "Records": %s' % recs) + if len(recs) > 0: + rec = recs[0] + self.logger.debug('Single Message "Record": %s' % rec) + cb(rec, self.log_level) + else: + self.logger.info("Message body's records is empty.") + else: + self.logger.info("s3 event message without 'Records' content received.") + + dt_end = datetime.now(tz=timezone.utc) + processing_time = dt_end - dt_start + self.logger.info("Completed processing the message in %s seconds."%(processing_time.microseconds / 1000000)) + + sqs_message.delete() + self.logger.info("The SQS message has been deleted.") diff --git a/onestop-python-client/onestop/util/SqsHandlers.py b/onestop-python-client/onestop/util/SqsHandlers.py index 57be8da..2a61f08 100644 --- a/onestop-python-client/onestop/util/SqsHandlers.py +++ b/onestop-python-client/onestop/util/SqsHandlers.py @@ -1,3 +1,8 @@ +import json + +from onestop.util.ClientLogger import ClientLogger +from onestop.schemas.util.jsonEncoder import EnumEncoder + def create_delete_handler(web_publisher): """ Creates a delete function handler to be used with SqsConsumer.receive_messages. @@ -7,21 +12,89 @@ def create_delete_handler(web_publisher): :param: web_publisher: WebPublisher object """ - def delete(records): - if records is None: + def delete(rec, log_level='INFO'): + + logger = ClientLogger.get_logger('SqsHandlers.create_delete_handler.delete', log_level, False) + logger.info("In create_delete_handler.delete() handler") + logger.debug("Record: %s"%rec) + + if not rec or rec is None: + logger.info("Ending handler, record empty, record=%s"%rec) return - record = records[0] - if record['eventName'] != 'ObjectRemoved:Delete': + + if rec['eventName'] != 'ObjectRemoved:Delete': + logger.info("Ending handler, eventName=%s"%rec['eventName']) return - bucket = record['s3']['bucket']['name'] - s3_key = record['s3']['object']['key'] + + logger.info('Attempting to delete record %s'%rec) + + bucket = rec['s3']['bucket']['name'] + s3_key = rec['s3']['object']['key'] s3_url = "s3://" + bucket + "/" + s3_key payload = '{"queries":[{"type": "fieldQuery", "field": "links.linkUrl", "value": "' + s3_url + '"}] }' search_response = web_publisher.search_onestop('granule', payload) + logger.debug('OneStop search response=%s'%search_response) response_json = search_response.json() + logger.debug('OneStop search response json=%s'%response_json) + logger.debug('OneStop search response data=%s'%response_json['data']) if len(response_json['data']) != 0: granule_uuid = response_json['data'][0]['id'] response = web_publisher.delete_registry('granule', granule_uuid) + logger.debug('web_publisher.delete_registry response: %s'%response) return response + logger.warning("OneStop search response has no 'data' field. Response=%s"%response_json) + return delete + +def create_upload_handler(web_publisher, s3_utils, s3_message_adapter): + """ + Creates a upload function handler to be used with SqsConsumer.receive_messages. + + The upload handler function checks the object for a UUID and if one is not found, it will create one for it. 
+ + :param: web_publisher: WebPublisher object + :param: s3_utils: S3Utils object + :param: s3ma: S3MessageAdapter object + + """ + def upload(rec, log_level='DEBUG'): + logger = ClientLogger.get_logger('SqsHandlers.create_upload_handler.upload', log_level, False) + logger.info("In create_upload_handler.upload() handler") + logger.debug("Records: %s"%rec) + + s3_key = rec['s3']['object']['key'] + logger.info("Received message for " + s3_key) + logger.info("Event type: " + rec['eventName']) + bucket = rec['s3']['bucket']['name'] + logger.info("BUCKET: %s"%bucket) + + # Fetch the object's uuid from cloud object, if exists. + s3_resource = s3_utils.connect('resource', 's3', None) + object_uuid = s3_utils.get_uuid_metadata(s3_resource, bucket, s3_key) + if object_uuid is not None: + logger.info("Retrieved object-uuid: %s"%object_uuid) + else: + logger.info("Adding uuid") + # Can't add uuid to glacier and should be copied over + if "backup" not in bucket: + object_uuid = s3_utils.add_uuid_metadata(s3_resource, bucket, s3_key) + + # Convert s3 message to IM message + im_message = s3_message_adapter.transform(rec) + json_payload = json.dumps(im_message.to_dict(), cls=EnumEncoder) + logger.debug('transformed message, json_payload: %s'%json_payload) + + # Send the message to registry + method = 'PATCH' # Backup location should be patched if not backup within bucket name + if "backup" not in bucket: + method = 'POST' + + logger.debug('web_publisher.publish_registry method using "%s" with payload %s'%(method,json_payload)) + registry_response = web_publisher.publish_registry("granule", object_uuid, json_payload, method) + logger.debug('web_publisher.publish_registry response=%s'%registry_response) + logger.debug('web_publisher.publish_registry response json=%s'%registry_response.json()) + + return registry_response + + return upload \ No newline at end of file diff --git a/onestop-python-client/requirements.txt b/onestop-python-client/requirements.txt index 735dad7..75e4f29 100644 --- a/onestop-python-client/requirements.txt +++ b/onestop-python-client/requirements.txt @@ -1,12 +1,13 @@ confluent-kafka -avro-python3 +avro fastavro smart-open PyYAML~=5.3.1 setuptools~=49.2.0 argparse~=1.4.0 -boto3~=1.15.11 +boto~=2.49.0 +boto3~=1.17.71 requests~=2.24.0 -botocore~=1.18.11 -moto==1.3.16.dev122 +botocore~=1.20.71 +moto[all]==2.0.5 undictify diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/__init__.py b/onestop-python-client/test/__init__.py similarity index 100% rename from onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/__init__.py rename to onestop-python-client/test/__init__.py diff --git a/onestop-python-client/tests/data/file1.csv b/onestop-python-client/test/data/file1.csv similarity index 100% rename from onestop-python-client/tests/data/file1.csv rename to onestop-python-client/test/data/file1.csv diff --git a/onestop-python-client/tests/data/file1_s3.csv b/onestop-python-client/test/data/file1_s3.csv similarity index 100% rename from onestop-python-client/tests/data/file1_s3.csv rename to onestop-python-client/test/data/file1_s3.csv diff --git a/onestop-python-client/tests/data/file2.csv b/onestop-python-client/test/data/file2.csv similarity index 100% rename from onestop-python-client/tests/data/file2.csv rename to onestop-python-client/test/data/file2.csv diff --git a/onestop-python-client/tests/data/file3.csv b/onestop-python-client/test/data/file3.csv similarity index 100% rename from onestop-python-client/tests/data/file3.csv rename to 
onestop-python-client/test/data/file3.csv diff --git a/onestop-python-client/tests/data/file4.csv b/onestop-python-client/test/data/file4.csv similarity index 100% rename from onestop-python-client/tests/data/file4.csv rename to onestop-python-client/test/data/file4.csv diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/__init__.py b/onestop-python-client/test/integration/__init__.py similarity index 100% rename from onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/__init__.py rename to onestop-python-client/test/integration/__init__.py diff --git a/onestop-python-client/tests/test_WebPublisher.py b/onestop-python-client/test/integration/test_WebPublisher.py similarity index 83% rename from onestop-python-client/tests/test_WebPublisher.py rename to onestop-python-client/test/integration/test_WebPublisher.py index c81a7de..5c7935a 100644 --- a/onestop-python-client/tests/test_WebPublisher.py +++ b/onestop-python-client/test/integration/test_WebPublisher.py @@ -2,8 +2,10 @@ import json import unittest import time +import os.path from onestop.WebPublisher import WebPublisher +from os import path class WebPublisherTest(unittest.TestCase): wp = None @@ -56,16 +58,26 @@ class WebPublisherTest(unittest.TestCase): def setUpClass(cls): print("Set it up!") - cred_loc = "../config/credentials.yml" - conf_loc = "../config/csb-data-stream-config-template.yml" - - with open(cred_loc) as f: - creds = yaml.load(f, Loader=yaml.FullLoader) - - registry_username = creds['registry']['username'] - registry_password = creds['registry']['password'] - access_key = creds['sandbox']['access_key'] - access_secret = creds['sandbox']['secret_key'] + cred_loc = "config/credentials.yml" + conf_loc = "config/csb-data-stream-config-template.yml" + + if path.exists(cred_loc): + with open(cred_loc) as f: + creds = yaml.load(f, Loader=yaml.FullLoader) + + registry_username = creds['registry']['username'] + registry_password = creds['registry']['password'] + access_key = creds['sandbox']['access_key'] + access_secret = creds['sandbox']['secret_key'] + else: + print("Credentials file doesn't exist at '%s', using environment variables."%cred_loc) + registry_username = os.environ.get('REGISTRY_USERNAME') + registry_password = os.environ.get("REGISTRY_PASSWORD") + access_key = os.environ.get("ACCESS_KEY") + access_secret = os.environ.get("SECRET_KEY") + if registry_username == None: + msg = "REGISTRY_USERNAME not defined as env variable. Credentials file at '" + cred_loc + "' doesn't exist." 
+ raise Exception(msg) with open(conf_loc) as f: conf = yaml.load(f, Loader=yaml.FullLoader) diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/__init__.py b/onestop-python-client/test/unit/__init__.py similarity index 100% rename from onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/__init__.py rename to onestop-python-client/test/unit/__init__.py diff --git a/onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/__init__.py b/onestop-python-client/test/unit/extractor/__init__.py similarity index 100% rename from onestop-python-client/onestop/schemas/geojsonSchemaClasses_test/org/cedar/schemas/avro/__init__.py rename to onestop-python-client/test/unit/extractor/__init__.py diff --git a/onestop-python-client/test/unit/extractor/test_CsbExtractor.py b/onestop-python-client/test/unit/extractor/test_CsbExtractor.py new file mode 100644 index 0000000..cba1bf7 --- /dev/null +++ b/onestop-python-client/test/unit/extractor/test_CsbExtractor.py @@ -0,0 +1,119 @@ +import unittest +import os + +from moto import mock_s3 +from onestop.extract.CsbExtractor import CsbExtractor +from onestop.util.S3Utils import S3Utils + +class CsbExtractorTest(unittest.TestCase): + + def setUp(self): + print("Set it up!") + self.root_proj_path = os.getcwd() + self.assertIsNotNone(self.root_proj_path) + self.data_file_path = os.getcwd() + '/test/data/file4.csv' + self.key = "file4.csv" + # Use open instead of our method because we aren't testing our code here. + self.file_obj = open(self.data_file_path) + + config_dict = { + "access_key": "test_access_key", + "secret_key": "test_secret_key", + "log_level": "DEBUG" + } + + self.s3_utils = S3Utils(**config_dict) + self.bucket = "bucket" + self.region = "region" + + def tearDown(self): + print("Tear it down!") + self.file_obj.close() + + def test_is_csv(self): + self.assertTrue(CsbExtractor.is_csv("test/blah/file.csv"), "Failed to determine a csv file name was a csv file.") + + def test_is_not_csv(self): + self.assertFalse(CsbExtractor.is_csv("test/blah/file.txt"), "Failed to determine a csv file name was not a csv file.") + + @mock_s3 + def test_csb_SME_user_path(self): + # Setup bucket and file to read + s3 = self.s3_utils.connect('client', 's3', self.region) + s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration={'LocationConstraint': self.region}) + self.s3_utils.upload_s3(s3, self.data_file_path, self.bucket, self.key, True) + self.assertTrue(self.s3_utils.read_bytes_s3(s3, self.bucket, self.key)) + + # This is how we would expect an external user to get the file. 
+ sm_open_file = self.s3_utils.get_csv_s3(self.s3_utils.connect('session', None, self.region), self.bucket, self.key) + + bounds_dict = CsbExtractor.get_spatial_temporal_bounds(sm_open_file, 'LON', 'LAT', 'TIME') + coords = bounds_dict["geospatial"] + self.assertEqual(coords[0], -96.847995) + self.assertEqual(coords[1], 29.373065) + self.assertEqual(coords[2], -92.747995) + self.assertEqual(coords[3], 33.373065) + + date_rng = bounds_dict["temporal"] + self.assertEqual(date_rng[0], '2018-04-10T14:00:06.000Z' ) + self.assertEqual(date_rng[1], '2020-04-10T14:00:06.000Z' ) + + def test_get_geospatial_temporal_bounds(self): + bounds_dict = CsbExtractor.get_spatial_temporal_bounds(self.file_obj, 'LON', 'LAT', 'TIME') + + coords = bounds_dict["geospatial"] + self.assertEqual(coords[0], -96.847995) + self.assertEqual(coords[1], 29.373065) + self.assertEqual(coords[2], -92.747995) + self.assertEqual(coords[3], 33.373065) + + date_rng = bounds_dict["temporal"] + self.assertEqual(date_rng[0], '2018-04-10T14:00:06.000Z' ) + self.assertEqual(date_rng[1], '2020-04-10T14:00:06.000Z' ) + + def test_get_min_lon(self): + bounds_dict = CsbExtractor.get_spatial_temporal_bounds(self.file_obj, 'LON', 'LAT', 'TIME') + + coords = bounds_dict["geospatial"] + min_lon = coords[0] + self.assertEqual(min_lon, -96.847995) + + def test_get_max_datetime(self): + + bounds_dict = CsbExtractor.get_spatial_temporal_bounds(self.file_obj, 'LON', 'LAT', 'TIME') + + date_rng = bounds_dict["temporal"] + end_date = date_rng[1] + self.assertEqual(end_date, '2020-04-10T14:00:06.000Z') + + def test_get_min_datetime(self): + bounds_dict = CsbExtractor.get_spatial_temporal_bounds(self.file_obj, 'LON', 'LAT', 'TIME') + + date_rng = bounds_dict["temporal"] + begin_date = date_rng[0] + self.assertEqual(begin_date, '2018-04-10T14:00:06.000Z') + + def test_extract_coords(self): + coords = CsbExtractor.extract_coords(self.file_obj, -92.747995, 33.373065, -96.847995, 29.373065) + result = [[ + -94.847995, + 29.373065 + ], + [ + -96.847995, + 29.373065 + ], + [ + -94.847995, + 33.373065 + ], + [ + -92.747995, + 29.383065 + ] + ] + self.assertEqual(coords, result) + + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/__init__.py b/onestop-python-client/test/unit/schemas/__init__.py similarity index 100% rename from onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/__init__.py rename to onestop-python-client/test/unit/schemas/__init__.py diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/__init__.py b/onestop-python-client/test/unit/schemas/psiSchemaClasses/__init__.py similarity index 100% rename from onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/__init__.py rename to onestop-python-client/test/unit/schemas/psiSchemaClasses/__init__.py diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/__init__.py b/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/__init__.py similarity index 100% rename from onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/__init__.py rename to onestop-python-client/test/unit/schemas/psiSchemaClasses/org/__init__.py diff --git a/onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/__init__.py b/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/__init__.py similarity index 100% rename from 
onestop-python-client/onestop/schemas/psiSchemaClasses_test/org/cedar/schemas/avro/__init__.py rename to onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/__init__.py diff --git a/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/__init__.py b/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/__init__.py b/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/__init__.py b/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/test_ParsedRecord.py b/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/test_ParsedRecord.py new file mode 100644 index 0000000..ffa1ecd --- /dev/null +++ b/onestop-python-client/test/unit/schemas/psiSchemaClasses/org/cedar/schemas/avro/psi/test_ParsedRecord.py @@ -0,0 +1,603 @@ +import unittest + +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.parsed_record import ParsedRecord +from onestop.schemas.psiSchemaClasses.keywords_element import KeywordsElement +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.discovery import Discovery +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.file_location import FileLocation +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.temporal_bounding import TemporalBounding +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.relationship import Relationship +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.relationship_type import RelationshipType +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.record_type import RecordType +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.link import Link +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.responsible_party import ResponsibleParty +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.reference import Reference +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.file_location_type import FileLocationType +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.valid_descriptor import ValidDescriptor +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.analysis import Analysis +from onestop.schemas.psiSchemaClasses.identification_analysis import IdentificationAnalysis +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.checksum_algorithm import ChecksumAlgorithm +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.checksum import Checksum +from onestop.schemas.psiSchemaClasses.temporal_bounding_analysis import TemporalBoundingAnalysis +from onestop.schemas.psiSchemaClasses.operation import Operation +from onestop.schemas.psiSchemaClasses.data_format import DataFormat +from onestop.schemas.psiSchemaClasses.platform import Platform +from onestop.schemas.psiSchemaClasses.time_range_descriptor import TimeRangeDescriptor +from onestop.schemas.psiSchemaClasses.instruments import Instruments +from onestop.schemas.geojsonSchemaClasses.line_string_type import LineStringType +from 
onestop.schemas.geojsonSchemaClasses.multi_line_string_type import MultiLineStringType +from onestop.schemas.geojsonSchemaClasses.multi_point_type import MultiPointType +from onestop.schemas.geojsonSchemaClasses.multi_polygon_type import MultiPolygonType +from onestop.schemas.geojsonSchemaClasses.point_type import PointType +from onestop.schemas.geojsonSchemaClasses.polygon_type import PolygonType +from onestop.schemas.geojsonSchemaClasses.org.cedar.schemas.avro.geojson.point import Point +from onestop.schemas.geojsonSchemaClasses.org.cedar.schemas.avro.geojson.multi_point import MultiPoint +from onestop.schemas.geojsonSchemaClasses.org.cedar.schemas.avro.geojson.line_string import LineString +from onestop.schemas.geojsonSchemaClasses.org.cedar.schemas.avro.geojson.multi_line_string import MultiLineString +from onestop.schemas.geojsonSchemaClasses.org.cedar.schemas.avro.geojson.polygon import Polygon +from onestop.schemas.geojsonSchemaClasses.org.cedar.schemas.avro.geojson.multi_polygon import MultiPolygon + +class test_ParsedRecord(unittest.TestCase): + + fileLocation_dict = { + "serviceType":"Amazon:AWS:S3", + "deleted":False, + "restricted":False, + "asynchronous":False, + "locality":"us-east-1", + "lastModified":1572430074000, + "type": FileLocationType.INGEST, + "optionalAttributes":{ + }, + "uri":"s3://noaa-goes16/ABI-L1b-RadF/2019/303/09/OR_ABI-L1b-RadF-M6C10_G16_s20193030950389_e20193031000109_c20193031000158.nc" + } + + relationship_dict = { + "id": "5b58de08-afef-49fb-99a1-9c5d5c003bde", + "type": RelationshipType.COLLECTION + } + relationships_dict = { + "relationships":[ + relationship_dict, + { + "id":"6668de08-afef-49fb-99a1-9c5d5c003bde", + "type":{"type":"collection"} + } + ] + } + + # Discovery Related items + link_dict = { + "linkName": "Amazon S3", + "linkProtocol": "HTTPS", + "linkUrl": "https://s3.amazonaws.com/nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2", + "linkDescription": "who knows", + "linkFunction": "download" + } + keywordsElement_dict = { + 'values': ['value1'], + 'type': 'type1', + 'namespace': 'name space' + } + temporalBounding_dict = { + 'beginDate': 'begin date', + 'beginIndeterminate': 'begin ind', + 'endDate': 'end date', + 'endIndeterminate': 'end ind', + 'instant': 'instant', + 'instantIndeterminate': 'instant ind', + 'description': 'desc' + } + point_dict = { + 'type': PointType.POINT, + 'coordinates': [0.0, 1.1, 2.2, 3.3] + } + multiPoint_dict = { + 'type': MultiPointType.MULTIPOINT, + 'coordinates': [[0.0, 1.0], [2.0, 1.0]] + } + lineString_dict = { + 'type': LineStringType.LINESTRING, + 'coordinates': [[0.0, 1.0], [2.0, 1.0]] + } + multiLineString_dict = { + 'type': MultiLineStringType.MULTILINESTRING, + 'coordinates': [[[0.0, 1.0], [2.0, 1.0]], [[0.0, 2.0], [2.0, 2.0]]] + } + polygon_dict = { + 'type': PolygonType.POLYGON, + 'coordinates': [[[0.0, 1.0], [2.0, 1.0]], [[0.0, 2.0], [2.0, 2.0]]] + } + multiPolygon_dict = { + 'type': MultiPolygonType.MULTIPOLYGON, + 'coordinates': [[[[0.0, 1.0], [2.0, 1.0]], [[0.0, 2.0], [2.0, 2.0]]], [[[1.0, 1.0], [2.0, 1.0]], [[0.0, 2.0], [2.0, 2.0]]]] + } + instruments_dict = { + 'instrumentIdentifier': 'ident', + 'instrumentType': 'type', + 'instrumentDescription': 'desc' + } + operation_dict = { + 'operationDescription': 'desc', + 'operationIdentifier': 'iden', + 'operationStatus': 'status', + 'operationType': 'type' + } + platform_dict = { + 'platformIdentifier': 'ident', + 'platformDescription': 'desc', + 'platformSponsor': ['sponsor1'] + } + 
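+    # Note: despite its name, dateFormat_dict below is the fixture for the DataFormat
+    # schema class (see test_dateFormat_all_vars_set further down).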
dateFormat_dict = { + 'name': 'date1', + 'version': 'version1' + } + responsibleParty_dict = { + 'individualName': 'person name', + 'organizationName': 'organization', + 'positionName': 'position name', + 'role': 'role', + 'email': 'email addy', + 'phone': 'phone' + } + reference_dict = { + 'title': 'a title', + 'date': 'date', + 'links': [link_dict] + } + discovery_dict = { + 'fileIdentifier': 'gov.noaa.nodc:NDBC-COOPS', + 'parentIdentifier': 'gov.noaa.nodc:NDBC-COOPS', + 'hierarchyLevelName': '', + 'doi': 'doi', + 'purpose': 'purpose', + 'status': 'status', + 'credit': 'credit', + 'title': 'title', + 'alternateTitle': 'alternate title', + 'description': 'description', + 'keywords': [keywordsElement_dict], + 'topicCategories': ['category1'], + 'temporalBounding': temporalBounding_dict, + 'spatialBounding': None, + 'isGlobal': False, + 'acquisitionInstruments': [instruments_dict], + 'acquisitionOperations': [operation_dict], + 'acquisitionPlatforms': [platform_dict], + 'dataFormats': [dateFormat_dict], + 'links': [link_dict], + 'responsibleParties': [responsibleParty_dict], + 'thumbnail': 'thumbnail', + 'thumbnailDescription': 'thumbnail description', + 'creationDate': 'creation date', + 'revisionDate': 'revision date', + 'publicationDate': 'publicationd date', + 'citeAsStatements': ['cite as statements'], + 'crossReferences': [reference_dict], + 'largerWorks': [reference_dict], + 'useLimitation': 'use limitation', + 'legalConstraints': ['legal constraints'], + 'accessFeeStatement': 'access fee', + 'orderingInstructions': 'no instructions', + 'edition': 'edition1', + 'dsmmAccessibility': -4, + 'dsmmDataIntegrity': -3, + 'dsmmDataQualityAssessment': -2, + 'dsmmDataQualityAssurance': -1, + 'dsmmDataQualityControlMonitoring': 1, + 'dsmmPreservability': 2, + 'dsmmProductionSustainability': 3, + 'dsmmTransparencyTraceability': 4, + 'dsmmUsability': 5, + 'dsmmAverage': 5.0, + 'updateFrequency': 'update freq', + 'presentationForm': 'presentation form' + } + + identificationAnalysis_dict = { + 'fileIdentifierExists': True, + 'fileIdentifierString': 'file iden', + 'doiExists': False, + 'doiString': 'doi', + 'parentIdentifierExists': True, + 'parentIdentifierString': 'parent iden', + 'hierarchyLevelNameExists': False, + 'isGranule': True + } + titleAnalysis_dict = { + 'titleExists': True, + 'titleCharacters': 1, + 'alternateTitleExists': True, + 'alternateTitleCharacters': 2, + 'titleFleschReadingEaseScore': 3.0, + 'alternateTitleFleschReadingEaseScore': 4.0, + 'titleFleschKincaidReadingGradeLevel': 5.0, + 'alternateTitleFleschKincaidReadingGradeLevel': 6.0 + } + descriptionAnalysis_dict = { + 'descriptionExists': True, + 'descriptionCharacters': 3, + 'descriptionFleschReadingEaseScore': 1.0, + 'descriptionFleschKincaidReadingGradeLevel': 2.0 + } + dataAccessAnalysis_dict = { + 'dataAccessExists': False + } + thumbnail_dict = { + 'thumbnailExists': True + } + temporalBoundingAnalysis_dict = { + 'beginDescriptor': ValidDescriptor.VALID, + 'beginPrecision': 'begin prec', + 'beginIndexable': True, + 'beginZoneSpecified': 'begin zone', + 'beginUtcDateTimeString': 'begin utc', + 'beginYear': 2021, + 'beginDayOfYear': 2, + 'beginDayOfMonth': 2, + 'beginMonth': 2, + 'endDescriptor': ValidDescriptor.INVALID, + 'endPrecision': 'end prec', + 'endIndexable': False, + 'endZoneSpecified': 'end zone', + 'endUtcDateTimeString': 'end utc', + 'endYear': 2025, + 'endDayOfYear': 2, + 'endDayOfMonth': 2, + 'endMonth': 2, + 'instantDescriptor': ValidDescriptor.UNDEFINED, + 'instantPrecision': 'instant prec', + 
'instantIndexable': False, + 'instantZoneSpecified': 'instant zone', + 'instantUtcDateTimeString': 'instant utc', + 'instantEndUtcDateTimeString': 'instant end utc', + 'instantYear': 2, + 'instantDayOfYear': 2, + 'instantEndDayOfYear': 2, + 'instantDayOfMonth': 2, + 'instantEndDayOfMonth': 2, + 'instantMonth': 2, + 'instantEndMonth': 2, + 'rangeDescriptor': TimeRangeDescriptor.AMBIGUOUS + } + spatialBounding_dict = { + 'spatialBoundingExists': False, + 'isValid': True, + 'validationError': 'validation' + } + analysis_dict = { + 'identification': identificationAnalysis_dict, + 'titles': titleAnalysis_dict, + 'description': descriptionAnalysis_dict, + 'dataAccess': dataAccessAnalysis_dict, + 'thumbnail': thumbnail_dict, + 'temporalBounding': temporalBoundingAnalysis_dict, + 'spatialBounding': spatialBounding_dict + } + checksum_dict = { + 'algorithm': ChecksumAlgorithm.MD5, + 'value': 'value1' + } + fileInformation_dict = { + 'name': 'file name', + 'size': 1, + 'checksums': [checksum_dict], + 'format': 'format', + 'headers': 'header', + 'optionalAttributes': {'attr1': 'value1', 'attr2': 'value2'} + } + publishing_dict = { + 'isPrivate': True, + 'until': -1 + } + relationships_dict = { + 'type': RelationshipType.COLLECTION, + 'id': 'id1' + } + errorEvent_dict = { + 'title': 'title1', + 'detail': 'detail1', + 'status': 404, + 'code': 500, + 'source': 'source1' + } + parsedRecord_dict = { + 'type': RecordType.COLLECTION, + 'discovery': discovery_dict, + 'analysis': analysis_dict, + 'fileInformation': fileInformation_dict, + 'fileLocations': { + 's3://noaa-goes16/ABI-L1b-RadF/2019/303/09/OR_ABI-L1b-RadF-M6C10_G16_s20193030950389_e20193031000109_c20193031000158.nc': { + **fileLocation_dict + } + }, + 'publishing': publishing_dict, + 'relationships': [relationships_dict], + 'errors': [errorEvent_dict] + } + + def test_parsed_record_all_vars_set(self): + parsedRecord = ParsedRecord(**self.parsedRecord_dict) + + self.assertEqual(parsedRecord.type, self.parsedRecord_dict['type']) + + def test_discovery_all_vars_set(self): + discovery = Discovery(**self.discovery_dict) + + self.assertIsNotNone(discovery) + + def test_discovery_spatial_bounding_supports_none_type(self): + dict = { + 'links': [self.link_dict], + 'spatialBounding': None + } + discovery = Discovery(**dict) + + self.assertEqual(discovery.spatialBounding, dict['spatialBounding']) + + def test_discovery_spatial_bounding_supports_point_type(self): + dict = { + 'links': [self.link_dict], + 'spatialBounding': Point(**self.point_dict) + } + discovery = Discovery(**dict) + + self.assertEqual(discovery.spatialBounding, dict['spatialBounding']) + + def test_discovery_spatial_bounding_supports_multipoint_type(self): + dict = { + 'links': [self.link_dict], + 'spatialBounding': MultiPoint(**self.multiPoint_dict) + } + discovery = Discovery(**dict) + + self.assertEqual(discovery.spatialBounding, dict['spatialBounding']) + + def test_discovery_spatial_bounding_supports_linestring_type(self): + dict = { + 'links': [self.link_dict], + 'spatialBounding': LineString(**self.lineString_dict) + } + discovery = Discovery(**dict) + + self.assertEqual(discovery.spatialBounding, dict['spatialBounding']) + + def test_discovery_spatial_bounding_supports_multilinestring_type(self): + dict = { + 'links': [self.link_dict], + 'spatialBounding': MultiLineString(**self.multiLineString_dict) + } + discovery = Discovery(**dict) + + self.assertEqual(discovery.spatialBounding, dict['spatialBounding']) + + def test_discovery_spatial_bounding_supports_polygon_type(self): + dict 
= { + 'links': [self.link_dict], + 'spatialBounding': Polygon(**self.polygon_dict) + } + discovery = Discovery(**dict) + + self.assertEqual(discovery.spatialBounding, dict['spatialBounding']) + + def test_discovery_spatial_bounding_supports_multipolygon_type(self): + dict = { + 'links': [self.link_dict], + 'spatialBounding': MultiPolygon(**self.multiPolygon_dict) + } + discovery = Discovery(**dict) + + self.assertEqual(discovery.spatialBounding, dict['spatialBounding']) + + def test_keywords_all_vars_set(self): + keywords = KeywordsElement(**self.keywordsElement_dict) + + self.assertEqual(keywords.values, self.keywordsElement_dict['values']) + self.assertEqual(keywords.type, self.keywordsElement_dict['type']) + self.assertEqual(keywords.namespace, self.keywordsElement_dict['namespace']) + + def test_temporalBounding_all_vars_set(self): + temporal = TemporalBounding(**self.temporalBounding_dict) + + self.assertEqual(temporal.beginDate, self.temporalBounding_dict['beginDate']) + self.assertEqual(temporal.beginIndeterminate, self.temporalBounding_dict['beginIndeterminate']) + self.assertEqual(temporal.endDate, self.temporalBounding_dict['endDate']) + self.assertEqual(temporal.endIndeterminate, self.temporalBounding_dict['endIndeterminate']) + self.assertEqual(temporal.instant, self.temporalBounding_dict['instant']) + self.assertEqual(temporal.instantIndeterminate, self.temporalBounding_dict['instantIndeterminate']) + self.assertEqual(temporal.description, self.temporalBounding_dict['description']) + + def test_point_all_vars_set(self): + point = Point(**self.point_dict) + + self.assertEqual(point.type, self.point_dict['type']) + + def test_multiPoint_all_vars_set(self): + multi_point = MultiPoint(**self.multiPoint_dict) + + self.assertEqual(multi_point.type, self.multiPoint_dict['type']) + self.assertEqual(multi_point.coordinates, self.multiPoint_dict['coordinates']) + + def test_lineString_all_vars_set(self): + line_string = LineString(**self.lineString_dict) + + self.assertEqual(line_string.type, self.lineString_dict['type']) + self.assertEqual(line_string.coordinates, self.lineString_dict['coordinates']) + + def test_multiLineString_all_vars_set(self): + multi_line_string = MultiLineString(**self.multiLineString_dict) + + self.assertEqual(multi_line_string.type, self.multiLineString_dict['type']) + self.assertEqual(multi_line_string.coordinates, self.multiLineString_dict['coordinates']) + + def test_polygon_all_vars_set(self): + polygon = Polygon(**self.polygon_dict) + + self.assertEqual(polygon.type, self.polygon_dict['type']) + self.assertEqual(polygon.coordinates, self.polygon_dict['coordinates']) + + def test_multiPolygon_all_vars_set(self): + multi_polygon = MultiPolygon(**self.multiPolygon_dict) + + self.assertEqual(multi_polygon.type, self.multiPolygon_dict['type']) + self.assertEqual(multi_polygon.coordinates, self.multiPolygon_dict['coordinates']) + + def test_instruments_all_vars_set(self): + instruments = Instruments(**self.instruments_dict) + + self.assertEqual(instruments.instrumentIdentifier, self.instruments_dict['instrumentIdentifier']) + self.assertEqual(instruments.instrumentType, self.instruments_dict['instrumentType']) + self.assertEqual(instruments.instrumentDescription, self.instruments_dict['instrumentDescription']) + + def test_operation_all_vars_set(self): + operation = Operation(**self.operation_dict) + + self.assertEqual(operation.operationDescription, self.operation_dict['operationDescription']) + self.assertEqual(operation.operationIdentifier, 
self.operation_dict['operationIdentifier']) + self.assertEqual(operation.operationStatus, self.operation_dict['operationStatus']) + self.assertEqual(operation.operationType, self.operation_dict['operationType']) + + def test_platform_all_vars_set(self): + platform = Platform(**self.platform_dict) + + self.assertEqual(platform.platformIdentifier, self.platform_dict['platformIdentifier']) + self.assertEqual(platform.platformDescription, self.platform_dict['platformDescription']) + self.assertEqual(platform.platformSponsor, self.platform_dict['platformSponsor']) + + def test_dateFormat_all_vars_set(self): + dateformat = DataFormat(**self.dateFormat_dict) + + self.assertEqual(dateformat.name, self.dateFormat_dict['name']) + self.assertEqual(dateformat.version, self.dateFormat_dict['version']) + + def test_link_all_vars_set(self): + link = Link(**self.link_dict) + + self.assertEqual(link.linkName, self.link_dict['linkName']) + self.assertEqual(link.linkProtocol, self.link_dict['linkProtocol']) + self.assertEqual(link.linkUrl, self.link_dict['linkUrl']) + self.assertEqual(link.linkDescription, self.link_dict['linkDescription']) + self.assertEqual(link.linkFunction, self.link_dict['linkFunction']) + + def test_responsibleParty_all_vars_set(self): + responsibleParty = ResponsibleParty(**self.responsibleParty_dict) + + self.assertEqual(responsibleParty.individualName, self.responsibleParty_dict['individualName']) + self.assertEqual(responsibleParty.organizationName, self.responsibleParty_dict['organizationName']) + self.assertEqual(responsibleParty.positionName, self.responsibleParty_dict['positionName']) + self.assertEqual(responsibleParty.role, self.responsibleParty_dict['role']) + self.assertEqual(responsibleParty.email, self.responsibleParty_dict['email']) + self.assertEqual(responsibleParty.phone, self.responsibleParty_dict['phone']) + + def test_reference_all_vars_set(self): + reference = Reference(**self.reference_dict) + + self.assertEqual(reference.title, self.reference_dict['title']) + self.assertEqual(reference.date, self.reference_dict['date']) + self.assertEqual(reference.links[0].linkName, self.reference_dict['links'][0]['linkName']) + self.assertEqual(reference.links[0].linkProtocol, self.reference_dict['links'][0]['linkProtocol']) + self.assertEqual(reference.links[0].linkUrl, self.reference_dict['links'][0]['linkUrl']) + self.assertEqual(reference.links[0].linkDescription, self.reference_dict['links'][0]['linkDescription']) + self.assertEqual(reference.links[0].linkFunction, self.reference_dict['links'][0]['linkFunction']) + + def test_analysis_all_vars_set(self): + analysis = Analysis(**self.analysis_dict) + + self.assertEqual(analysis.identification, IdentificationAnalysis(**self.identificationAnalysis_dict)) + + def test_fileLocation_all_vars_set(self): + fileLocations = FileLocation(**self.fileLocation_dict) + + self.assertEqual(fileLocations.uri, self.fileLocation_dict['uri']) + self.assertEqual(fileLocations.type, self.fileLocation_dict['type']) + self.assertEqual(fileLocations.deleted, self.fileLocation_dict['deleted']) + self.assertEqual(fileLocations.restricted, self.fileLocation_dict['restricted']) + self.assertEqual(fileLocations.asynchronous, self.fileLocation_dict['asynchronous']) + self.assertEqual(fileLocations.locality, self.fileLocation_dict['locality']) + self.assertEqual(fileLocations.lastModified, self.fileLocation_dict['lastModified']) + self.assertEqual(fileLocations.serviceType, self.fileLocation_dict['serviceType']) + 
self.assertEqual(fileLocations.optionalAttributes, self.fileLocation_dict['optionalAttributes']) + + def test_relationships_all_vars_set(self): + relationship = Relationship(**self.relationship_dict) + + self.assertEqual(relationship.id, self.relationship_dict['id']) + self.assertEqual(relationship.type, self.relationship_dict['type']) + + def test_relationships_optionals(self): + id = '12' + relationship = Relationship(id=id, type=None) + + self.assertEqual(relationship.id, id) + + # Negative Tests + def test_lineString_type_fails_bad_type(self): + local_dict = dict(self.lineString_dict) + local_dict['type'] = 'BadEnumTypeString' + + self.assertRaises(TypeError, LineString, **local_dict) + + def test_multiLineString_type_fails_bad_type(self): + local_dict = dict(self.multiLineString_dict) + local_dict['type'] = 'BadEnumTypeString' + + self.assertRaises(TypeError, MultiLineString, **local_dict) + + def test_multiPoint_type_fails_bad_type(self): + local_dict = dict(self.multiPoint_dict) + local_dict['type'] = 'BadEnumTypeString' + + self.assertRaises(TypeError, MultiPoint, **local_dict) + + def test_multiPolygon_type_fails_bad_type(self): + local_dict = dict(self.multiPolygon_dict) + local_dict['type'] = 'BadEnumTypeString' + + self.assertRaises(TypeError, MultiPolygon, **local_dict) + + def test_point_type_fails_bad_type(self): + local_dict = dict(self.point_dict) + local_dict['type'] = 'BadEnumTypeString' + + self.assertRaises(TypeError, Point, **local_dict) + + def test_polygon_type_fails_bad_type(self): + local_dict = dict(self.polygon_dict) + local_dict['type'] = 'BadEnumTypeString' + + self.assertRaises(TypeError, Polygon, **local_dict) + + def test_temporalBoundingAnalysis_rangeDescriptor_fails_bad_type(self): + local_dict = dict(self.temporalBoundingAnalysis_dict) + local_dict['rangeDescriptor'] = 'BadEnumTypeString' + + self.assertRaises(TypeError, TemporalBoundingAnalysis, **local_dict) + + def test_checksum_algorithm_fails_bad_type(self): + local_dict = dict(self.checksum_dict) + local_dict['algorithm'] = 'BadEnumTypeString' + + self.assertRaises(TypeError, Checksum, **local_dict) + + def test_fileLocation_type_fails_bad_type(self): + local_dict = dict(self.fileLocation_dict) + local_dict['type'] = 'BadEnumTypeString' + + self.assertRaises(TypeError, FileLocation, **local_dict) + + def test_parsedRecord_type_fails_bad_type(self): + local_dict = dict(self.parsedRecord_dict) + local_dict['type'] = 'BadEnumTypeString' + + self.assertRaises(TypeError, ParsedRecord, **local_dict) + + def test_relationship_type_fails_bad_type(self): + local_dict = dict(self.relationship_dict) + local_dict['type'] = 'BadEnumTypeString' + + self.assertRaises(TypeError, Relationship, **local_dict) + + def test_temporalBoundingAnalysis_validDescriptor_fails_bad_type(self): + local_dict = dict(self.temporalBoundingAnalysis_dict) + local_dict['endDescriptor'] = 'BadEnumTypeString' + + self.assertRaises(TypeError, TemporalBoundingAnalysis, **local_dict) diff --git a/onestop-python-client/test/unit/schemas/util/__init__.py b/onestop-python-client/test/unit/schemas/util/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/onestop-python-client/test/unit/schemas/util/test_jsonEncoder.py b/onestop-python-client/test/unit/schemas/util/test_jsonEncoder.py new file mode 100644 index 0000000..0da8331 --- /dev/null +++ b/onestop-python-client/test/unit/schemas/util/test_jsonEncoder.py @@ -0,0 +1,202 @@ +import json +import unittest + +from onestop.schemas.util.jsonEncoder import EnumEncoder, as_enum 
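+# The tests below exercise the EnumEncoder/as_enum round-trip convention, roughly (based on
+# the assertions that follow; the decode tests only check that loading does not raise):
+#   json.dumps(ChecksumAlgorithm.MD5, cls=EnumEncoder)  ->  '{"__enum__": "ChecksumAlgorithm.MD5"}'
+#   json.loads('{"type": {"__enum__": "ChecksumAlgorithm.MD5"}}', object_hook=as_enum)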
+from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.checksum_algorithm import ChecksumAlgorithm +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.relationship_type import RelationshipType +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.record_type import RecordType +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.valid_descriptor import ValidDescriptor +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.operation_type import OperationType +from onestop.schemas.psiSchemaClasses.time_range_descriptor import TimeRangeDescriptor +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.file_location_type import FileLocationType +from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.method import Method +from onestop.schemas.geojsonSchemaClasses.line_string_type import LineStringType +from onestop.schemas.geojsonSchemaClasses.multi_line_string_type import MultiLineStringType +from onestop.schemas.geojsonSchemaClasses.multi_point_type import MultiPointType +from onestop.schemas.geojsonSchemaClasses.multi_polygon_type import MultiPolygonType +from onestop.schemas.geojsonSchemaClasses.point_type import PointType +from onestop.schemas.geojsonSchemaClasses.polygon_type import PolygonType + +class jsonEncoderTest(unittest.TestCase): + + def test_checksumalgorithm_enum_class_encodes(self): + type = ChecksumAlgorithm.MD5 + obj = ChecksumAlgorithm(type) + + jsonStr = json.dumps(obj, + cls=EnumEncoder) + + self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) + + def test_checksumalgorithm_json_decode_enum(self): + content = "{\"type\": {\"__enum__\": \"ChecksumAlgorithm.MD5\"}}" + json.loads(content, object_hook=as_enum) + + def test_relationshiptype_enum_class_encodes(self): + type = RelationshipType.COLLECTION + obj = RelationshipType(type) + + jsonStr = json.dumps(obj, + cls=EnumEncoder) + print("jsonStr:%s"%jsonStr) + self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) + + def test_relationshiptype_json_decode_enum(self): + content = "{\"type\": {\"__enum__\": \"RelationshipType.COLLECTION\"}}" + json.loads(content, object_hook=as_enum) + + def test_recordtype_enum_class_encodes(self): + type = RecordType.GRANULE + obj = RecordType(type) + + jsonStr = json.dumps(obj, + cls=EnumEncoder) + + self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) + + def test_recordtype_json_decode_enum(self): + content = "{\"type\": {\"__enum__\": \"RecordType.COLLECTION\"}}" + json.loads(content, object_hook=as_enum) + + def test_timerangedescriptor_enum_class_encodes(self): + type = TimeRangeDescriptor.AMBIGUOUS + obj = TimeRangeDescriptor(type) + + jsonStr = json.dumps(obj, + cls=EnumEncoder) + + self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) + + def test_timerangedescriptor_json_decode_enum(self): + content = "{\"type\": {\"__enum__\": \"TimeRangeDescriptor.AMBIGUOUS\"}}" + json.loads(content, object_hook=as_enum) + + def test_linestringtype_enum_class_encodes(self): + type = LineStringType.LINESTRING + obj = LineStringType(type) + + jsonStr = json.dumps(obj, + cls=EnumEncoder) + + self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) + + def test_linestringtype_json_decode_enum(self): + content = "{\"type\": {\"__enum__\": \"LineStringType.LINESTRING\"}}" + json.loads(content, object_hook=as_enum) + + def test_method_enum_class_encodes(self): + type = Method.CONNECT + obj = Method(type) + + jsonStr = json.dumps(obj, + cls=EnumEncoder) + + self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) + + def 
test_method_json_decode_enum(self): + content = "{\"type\": {\"__enum__\": \"Method.POST\"}}" + json.loads(content, object_hook=as_enum) + + def test_multilinestringtype_enum_class_encodes(self): + type = MultiLineStringType.MULTILINESTRING + obj = MultiLineStringType(type) + + jsonStr = json.dumps(obj, + cls=EnumEncoder) + + self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) + + def test_mutilinestringtype_json_decode_enum(self): + content = "{\"type\": {\"__enum__\": \"MultiLineStringType.MULTILINESTRING\"}}" + json.loads(content, object_hook=as_enum) + + def test_multipointtype_enum_class_encodes(self): + type = MultiPointType.MULTIPOINT + obj = MultiPointType(type) + + jsonStr = json.dumps(obj, + cls=EnumEncoder) + + self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) + + def test_multipointtype_json_decode_enum(self): + content = "{\"type\": {\"__enum__\": \"MultiPointType.MULTIPOINT\"}}" + json.loads(content, object_hook=as_enum) + + def test_multipolygontype_enum_class_encodes(self): + type = MultiPolygonType.MULTIPOLYGON + obj = MultiPolygonType(type) + + jsonStr = json.dumps(obj, + cls=EnumEncoder) + + self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) + + def test_multipolygontype_json_decode_enum(self): + content = "{\"type\": {\"__enum__\": \"MultiPolygonType.MULTIPOLYGON\"}}" + json.loads(content, object_hook=as_enum) + + def test_operationtype_enum_class_encodes(self): + type = OperationType.ADD + obj = OperationType(type) + + jsonStr = json.dumps(obj, + cls=EnumEncoder) + + self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) + + def test_operationtype_json_decode_enum(self): + content = "{\"type\": {\"__enum__\": \"OperationType.ADD\"}}" + json.loads(content, object_hook=as_enum) + + def test_pointtype_enum_class_encodes(self): + type = PointType.POINT + obj = PointType(type) + + jsonStr = json.dumps(obj, + cls=EnumEncoder) + + self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) + + def test_pointtype_json_decode_enum(self): + content = "{\"type\": {\"__enum__\": \"PointType.POINT\"}}" + json.loads(content, object_hook=as_enum) + + def test_polygontype_enum_class_encodes(self): + type = PolygonType.POLYGON + obj = PolygonType(type) + + jsonStr = json.dumps(obj, + cls=EnumEncoder) + + self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) + + def test_polygontype_json_decode_enum(self): + content = "{\"type\": {\"__enum__\": \"PolygonType.POLYGON\"}}" + json.loads(content, object_hook=as_enum) + + def test_filelocationtype_enum_class_encodes(self): + type = FileLocationType.INGEST + obj = FileLocationType(type) + + jsonStr = json.dumps(obj, + cls=EnumEncoder) + + self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) + + def test_filelocationtype_json_decode_enum(self): + content = "{\"type\": {\"__enum__\": \"FileLocationType.INGEST\"}}" + json.loads(content, object_hook=as_enum) + + def test_validdescriptor_enum_class_encodes(self): + type = ValidDescriptor.INVALID + obj = ValidDescriptor(type) + + jsonStr = json.dumps(obj, + cls=EnumEncoder) + + self.assertEqual(jsonStr, '{"__enum__": "%s"}'%type) + + def test_validdescriptor_json_decode_enum(self): + content = "{\"type\": {\"__enum__\": \"ValidDescriptor.VALID\"}}" + json.loads(content, object_hook=as_enum) diff --git a/onestop-python-client/test/unit/test_KafkaConsumer.py b/onestop-python-client/test/unit/test_KafkaConsumer.py new file mode 100644 index 0000000..4a5345f --- /dev/null +++ b/onestop-python-client/test/unit/test_KafkaConsumer.py @@ -0,0 +1,287 @@ +import unittest + +from unittest.mock import ANY, patch, 
MagicMock, call +from onestop.KafkaConsumer import KafkaConsumer +from confluent_kafka.schema_registry import SchemaRegistryClient + +class test_KafkaConsumer(unittest.TestCase): + kp = None + conf_w_security = None + conf_wo_security = None + + @classmethod + def setUp(cls): + print("Set it up!") + cls.conf_w_security = { + "kafka_consumer_metadata_type" : "GRANULE", + "brokers" : "onestop-dev-cp-kafka:9092", + "group_id" : "sme-test", + "auto_offset_reset" : "earliest", + "schema_registry" : "http://onestop-dev-cp-schema-registry:8081", + "security" : { + "enabled" : True, + "caLoc" : "/etc/pki/tls/cert.pem", + "keyLoc" : "/etc/pki/tls/private/kafka-user.key", + "certLoc" : "/etc/pki/tls/certs/kafka-user.crt" + }, + "collection_topic_consume" : "psi-collection-input-unknown", + "granule_topic_consume" : "psi-granule-input-unknown", + "log_level" : "DEBUG" + } + cls.conf_wo_security = dict(cls.conf_w_security) + # Remove security credential section. + cls.conf_wo_security['security'] = { + "enabled":False + } + + @classmethod + def tearDown(self): + print("Tear it down!") + + def test_init_happy_nonconditional_params(self): + consumer = KafkaConsumer(**self.conf_w_security) + + self.assertEqual(consumer.metadata_type, self.conf_w_security['kafka_consumer_metadata_type']) + self.assertEqual(consumer.brokers, self.conf_w_security['brokers']) + self.assertEqual(consumer.group_id, self.conf_w_security['group_id']) + self.assertEqual(consumer.auto_offset_reset, self.conf_w_security['auto_offset_reset']) + self.assertEqual(consumer.schema_registry, self.conf_w_security['schema_registry']) + self.assertEqual(consumer.security_enabled, self.conf_w_security['security']['enabled']) + self.assertEqual(consumer.collection_topic, self.conf_w_security['collection_topic_consume']) + self.assertEqual(consumer.granule_topic, self.conf_w_security['granule_topic_consume']) + + def test_init_security_enabled(self): + consumer = KafkaConsumer(**self.conf_w_security) + + self.assertEqual(consumer.security_caLoc, self.conf_w_security['security']['caLoc']) + self.assertEqual(consumer.security_keyLoc, self.conf_w_security['security']['keyLoc']) + self.assertEqual(consumer.security_certLoc, self.conf_w_security['security']['certLoc']) + + def test_init_security_disabled(self): + consumer = KafkaConsumer(**self.conf_wo_security) + + self.assertRaises(AttributeError, getattr, consumer, "security_caLoc") + self.assertRaises(AttributeError, getattr, consumer, "security_keyLoc") + self.assertRaises(AttributeError, getattr, consumer, "security_certLoc") + + def test_init_metadata_type_valid(self): + consumer = KafkaConsumer(**self.conf_w_security) + + self.assertEqual(consumer.metadata_type, self.conf_w_security['kafka_consumer_metadata_type']) + + def test_init_metadata_type_invalid(self): + wrong_metadata_type_config = dict(self.conf_w_security) + wrong_metadata_type_config['kafka_consumer_metadata_type'] = "invalid_type" + + self.assertRaises(ValueError, KafkaConsumer, **wrong_metadata_type_config) + + def test_init_extra_params(self): + conf = dict(self.conf_wo_security) + conf['junk_key'] = 'junk_value' + KafkaConsumer(**conf) + + @patch.object(SchemaRegistryClient, '__init__', autospec=True) + def test_register_client_w_security(self, mock_client): + exp_security_conf = { + 'url':self.conf_w_security['schema_registry'], + 'ssl.ca.location': self.conf_w_security['security']['caLoc'], + 'ssl.key.location': self.conf_w_security['security']['keyLoc'], + 'ssl.certificate.location': 
self.conf_w_security['security']['certLoc'] + } + mock_client.return_value = None + + consumer = KafkaConsumer(**self.conf_w_security) + consumer.register_client() + + mock_client.assert_called() + mock_client.assert_called_with(ANY, exp_security_conf) + + @patch.object(SchemaRegistryClient, '__init__', autospec=True) + def test_register_client_wo_security(self, mock_client): + exp_security_conf = { + 'url':self.conf_w_security['schema_registry'], + 'ssl.ca.location': self.conf_w_security['security']['caLoc'], + 'ssl.key.location': self.conf_w_security['security']['keyLoc'], + 'ssl.certificate.location': self.conf_w_security['security']['certLoc'] + } + mock_client.return_value = None + + consumer = KafkaConsumer(**self.conf_wo_security) + consumer.register_client() + try: + mock_client.assert_called_with(ANY, exp_security_conf) + except: + return + raise AssertionError('Expected register_client() to not have been called with security arguments.') + + @patch('onestop.KafkaConsumer.AvroDeserializer') + @patch('onestop.KafkaConsumer.DeserializingConsumer') + def test_create_consumer_calls_AvroDeserializer(self, mock_deserializing_consumer, mock_avro_deserializer): + conf_w_security_collection = dict(self.conf_w_security) + conf_w_security_collection['kafka_consumer_metadata_type'] = "COLLECTION" + + consumer = KafkaConsumer(**conf_w_security_collection) + reg_client = consumer.register_client() + reg_client.get_latest_version = MagicMock() + deser_consumer = consumer.create_consumer(reg_client) + + # Verify AvroDeserializer called with expected registry client + mock_avro_deserializer.assert_called_with(schema_str=ANY, schema_registry_client=reg_client) + + self.assertIsNotNone(deser_consumer) + + @patch('onestop.KafkaConsumer.AvroDeserializer') + @patch('onestop.KafkaConsumer.DeserializingConsumer') + def test_create_consumer_collection_w_security(self, mock_deserializing_consumer, mock_avro_deserializer): + conf_w_security_collection = dict(self.conf_w_security) + topic = conf_w_security_collection['collection_topic_consume'] + conf_w_security_collection['kafka_consumer_metadata_type'] = 'COLLECTION' + + consumer = KafkaConsumer(**conf_w_security_collection) + reg_client = MagicMock() + deser_consumer = consumer.create_consumer(reg_client) + + # Verify metadata type was taken into consideration for getting topic information + reg_client.get_latest_version.assert_called_with(topic + '-value') + + # Verify security passed into DeserializingConsumer + mock_deserializing_consumer.assert_called_with( + { + 'bootstrap.servers': conf_w_security_collection['brokers'], + 'security.protocol': 'SSL', + 'ssl.ca.location': conf_w_security_collection['security']['caLoc'], + 'ssl.key.location': conf_w_security_collection['security']['keyLoc'], + 'ssl.certificate.location': conf_w_security_collection['security']['certLoc'], + 'key.deserializer': ANY, + 'value.deserializer': ANY, + 'group.id': conf_w_security_collection['group_id'], + 'auto.offset.reset': conf_w_security_collection['auto_offset_reset'] + }) + mock_deserializing_consumer.return_value.subscribe.assert_called_with([topic]) + + self.assertIsNotNone(deser_consumer) + + @patch('onestop.KafkaConsumer.AvroDeserializer') + @patch('onestop.KafkaConsumer.DeserializingConsumer') + def test_create_consumer_collection_wo_security(self, mock_deserializing_consumer, mock_avro_deserializer): + conf_wo_security_collection = dict(self.conf_wo_security) + topic = conf_wo_security_collection['collection_topic_consume'] + 
conf_wo_security_collection['kafka_consumer_metadata_type'] = 'COLLECTION' + + consumer = KafkaConsumer(**conf_wo_security_collection) + reg_client = MagicMock() + deser_consumer = consumer.create_consumer(reg_client) + + # Verify metadata type was taken into consideration for getting topic information + reg_client.get_latest_version.assert_called_with(topic + '-value') + + # Verify no security passed into DeserializingConsumer + mock_deserializing_consumer.assert_called_with( + { + 'bootstrap.servers': conf_wo_security_collection['brokers'], + 'key.deserializer': ANY, + 'value.deserializer': ANY, + 'group.id': conf_wo_security_collection['group_id'], + 'auto.offset.reset': conf_wo_security_collection['auto_offset_reset'] + }) + mock_deserializing_consumer.return_value.subscribe.assert_called_with([topic]) + + self.assertIsNotNone(deser_consumer) + + @patch('onestop.KafkaConsumer.AvroDeserializer') + @patch('onestop.KafkaConsumer.DeserializingConsumer') + def test_create_consumer_granule_w_security(self, mock_deserializing_consumer, mock_avro_deserializer): + conf_w_security_granule = dict(self.conf_w_security) + topic = conf_w_security_granule['granule_topic_consume'] + conf_w_security_granule['kafka_consumer_metadata_type'] = 'GRANULE' + + consumer = KafkaConsumer(**conf_w_security_granule) + reg_client = MagicMock() + deser_consumer = consumer.create_consumer(reg_client) + + # Verify metadata type was taken into consideration for getting topic information + reg_client.get_latest_version.assert_called_with(topic + '-value') + + # Verify security passed into DeserializingConsumer + mock_deserializing_consumer.assert_called_with( + { + 'bootstrap.servers': conf_w_security_granule['brokers'], + 'security.protocol': 'SSL', + 'ssl.ca.location': conf_w_security_granule['security']['caLoc'], + 'ssl.key.location': conf_w_security_granule['security']['keyLoc'], + 'ssl.certificate.location': conf_w_security_granule['security']['certLoc'], + 'key.deserializer': ANY, + 'value.deserializer': ANY, + 'group.id': conf_w_security_granule['group_id'], + 'auto.offset.reset': conf_w_security_granule['auto_offset_reset'] + }) + mock_deserializing_consumer.return_value.subscribe.assert_called_with([topic]) + + self.assertIsNotNone(deser_consumer) + + @patch('onestop.KafkaConsumer.AvroDeserializer') + @patch('onestop.KafkaConsumer.DeserializingConsumer') + def test_create_consumer_granule_wo_security(self, mock_deserializing_consumer, mock_avro_deserializer): + conf_wo_security_granule = dict(self.conf_wo_security) + exp_topic = conf_wo_security_granule['granule_topic_consume'] + conf_wo_security_granule['kafka_consumer_metadata_type'] = 'GRANULE' + + consumer = KafkaConsumer(**conf_wo_security_granule) + reg_client = MagicMock() + deser_consumer = consumer.create_consumer(reg_client) + + # Verify metadata type was taken into consideration for getting topic information + reg_client.get_latest_version.assert_called_with(exp_topic + '-value') + + # Verify no security passed into DeserializingConsumer called with expected configuration + mock_deserializing_consumer.assert_called_with( + { + 'bootstrap.servers': conf_wo_security_granule['brokers'], + 'key.deserializer': ANY, + 'value.deserializer': ANY, + 'group.id': conf_wo_security_granule['group_id'], + 'auto.offset.reset': conf_wo_security_granule['auto_offset_reset'] + }) + mock_deserializing_consumer.return_value.subscribe.assert_called_with([exp_topic]) + + self.assertIsNotNone(deser_consumer) + + def test_connect(self): + mock_client = MagicMock() + + 
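+        # connect() is expected to call register_client() and then hand the resulting
+        # schema registry client to create_consumer() (asserted below).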
consumer = KafkaConsumer(**self.conf_w_security) + consumer.register_client = MagicMock(return_value=mock_client) + consumer.create_consumer = MagicMock(return_value=MagicMock(mock_client)) + consumer.connect() + + consumer.register_client.assert_called_once() + consumer.create_consumer.assert_called_with(mock_client) + + @patch('confluent_kafka.cimpl.Message') + @patch('onestop.KafkaConsumer.DeserializingConsumer') + def test_consume(self, mock_metadata_consumer, mock_message): + mock_message_key = 'key1' + mock_message_value = 'value1' + consumer = KafkaConsumer(**self.conf_w_security) + consumer.register_client = MagicMock(return_value=MagicMock()) + mock_message.key.return_value = mock_message_key + mock_message.value.return_value = mock_message_value + mock_metadata_consumer.poll.side_effect = [None, mock_message, Exception] + mock_handler = MagicMock() + + # Would have liked not having the try/catch but it wasn't ignoring the exception. Just need to not fail due to end of loop. + try: + self.assertRaises(Exception, consumer.consume(mock_metadata_consumer, mock_handler)) + except Exception as e: + print("Ignoring exception: {}".format(e)) + + # Verify kafka consumer poll called expected number of times + self.assertEqual(mock_metadata_consumer.poll.call_count, 3) + mock_metadata_consumer.poll.assert_has_calls([call(10), call(10), call(10)]) + + # Verify callback function was called once with expected message attributes + mock_handler.assert_called_once() + mock_handler.assert_called_with(mock_message_key, mock_message_value, self.conf_w_security['log_level']) + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/onestop-python-client/test/unit/test_KafkaPublisher.py b/onestop-python-client/test/unit/test_KafkaPublisher.py new file mode 100644 index 0000000..6357a3c --- /dev/null +++ b/onestop-python-client/test/unit/test_KafkaPublisher.py @@ -0,0 +1,335 @@ +import unittest +import json + +from onestop.KafkaPublisher import KafkaPublisher +from unittest.mock import ANY, patch, MagicMock +from confluent_kafka.schema_registry import SchemaRegistryClient + +class test_KafkaPublisher(unittest.TestCase): + kp = None + conf_w_security = None + conf_wo_security = None + + @classmethod + def setUp(cls): + print("Set it up!") + cls.conf_w_security = { + "kafka_publisher_metadata_type" : "GRANULE", + "brokers" : "onestop-dev-cp-kafka:9092", + "schema_registry" : "http://onestop-dev-cp-schema-registry:8081", + "security" : { + "enabled" : True, + "caLoc" : "/etc/pki/tls/cert.pem", + "keyLoc" : "/etc/pki/tls/private/kafka-user.key", + "certLoc" : "/etc/pki/tls/certs/kafka-user.crt" + }, + "collection_topic_publish" : "psi-collection-input-unknown", + "granule_topic_publish" : "psi-granule-input-unknown", + "log_level" : "DEBUG" + } + cls.conf_wo_security = dict(cls.conf_w_security) + # Remove security credential section. 
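+        # (Only {'enabled': False} is kept; per test_init_security_disabled below, the
+        #  publisher is not expected to require caLoc/keyLoc/certLoc when security is off.)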
+ cls.conf_wo_security['security'] = { + "enabled":False + } + + @classmethod + def tearDown(self): + print("Tear it down!") + + def test_init_happy_nonconditional_params(self): + publisher = KafkaPublisher(**self.conf_w_security) + + self.assertEqual(publisher.metadata_type, self.conf_w_security['kafka_publisher_metadata_type']) + self.assertEqual(publisher.brokers, self.conf_w_security['brokers']) + self.assertEqual(publisher.schema_registry, self.conf_w_security['schema_registry']) + self.assertEqual(publisher.security_enabled, self.conf_w_security['security']['enabled']) + self.assertEqual(publisher.collection_topic, self.conf_w_security['collection_topic_publish']) + self.assertEqual(publisher.granule_topic, self.conf_w_security['granule_topic_publish']) + + def test_init_security_enabled(self): + publisher = KafkaPublisher(**self.conf_w_security) + + self.assertEqual(publisher.security_caLoc, self.conf_w_security['security']['caLoc']) + self.assertEqual(publisher.security_keyLoc, self.conf_w_security['security']['keyLoc']) + self.assertEqual(publisher.security_certLoc, self.conf_w_security['security']['certLoc']) + + def test_init_security_disabled(self): + publisher = KafkaPublisher(**self.conf_wo_security) + + self.assertRaises(AttributeError, getattr, publisher, "security_caLoc") + self.assertRaises(AttributeError, getattr, publisher, "security_keyLoc") + self.assertRaises(AttributeError, getattr, publisher, "security_certLoc") + + def test_init_metadata_type_valid(self): + publisher = KafkaPublisher(**self.conf_w_security) + + self.assertEqual(publisher.metadata_type, self.conf_w_security['kafka_publisher_metadata_type']) + + def test_init_metadata_type_invalid(self): + wrong_metadata_type_config = dict(self.conf_w_security) + wrong_metadata_type_config['kafka_publisher_metadata_type'] = "invalid_type" + + self.assertRaises(ValueError, KafkaPublisher, **wrong_metadata_type_config) + + def test_init_extra_params(self): + conf = dict(self.conf_wo_security) + conf['junk_key'] = 'junk_value' + KafkaPublisher(**conf) + + @patch.object(SchemaRegistryClient, '__init__', autospec=True) + def test_register_client_w_security(self, mock_client): + exp_security_conf = { + 'url':self.conf_w_security['schema_registry'], + 'ssl.ca.location': self.conf_w_security['security']['caLoc'], + 'ssl.key.location': self.conf_w_security['security']['keyLoc'], + 'ssl.certificate.location': self.conf_w_security['security']['certLoc'] + } + mock_client.return_value = None + + publisher = KafkaPublisher(**self.conf_w_security) + publisher.register_client() + + mock_client.assert_called() + mock_client.assert_called_with(ANY, exp_security_conf) + + @patch.object(SchemaRegistryClient, '__init__', autospec=True) + def test_register_client_wo_security(self, mock_client): + exp_security_conf = { + 'url':self.conf_w_security['schema_registry'], + 'ssl.ca.location': self.conf_w_security['security']['caLoc'], + 'ssl.key.location': self.conf_w_security['security']['keyLoc'], + 'ssl.certificate.location': self.conf_w_security['security']['certLoc'] + } + mock_client.return_value = None + + publisher = KafkaPublisher(**self.conf_wo_security) + publisher.register_client() + try: + mock_client.assert_called_with(ANY, exp_security_conf) + except: + return + raise AssertionError('Expected register_client() to not have been called with security arguments.') + + @patch('onestop.KafkaPublisher.AvroSerializer') + @patch('onestop.KafkaPublisher.SerializingProducer') + def test_create_producer_calls_AvroSerializer(self, 
mock_serializing_publisher, mock_avro_serializer): + conf_w_security_collection = dict(self.conf_w_security) + conf_w_security_collection['kafka_publisher_metadata_type'] = "COLLECTION" + + publisher = KafkaPublisher(**conf_w_security_collection) + reg_client = publisher.register_client() + reg_client.get_latest_version = MagicMock() + publisher.create_producer(reg_client) + + # Verify AvroSerializer called with expected registry client + mock_avro_serializer.assert_called_with(schema_str=ANY, schema_registry_client=reg_client) + + @patch('onestop.KafkaPublisher.AvroSerializer') + @patch('onestop.KafkaPublisher.SerializingProducer') + def test_create_producer_collection_w_security(self, mock_serializing_producer, mock_avro_serializer): + conf_w_security_collection = dict(self.conf_w_security) + topic = conf_w_security_collection['collection_topic_publish'] + conf_w_security_collection['kafka_publisher_metadata_type'] = 'COLLECTION' + + publisher = KafkaPublisher(**conf_w_security_collection) + reg_client = MagicMock() + prod = publisher.create_producer(reg_client) + + # Verify metadata type was taken into consideration for getting topic information + reg_client.get_latest_version.assert_called_with(topic + '-value') + + # Verify security passed into SerializingProducer + mock_serializing_producer.assert_called_with( + { + 'bootstrap.servers': conf_w_security_collection['brokers'], + 'security.protocol': 'SSL', + 'ssl.ca.location': conf_w_security_collection['security']['caLoc'], + 'ssl.key.location': conf_w_security_collection['security']['keyLoc'], + 'ssl.certificate.location': conf_w_security_collection['security']['certLoc'], + 'value.serializer': ANY, + }) + + self.assertIsNotNone(prod) + + @patch('onestop.KafkaPublisher.AvroSerializer') + @patch('onestop.KafkaPublisher.SerializingProducer') + def test_create_producer_collection_wo_security(self, mock_serializing_producer, mock_avro_serializer): + conf_wo_security_collection = dict(self.conf_wo_security) + topic = conf_wo_security_collection['collection_topic_publish'] + conf_wo_security_collection['kafka_publisher_metadata_type'] = 'COLLECTION' + + publisher = KafkaPublisher(**conf_wo_security_collection) + reg_client = MagicMock() + prod = publisher.create_producer(reg_client) + + # Verify metadata type was taken into consideration for getting topic information + reg_client.get_latest_version.assert_called_with(topic + '-value') + + # Verify no security passed into SerializingProducer + mock_serializing_producer.assert_called_with( + { + 'bootstrap.servers': conf_wo_security_collection['brokers'], + 'value.serializer': ANY, + }) + + self.assertIsNotNone(prod) + + @patch('onestop.KafkaPublisher.AvroSerializer') + @patch('onestop.KafkaPublisher.SerializingProducer') + def test_create_producer_granule_w_security(self, mock_serializing_producer, mock_avro_serializer): + conf_w_security_granule = dict(self.conf_w_security) + topic = conf_w_security_granule['granule_topic_publish'] + conf_w_security_granule['kafka_publisher_metadata_type'] = 'GRANULE' + + publisher = KafkaPublisher(**conf_w_security_granule) + reg_client = MagicMock() + prod = publisher.create_producer(reg_client) + + # Verify metadata type was taken into consideration for getting topic information + reg_client.get_latest_version.assert_called_with(topic + '-value') + + # Verify security passed into SerializingProducer + mock_serializing_producer.assert_called_with( + { + 'bootstrap.servers': conf_w_security_granule['brokers'], + 'security.protocol': 'SSL', + 
'ssl.ca.location': conf_w_security_granule['security']['caLoc'], + 'ssl.key.location': conf_w_security_granule['security']['keyLoc'], + 'ssl.certificate.location': conf_w_security_granule['security']['certLoc'], + 'value.serializer': ANY, + }) + + self.assertIsNotNone(prod) + + @patch('onestop.KafkaPublisher.AvroSerializer') + @patch('onestop.KafkaPublisher.SerializingProducer') + def test_create_producer_granule_wo_security(self, mock_serializing_producer, mock_avro_serializer): + conf_wo_security_granule = dict(self.conf_wo_security) + exp_topic = conf_wo_security_granule['granule_topic_publish'] + conf_wo_security_granule['kafka_publisher_metadata_type'] = 'GRANULE' + + publisher = KafkaPublisher(**conf_wo_security_granule) + reg_client = MagicMock() + prod = publisher.create_producer(reg_client) + + # Verify metadata type was taken into consideration for getting topic information + reg_client.get_latest_version.assert_called_with(exp_topic + '-value') + + # Verify no security passed into SerializingProducer called with expected configuration + mock_serializing_producer.assert_called_with( + { + 'bootstrap.servers': conf_wo_security_granule['brokers'], + 'value.serializer': ANY, + }) + + self.assertIsNotNone(prod) + + def test_connect(self): + mock_client = MagicMock() + + publisher = KafkaPublisher(**self.conf_w_security) + publisher.register_client = MagicMock(return_value=mock_client) + publisher.create_producer = MagicMock(return_value=MagicMock(mock_client)) + publisher.connect() + + publisher.register_client.assert_called_once() + publisher.create_producer.assert_called_with(mock_client) + + def test_get_collection_key_from_uuid(self): + expKey = '12345678-1234-5678-1234-567812345678' + for uuid in [ + '{12345678-1234-5678-1234-567812345678}', + '12345678123456781234567812345678', + 'urn:uuid:12345678-1234-5678-1234-567812345678', + b'\x12\x34\x56\x78'*4, +# b'\x78\x56\x34\x12\x34\x12\x78\x56' + b'\x12\x34\x56\x78\x12\x34\x56\x78', +# {0x12345678, 0x1234, 0x5678, 0x12, 0x34, 0x567812345678}, +# 0x12345678123456781234567812345678, + ]: + with self.subTest(uuid=uuid): + print ("Testing uuid "+str(uuid)) + key = KafkaPublisher.get_collection_key_from_uuid(uuid) + print("Acquired uuid="+str(key)) + self.assertEqual(key, expKey) + + @patch('onestop.KafkaPublisher.SerializingProducer') + def test_publish_collection(self, mock_collection_producer): + uuid = '{12345678-1234-5678-1234-567812345678}' + content_dict = { + 'title': 'this is a test', + 'location': 'somewhere in space' + } + method = 'PUT' + publisher = KafkaPublisher(**self.conf_w_security) + publisher.register_client = MagicMock(return_value=MagicMock()) + mock_collection_producer.produce = MagicMock() + mock_collection_producer.poll.side_effect = [1] + + publisher.publish_collection(mock_collection_producer, uuid, content_dict, method) + + # Verify kafka produce called once + mock_collection_producer.produce.assert_called_with( + topic=self.conf_w_security['collection_topic_publish'], + value={ + 'type': 'collection', + 'content': json.dumps(content_dict), + 'contentType': 'application/json', + 'method': method, + 'source': 'unknown', + }, + key=publisher.get_collection_key_from_uuid(uuid), + on_delivery=publisher.delivery_report + ) + + # Verify kafka produce poll called once + mock_collection_producer.poll.assert_called_once() + + + @patch('onestop.KafkaPublisher.SerializingProducer') + def test_publish_granule(self, mock_collection_producer): + uuid = '{12345678-1234-5678-1234-567812345678}' + content_dict = { + 'title': 
'this is a test', + 'location': 'somewhere in space', + 'relationships': [{"type": "COLLECTION", + "id": '{12345678-1234-5678-1234-567812345678}'}], + 'errors': [], + 'analysis': 'No analysis', + 'fileLocations': 'archived', + 'fileInformation': 'no information', + 'discovery': 'AWS' + } + publisher = KafkaPublisher(**self.conf_w_security) + publisher.register_client = MagicMock(return_value=MagicMock()) + mock_collection_producer.produce = MagicMock() + mock_collection_producer.poll.side_effect = [1] + + publisher.publish_granule(mock_collection_producer, uuid, content_dict) + + # Verify kafka produce called once + mock_collection_producer.produce.assert_called_with( + topic=self.conf_w_security['granule_topic_publish'], + value={ + 'type': 'granule', + 'content': json.dumps(content_dict), + #'contentType': 'application/json', + 'method': 'PUT', + 'source': 'unknown', + 'operation': None, + 'relationships': content_dict['relationships'], + 'errors': content_dict['errors'], + 'analysis': content_dict['analysis'], + 'fileLocations': {'fileLocation': content_dict['fileLocations']}, + 'fileInformation': content_dict['fileInformation'], + 'discovery': content_dict['discovery'] + }, + key=publisher.get_collection_key_from_uuid(uuid), + on_delivery=publisher.delivery_report + ) + + # Verify kafka produce poll called once + mock_collection_producer.poll.assert_called_once() + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/onestop-python-client/test/unit/test_SqsHandlers.py b/onestop-python-client/test/unit/test_SqsHandlers.py new file mode 100644 index 0000000..6e8481b --- /dev/null +++ b/onestop-python-client/test/unit/test_SqsHandlers.py @@ -0,0 +1,320 @@ +import json +import unittest + +from unittest import mock +from unittest.mock import patch +from moto import mock_sqs +from test.utils import abspath_from_relative, create_delete_message +from onestop.WebPublisher import WebPublisher +from onestop.util.S3Utils import S3Utils +from onestop.util.S3MessageAdapter import S3MessageAdapter +from onestop.util.SqsConsumer import SqsConsumer +from onestop.util.SqsHandlers import create_delete_handler +from onestop.util.SqsHandlers import create_upload_handler +from onestop.schemas.util.jsonEncoder import EnumEncoder + +class test_SqsHandler(unittest.TestCase): + + def setUp(self): + print("Set it up!") + + self.config_dict = { + 'access_key': 'test_access_key', + 'secret_key': 'test_secret_key', + 'access_bucket': 'https://archive-testing-demo.s3-us-east-2.amazonaws.com', + 's3_message_adapter_metadata_type': 'COLLECTION', + 'file_id_prefix': 'gov.noaa.ncei.csb:', + 'collection_id': 'fdb56230-87f4-49f2-ab83-104cfd073177', + 'registry_base_url': 'http://localhost/onestop/api/registry', + 'registry_username': 'admin', + 'registry_password': 'whoknows', + 'onestop_base_url': 'http://localhost/onestop/api/search/search', + 'log_level': 'DEBUG' + } + + self.wp = WebPublisher(**self.config_dict) + self.s3_utils = S3Utils(**self.config_dict) + self.s3_message_adapter = S3MessageAdapter(**self.config_dict) + self.sqs_consumer = SqsConsumer(**self.config_dict) + + self.sqs_max_polls = 3 + self.region = 'us-east-2' + self.bucket = 'archive-testing-demo' + self.key = 'ABI-L1b-RadF/2019/298/15/OR_ABI-L1b-RadF-M6C15_G16_s20192981500369_e20192981510082_c20192981510166.nc' + + def tearDown(self): + print("Tear it down!") + + def mocked_search_response_data(*args, **kwargs): + class MockResponse: + def __init__(self, json_data, status_code): + self.json_data = json_data + 
self.status_code = status_code + + def json(self): + return self.json_data + + print ("args: "+str(args)+" kwargs: "+str(kwargs)) + onestop_search_response = { + "data":[ + { + "attributes":{ + "serviceLinks":[ + + ], + "citeAsStatements":[ + + ], + "links":[ + { + "linkFunction":"download", + "linkUrl":"s3://archive-testing-demo-backup/public/NESDIS/CSB/csv/2019/12/01/20191201_08d5538c6f8dbefd7d82929623a34385_pointData.csv", + "linkName":"Amazon S3", + "linkProtocol":"Amazon:AWS:S3" + }, + { + "linkFunction":"download", + "linkUrl":"https://archive-testing-demo.s3-us-east-2.amazonaws.com/public/NESDIS/CSB/csv/2019/12/01/20191201_08d5538c6f8dbefd7d82929623a34385_pointData.csv", + "linkName":"Amazon S3", + "linkProtocol":"HTTPS" + } + ], + "internalParentIdentifier":"fdb56230-87f4-49f2-ab83-104cfd073177", + "filesize":63751, + "title":"20191201_08d5538c6f8dbefd7d82929623a34385_pointData.csv" + }, + "id":"77b11a1e-1b75-46e1-b7d6-99b5022ed113", + "type":"granule" + } + ], + "meta":{ + "took":1, + "total":6, + "exactCount":True + } + } + return MockResponse(onestop_search_response, 200) + + def mocked_search_response_data_empty(*args, **kwargs): + class MockResponse: + def __init__(self, json_data, status_code): + self.json_data = json_data + self.status_code = status_code + + def json(self): + return self.json_data + + print ("args: "+str(args)+" kwargs: "+str(kwargs)) + onestop_search_response = { + "data":[], + "meta":{ + "took":1, + "total":6, + "exactCount":True + } + } + return MockResponse(onestop_search_response, 200) + + @mock_sqs + @mock.patch('requests.get', side_effect=mocked_search_response_data, autospec=True) + @patch('onestop.WebPublisher') + def test_delete_handler_happy(self, mock_wp, mock_response): + queue_name = 'test_queue' + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.region) + sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url + sqs_queue = sqs_resource.Queue(queue_name) + + # Send a test message + sqs_client = self.s3_utils.connect('client', 'sqs' , self.region) + message = create_delete_message(self.region, self.bucket, self.key) + sqs_client.send_message( + QueueUrl=sqs_queue_url, + MessageBody=json.dumps(message) + ) + + mock_wp.search_onestop.side_effect = mock_response + cb = create_delete_handler(mock_wp) + + self.sqs_consumer.receive_messages(sqs_queue, 1, cb) + + # Verify search and delete called once. + mock_wp.search_onestop.assert_called_once() + mock_wp.delete_registry.assert_called_once() + + @mock_sqs + @mock.patch('requests.get', side_effect=mocked_search_response_data_empty, autospec=True) + @patch('onestop.WebPublisher') + def test_delete_handler_data_empty_ends_cb(self, mock_wp, mock_response): + queue_name = 'test_queue' + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.region) + sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url + sqs_queue = sqs_resource.Queue(queue_name) + + # Send a test message + sqs_client = self.s3_utils.connect('client', 'sqs' , self.region) + message = create_delete_message(self.region, self.bucket, self.key) + sqs_client.send_message( + QueueUrl=sqs_queue_url, + MessageBody=json.dumps(message) + ) + + mock_wp.search_onestop.side_effect = mock_response + cb = create_delete_handler(mock_wp) + + self.sqs_consumer.receive_messages(sqs_queue, 1, cb) + + # Verify search and delete called once. 
+ mock_wp.search_onestop.assert_called_once() + mock_wp.delete_registry.assert_not_called() + + @mock_sqs + @mock.patch('requests.get', side_effect=mocked_search_response_data, autospec=True) + @patch('onestop.WebPublisher') + def test_delete_handler_no_records_ends_cb(self, mock_wp, mock_response): + queue_name = 'test_queue' + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.region) + sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url + sqs_queue = sqs_resource.Queue(queue_name) + + # Send a test message + sqs_client = self.s3_utils.connect('client', 'sqs' , self.region) + sqs_client.send_message( + QueueUrl=sqs_queue_url, + MessageBody=json.dumps({"Message":'''{"Records":[]}'''}) + ) + + mock_wp.search_onestop.side_effect = mock_response + cb = create_delete_handler(mock_wp) + + self.sqs_consumer.receive_messages(sqs_queue, 1, cb) + + # Verify search and delete called once. + mock_wp.search_onestop.assert_not_called() + mock_wp.delete_registry.assert_not_called() + + @mock_sqs + @mock.patch('requests.get', side_effect=mocked_search_response_data, autospec=True) + @patch('onestop.WebPublisher') + def test_delete_handler_eventName_not_delete_ends_cb(self, mock_wp, mock_response): + queue_name = 'test_queue' + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.region) + sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url + sqs_queue = sqs_resource.Queue(queue_name) + + # Send a test message + sqs_client = self.s3_utils.connect('client', 'sqs' , self.region) + sqs_client.send_message( + QueueUrl=sqs_queue_url, + MessageBody=json.dumps({"Message":'''{"Records":[{"eventName":"Unknown"}]}'''}) + ) + + mock_wp.search_onestop.side_effect = mock_response + cb = create_delete_handler(mock_wp) + + self.sqs_consumer.receive_messages(sqs_queue, 1, cb) + + # Verify search and delete called once. 
+ mock_wp.search_onestop.assert_not_called() + mock_wp.delete_registry.assert_not_called() + + @mock_sqs + @patch('onestop.WebPublisher') + @patch('onestop.util.S3Utils') + def test_upload_handler_happy(self, mock_s3_utils, mock_wp): + bucket = self.bucket + key = self.key + queue_name = 'test_queue' + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.region) + sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url + sqs_queue = sqs_resource.Queue(queue_name) + + # Send a test message + sqs_client = self.s3_utils.connect('client', 'sqs' , self.region) + message = create_delete_message(self.region, bucket, key) + sqs_client.send_message( + QueueUrl=sqs_queue_url, + MessageBody=json.dumps(message) + ) + + cb = create_upload_handler(mock_wp, mock_s3_utils, self.s3_message_adapter) + self.sqs_consumer.receive_messages(sqs_queue, 1, cb) + + # Verify get uuid called + mock_s3_utils.connect.assert_called_with('resource', 's3', None) + mock_s3_utils.get_uuid_metadata.assert_called_with( + mock_s3_utils.connect(), + bucket, + key) + # Verify uuid not added + mock_s3_utils.add_uuid_metadata.assert_not_called() + # Verify publish called & transform called + mock_wp.publish_registry.assert_called_with( + 'granule', + mock_s3_utils.get_uuid_metadata(), + json.dumps(self.s3_message_adapter.transform(json.loads(message['Message'])['Records'][0]).to_dict(), cls=EnumEncoder), + 'POST' + ) + + @mock_sqs + @patch('onestop.WebPublisher') + @patch('onestop.util.S3Utils') + def test_upload_handler_adds_uuid(self, mock_s3_utils, mock_wp): + bucket = self.bucket + key = self.key + queue_name = 'test_queue' + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.region) + sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url + sqs_queue = sqs_resource.Queue(queue_name) + + # Send a test message + sqs_client = self.s3_utils.connect('client', 'sqs' , self.region) + message = create_delete_message(self.region, bucket, key) + sqs_client.send_message( + QueueUrl=sqs_queue_url, + MessageBody=json.dumps(message) + ) + + mock_s3_utils.get_uuid_metadata.return_value = None + cb = create_upload_handler(mock_wp, mock_s3_utils, self.s3_message_adapter) + + self.sqs_consumer.receive_messages(sqs_queue, 1, cb) + + # Verify add uuid called + mock_s3_utils.add_uuid_metadata.assert_called_with( + mock_s3_utils.connect(), + bucket, + key) + + @mock_sqs + @patch('onestop.WebPublisher') + @patch('onestop.util.S3Utils') + def test_upload_handler_bucket_as_backup_PATCH(self, mock_s3_utils, mock_wp): + bucket = "testing_backup_bucket" # backup in bucket means a PATCH should happen. 
+ key = self.key + queue_name = 'test_queue' + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.region) + sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url + sqs_queue = sqs_resource.Queue(queue_name) + + # Send a test message + sqs_client = self.s3_utils.connect('client', 'sqs' , self.region) + message = create_delete_message(self.region, bucket, key) + sqs_client.send_message( + QueueUrl=sqs_queue_url, + MessageBody=json.dumps(message) + ) + + cb = create_upload_handler(mock_wp, mock_s3_utils, self.s3_message_adapter) + + self.sqs_consumer.receive_messages(sqs_queue, 1, cb) + + # Verify publish called + mock_wp.publish_registry.assert_called_with( + 'granule', + mock_s3_utils.get_uuid_metadata(), + json.dumps(self.s3_message_adapter.transform(json.loads(message['Message'])['Records'][0]).to_dict(), cls=EnumEncoder), + 'PATCH' + ) + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/onestop-python-client/test/unit/test_WebPublisher.py b/onestop-python-client/test/unit/test_WebPublisher.py new file mode 100644 index 0000000..af0802f --- /dev/null +++ b/onestop-python-client/test/unit/test_WebPublisher.py @@ -0,0 +1,145 @@ +import json +import unittest + +from unittest.mock import ANY +from unittest import mock +from moto import mock_s3 +from onestop.WebPublisher import WebPublisher + +class test_WebPublisher(unittest.TestCase): + username="admin" + password="a_password" + uuid = "9f0a5ff2-fcc0-5bcb-a225-024b669c9bba" + registry_base_url = "https://localhost/onestop/api/registry" + registry_full_url_granule = registry_base_url + "/metadata/granule/" + uuid + registry_full_url_collection = registry_base_url + "/metadata/collection/" + uuid + onestop_base_url = "https://localhost/onestop/api/search" + + payloadDict = { + "fileInformation": { + "name": "file2.csv", + "size": 1385, + "checksums": [{ + "algorithm": "MD5", + "value": "44d2452e8bc2c8013e9c673086fbab7a" + }] + }, + "relationships": [ + {"type": "COLLECTION", + "id": "fdb56230-87f4-49f2-ab83-104cfd073177" + } + ], + "fileLocations": { + "nesdis-ncei-csb-dev/csv/file2.csv": { + "uri": "https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com/csv/file2.csv", + "type": "ACCESS", + "restricted": False, + "serviceType": "HTTPS", + "asynchronous": False + } + }, + "discovery": { + "title": "file2.csv", + "parentIdentifier": "fdb56230-87f4-49f2-ab83-104cfd073177", + "fileIdentifier": "gov.noaa.ncei.csb:file2" + } + } + + addlocDict = { + "fileLocations": { + "Crt3a-Hq2SGUp8n8QSRNpFIf59kmMONqaKlJ_7-Igd8ijMM62deLdtVkiYwlaePbC4JNCsfeg5i-DWDmwxLIx9V-OGgiQp_CZ0rEFXIZxM_ZPyGu7TTv8wwos5SvAI6xDURhzoCH-w": { + "uri": "/282856304593/vaults/noaa-nesdis-ncei-vault-test/archives/Crt3a-Hq2SGUp8n8QSRNpFIf59kmMONqaKlJ_7-Igd8ijMM62deLdtVkiYwlaePbC4JNCsfeg5i-DWDmwxLIx9V-OGgiQp_CZ0rEFXIZxM_ZPyGu7TTv8wwos5SvAI6xDURhzoCH-w", + "type": "ACCESS", + "restricted": True, + "serviceType": "Amazon:AWS:Glacier", + "asynchronous": True + } + } + } + + + def setUp(self): + print("Set it up!") + + self.wp = WebPublisher(self.registry_base_url, + self.username, + self.password, + self.onestop_base_url, + 'DEBUG') + + def tearDown(self): + print("Tear it down!") + + def mocked_requests_patch(*args, **kwargs): + class MockResponse: + def __init__(self, json_data, status_code): + self.json_data = json_data + self.status_code = status_code + + def json(self): + return self.json_data + + print ("args: "+str(args)+" kwargs: "+str(kwargs)) + + return MockResponse({"key1":"value1"}, 200) + + @mock_s3 + 
@mock.patch('requests.post', side_effect=mocked_requests_patch, autospec=True) + def test_publish_post(self, mock_get): + payload = json.dumps(self.payloadDict) + self.wp.publish_registry("granule", self.uuid, payload, "POST") + + mock_get.assert_called_with(url = self.registry_full_url_granule, auth = ANY, data = ANY, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = (self.username, self.password), data = ANY, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, data = payload, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, data = ANY, verify = False, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, data = ANY, verify = ANY, headers = {'Content-Type': 'application/json'}) + + @mock_s3 + @mock.patch('requests.put', side_effect=mocked_requests_patch, autospec=True) + def test_publish_put(self, mock_get): + payload = json.dumps(self.payloadDict) + self.wp.publish_registry("granule", self.uuid, payload, "PUT") + + mock_get.assert_called_with(url = self.registry_full_url_granule, auth = ANY, data = ANY, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = (self.username, self.password), data = ANY, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, data = payload, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, data = ANY, verify = False, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, data = ANY, verify = ANY, headers = {'Content-Type': 'application/json'}) + + @mock_s3 + @mock.patch('requests.patch', side_effect=mocked_requests_patch, autospec=True) + def test_add_glacier_location(self, mock_get): + payload = json.dumps(self.addlocDict) + self.wp.publish_registry("granule", self.uuid, payload, "PATCH") + + mock_get.assert_called_with(url = self.registry_full_url_granule, auth = ANY, data = ANY, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = (self.username, self.password), data = ANY, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, data = payload, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, data = ANY, verify = False, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, data = ANY, verify = ANY, headers = {'Content-Type': 'application/json'}) + + @mock_s3 + @mock.patch('requests.delete', side_effect=mocked_requests_patch, autospec=True) + def test_delete_registry_granule(self, mock_get): + self.wp.delete_registry("granule", self.uuid) + + mock_get.assert_called_with(url = self.registry_full_url_granule, headers = ANY, auth = ANY, verify = ANY) + mock_get.assert_called_with(url = ANY, auth = (self.username, self.password), verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, verify = False, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, verify = ANY, headers = {'Content-Type': 'application/json'}) + + @mock_s3 + @mock.patch('requests.delete', side_effect=mocked_requests_patch, autospec=True) + def test_delete_registry_collection(self, mock_get): + self.wp.delete_registry("collection", self.uuid) + + mock_get.assert_called_with(url = self.registry_full_url_collection, headers = ANY, auth = ANY, verify = ANY) + mock_get.assert_called_with(url = ANY, auth = (self.username, self.password), verify = ANY, headers = ANY) +
mock_get.assert_called_with(url = ANY, auth = ANY, verify = ANY, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, verify = False, headers = ANY) + mock_get.assert_called_with(url = ANY, auth = ANY, verify = ANY, headers = {'Content-Type': 'application/json'}) + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/onestop-python-client/test/unit/util/__init__.py b/onestop-python-client/test/unit/util/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/onestop-python-client/test/unit/util/test_S3MessageAdapter.py b/onestop-python-client/test/unit/util/test_S3MessageAdapter.py new file mode 100644 index 0000000..8dee317 --- /dev/null +++ b/onestop-python-client/test/unit/util/test_S3MessageAdapter.py @@ -0,0 +1,78 @@ +import unittest + +from onestop.util.S3MessageAdapter import S3MessageAdapter + +class S3MessageAdapterTest(unittest.TestCase): + config_dict = { + 'access_key': 'test_access_key', + 'secret_key': 'test_secret_key', + 'access_bucket': 'https://archive-testing-demo.s3-us-east-2.amazonaws.com', + 's3_message_adapter_metadata_type': 'COLLECTION', + 'file_id_prefix': 'gov.noaa.ncei.csb:', + 'collection_id': 'fdb56230-87f4-49f2-ab83-104cfd073177', + 'log_level': 'DEBUG' + } + + record = { + 'eventVersion': '2.1', + 'eventSource': 'aws:s3', + 'awsRegion': 'us-east-1', + 'eventTime': '2020-11-10T00:44:20.642Z', + 'eventName': 'ObjectCreated:Put', + 'userIdentity': {'principalId': 'AWS:AIDAUDW4MV7I5RW5LQJIO'}, + 'requestParameters': {'sourceIPAddress': '65.113.158.185'}, + 'responseElements': {'x-amz-request-id': '7D394F43C682BB87', 'x-amz-id-2': 'k2Yn5BGg7DM5fIEAnwv5RloBFLYERjGRG3mT+JsPbdX033USr0eNObqkHiw3m3x+BQ17DD4C0ErB/VdhYt2Az01LJ4mQ/aqS'}, + 's3': { + 's3SchemaVersion': '1.0', + 'configurationId': 'csbS3notification', + 'bucket': { + 'name': 'nesdis-ncei-csb-dev', + 'ownerIdentity': { + 'principalId': 'A3PGJENIF5D10L' + }, + 'arn': 'arn:aws:s3:::nesdis-ncei-csb-dev' + }, + 'object': { + 'key': 'csv/file1.csv', 'size': 1385, + 'eTag': '44d2452e8bc2c8013e9c673086fbab7a', + 'versionId': 'q6ls_7mhqUbfMsoYiQSiADnHBZQ3Fbzf', + 'sequencer': '005FA9E26498815778' + } + } + } + + def test_init_metadata_type_valid(self): + publisher = S3MessageAdapter(**self.config_dict) + + self.assertEqual(publisher.metadata_type, self.config_dict['s3_message_adapter_metadata_type']) + + def test_init_metadata_type_invalid(self): + wrong_metadata_type_config = dict(self.config_dict) + wrong_metadata_type_config['s3_message_adapter_metadata_type'] = "invalid_type" + + self.assertRaises(ValueError, S3MessageAdapter, **wrong_metadata_type_config) + + def test_init_metadata_type_lowercase(self): + metadata_type = 'collection' + uppercase_metadata_type = metadata_type.upper() + config = dict(self.config_dict) + config['s3_message_adapter_metadata_type'] = metadata_type + + s3MA = S3MessageAdapter(**config) + + self.assertEqual(uppercase_metadata_type, s3MA.metadata_type) + + def test_init_extra_parameters_constructor(self): + test_params = dict(self.config_dict) + test_params['extra'] = 'extra value' + self.assertRaises(Exception, S3MessageAdapter(**test_params)) + + def test_transform_happy(self): + s3MA = S3MessageAdapter(**self.config_dict) + payload = s3MA.transform(self.record) + + self.assertIsNotNone(payload) + + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/onestop-python-client/test/unit/util/test_S3Utils.py b/onestop-python-client/test/unit/util/test_S3Utils.py new file mode 100644 index 
0000000..830a1d8 --- /dev/null +++ b/onestop-python-client/test/unit/util/test_S3Utils.py @@ -0,0 +1,267 @@ +import csv +import unittest +import uuid +import json + +from unittest import mock +from moto import mock_s3, mock_sqs +from moto import mock_glacier +from test.utils import abspath_from_relative +from onestop.util.S3Utils import S3Utils +from boto.glacier.layer1 import Layer1 +from botocore.response import StreamingBody +from io import StringIO + +class S3UtilsTest(unittest.TestCase): + + def setUp(self): + print("Set it up!") + + config_dict = { + 'access_key': 'test_access_key', + 'secret_key': 'test_secret_key', + 'access_bucket': 'https://archive-testing-demo.s3-us-east-2.amazonaws.com', + 'metadata_type': 'COLLECTION', + 'file_id_prefix': 'gov.noaa.ncei.csb:', + 'collection_id': 'fdb56230-87f4-49f2-ab83-104cfd073177', + 'log_level': 'DEBUG' + } + + self.s3_utils = S3Utils(**config_dict) + + self.region = 'us-east-2' + self.region2 = 'eu-north-1' + self.bucket = 'archive-testing-demo' + + @mock_sqs + def test_connect_session(self): + session = self.s3_utils.connect('Session', None, self.region) + + # No exception is called for unique method call + session.client('sqs') + session.resource('s3') + + @mock_sqs + def test_connect_client(self): + client = self.s3_utils.connect('Client', 'sqs', self.region) + + # No exception is called for unique method call + client.list_queues() + + @mock_sqs + def test_connect_resource(self): + resource = self.s3_utils.connect('Resource', 'sqs', self.region) + + # No exception is called for unique method call + resource.Queue(url='test') + + @mock_sqs + def test_connect_exception_for_invalid_connection_type(self): + with self.assertRaises(Exception): + self.s3_utils.connect('junk', 'sqs', self.region) + + @mock_s3 + def test_get_uuid_metadata(self): + boto_client = self.s3_utils.connect('resource', 's3', None) + s3_key = "csv/file1.csv" + + location = {'LocationConstraint': self.region} + boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) + obj_uuid = str(uuid.uuid4()) + boto_client.Object(self.bucket, s3_key).put(Bucket=self.bucket, Key=s3_key, Body="my_body", Metadata={'object-uuid': obj_uuid}) + + self.assertFalse(self.s3_utils.get_uuid_metadata(boto_client, self.bucket, s3_key) == None) + + @mock_s3 + def test_add_uuid_metadata(self): + boto_client = self.s3_utils.connect('resource', 's3', self.region) + + s3_key = "csv/file1.csv" + + location = {'LocationConstraint': self.region} + boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) + boto_client.Object(self.bucket, s3_key).put(Bucket=self.bucket, Key=s3_key, Body="my_body") + + self.assertTrue(self.s3_utils.add_uuid_metadata(boto_client, self.bucket, s3_key)) + + @mock_s3 + def test_add_file_s3_overwrite(self): + boto_client = self.s3_utils.connect('client', 's3', None) + local_file = abspath_from_relative(__file__, "../../data/file4.csv") + s3_key = "csv/file4.csv" + location = {'LocationConstraint': self.region} + boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) + + self.assertTrue(self.s3_utils.upload_s3(boto_client, local_file, self.bucket, s3_key, True)) + + @mock_s3 + def test_add_file_s3_nooverwrite(self): + boto_client = self.s3_utils.connect('client', 's3', None) + local_file = abspath_from_relative(__file__, "../../data/file4.csv") + s3_key = "csv/file4.csv" + location = {'LocationConstraint': self.region} + boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) 
+ + self.assertTrue(self.s3_utils.upload_s3(boto_client, local_file, self.bucket, s3_key, False)) + + @mock_s3 + def test_get_csv_s3(self): + boto_session = self.s3_utils.connect('session', None, self.region) + s3 = self.s3_utils.connect('client', 's3', self.region) + location = {'LocationConstraint': self.region} + s3_key = "csv/file1.csv" + s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) + s3.put_object(Bucket=self.bucket, Key=s3_key, Body="body") + + sm_open_file = self.s3_utils.get_csv_s3(boto_session, self.bucket, s3_key) + + # print("reading csv:" + line.decode('utf-8')) + csv_reader = csv.DictReader(sm_open_file) + for row in csv_reader: + print(str(row["LON"])) + + @mock_s3 + def test_read_bytes_s3(self): + boto_client = self.s3_utils.connect('client', 's3', None) + s3_key = "csv/file1.csv" + boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration={'LocationConstraint': self.region}) + boto_client.put_object(Bucket=self.bucket, Key=s3_key, Body="body") + + self.assertTrue(self.s3_utils.read_bytes_s3(boto_client, self.bucket, s3_key)) + + @mock_s3 + def test_add_files(self): + boto_client = self.s3_utils.connect('client', 's3', None) + local_files = ["file1_s3.csv", "file2.csv", "file3.csv"] + location = {'LocationConstraint': self.region} + boto_client.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) + + for file in local_files: + local_file = abspath_from_relative(__file__, "../../data/" + file) + s3_file = "csv/" + file + self.assertTrue(self.s3_utils.upload_s3(boto_client, local_file, self.bucket, s3_file, True)) + + @mock_s3 + @mock_glacier + def test_s3_cross_region(self): + print('Cross Region Vault Upload ------------- ') + key = "csv/file1.csv" + + # makes connection to low level s3 client + s3 = self.s3_utils.connect('client', 's3', self.region) + location = {'LocationConstraint': self.region} + s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) + s3.put_object(Bucket=self.bucket, Key=key, Body="body") + + # Reads object data and stores it into a variable + file_data = self.s3_utils.read_bytes_s3(s3, self.bucket, key) + + # Redirecting upload to vault in second region + glacier = self.s3_utils.connect('client', 'glacier', self.region2) + vault_name = 'archive-vault-new' + glacier.create_vault(vaultName=vault_name) + print('vault name: ' + str(vault_name)) + print('region name: ' + str(self.region2)) + print('-------file data---------') + print(file_data) + response = self.s3_utils.upload_archive(glacier, vault_name, file_data) + + self.assertTrue(response['archiveId']!=None) + + @mock_s3 + @mock_glacier + def test_s3_to_glacier(self): + """ + Changes the storage class of an object from S3 to Glacier + Requires the configure and credential locations as parameters as well as the key of the object + """ + + print("S3 to Glacier---------") + key = "csv/file1_s3.csv" + + # Create boto3 low level api connection + s3 = self.s3_utils.connect('client', 's3', self.region) + location = {'LocationConstraint': self.region} + s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) + s3.put_object(Bucket=self.bucket, Key=key, Body="body") + + # Using the S3 util class invoke the change of storage class + response = self.s3_utils.s3_to_glacier(s3, self.bucket, key) + print(response['ResponseMetadata']['HTTPHeaders']['x-amz-storage-class']) + # Assert 'x-amz-storage-class': 'GLACIER' + + self.assertTrue(response['ResponseMetadata']['HTTPHeaders']['x-amz-storage-class'] == "GLACIER") + + 
@mock_s3 + def test_s3_restore(self): + """ + Uses high level api to restore object from glacier to s3 + """ + + key = "csv/file1_s3.csv" + days = 3 + + # use high level api + s3 = self.s3_utils.connect('resource', 's3' , self.region2) + location = {'LocationConstraint': self.region2} + s3.create_bucket(Bucket=self.bucket, CreateBucketConfiguration=location) + s3.Object(self.bucket, key).put(Bucket=self.bucket, Key=key, Body="body") + + self.assertTrue(self.s3_utils.s3_restore(s3, self.bucket, key, days) != None) + + @mock_glacier + def test_retrieve_inventory(self): + """ + Initiates job for archive retrieval. Takes 3-5 hours to complete if not mocked. + """ + + # Using glacier api initiates job and returns archive results + # Connect to your glacier vault for retrieval + glacier = self.s3_utils.connect('client', 'glacier', self.region2) + vault_name = 'archive-vault-new' + glacier.create_vault(vaultName=vault_name) + + response = self.s3_utils.retrieve_inventory(glacier, vault_name) + print('jobid %s'%response['jobId']) + self.assertTrue(response['jobId'] != None) + + @mock_glacier + @mock_s3 + def test_retrieve_inventory_results(self): + """ + Once the job has been completed, use the job id to retrieve archive results + """ + + # Connect to your glacier vault for retrieval + glacier = mock.Mock(spec=Layer1)#self.s3_utils.connect('client', 'glacier', self.region) + vault_name = 'archive-vault-new' + glacier.create_vault(vaultName=vault_name) + + body_json = {'Body': [{'test':'value'}]} + body_encoded = json.dumps(body_json)#.encode("utf-16") + + body = StreamingBody( + StringIO(str(body_encoded)), + len(str(body_encoded)) + ) + + mocked_response = { + 'body': body + } + glacier.get_job_output.return_value = mocked_response + with mock.patch('boto.glacier.job.tree_hash_from_str') as t: + t.return_value = 'tree_hash' + inventory = self.s3_utils.retrieve_inventory_results(vault_name, glacier, 'ASDF78') + + self.assertEqual(body_json, inventory) + + @mock_s3 + def test_extra_parameters_constructor(self): + testParams = {"access_key": "blah", + "secret_key": "blah", + "log_level": "DEBUG", + "extra": "extra value"} + self.assertRaises(Exception, S3Utils(**testParams)) + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/onestop-python-client/test/unit/util/test_SqsConsumer.py b/onestop-python-client/test/unit/util/test_SqsConsumer.py new file mode 100644 index 0000000..e3dee71 --- /dev/null +++ b/onestop-python-client/test/unit/util/test_SqsConsumer.py @@ -0,0 +1,178 @@ +import unittest +import json + +from moto import mock_sqs +from unittest.mock import MagicMock, ANY +from onestop.util.S3Utils import S3Utils +from onestop.util.SqsConsumer import SqsConsumer + +class SqsConsumerTest(unittest.TestCase): + config_dict = { + 'access_key': 'test_access_key', + 'secret_key': 'test_secret_key', + 's3_region': 'us-east-2', + 's3_bucket': 'archive-testing-demo', + 'sqs_url': 'https://sqs.us-east-2.amazonaws.com/798276211865/cloud-archive-client-sqs', + 'metadata_type': 'COLLECTION', + 'file_id_prefix': 'gov.noaa.ncei.csb:', + 'collection_id': 'fdb56230-87f4-49f2-ab83-104cfd073177', + 'registry_base_url': 'http://localhost/onestop/api/registry', + 'registry_username': 'admin', + 'registry_password': 'whoknows', + 'onestop_base_url': 'http://localhost/onestop/api/search/search', + 'log_level': 'DEBUG' + } + + records = [{"eventVersion":"2.1"}] + message = json.dumps( + {"Type": "Notification", + "MessageId": "9d0691d2-ae9c-58f9-a9f4-c8dcf05d87be", + "TopicArn": 
"arn:aws:sns:us-east-1:798276211865:archive-testing-demo-backup-use-1", + "Subject": "Amazon S3 Notification", + "Message": json.dumps({"Records": records}), + "Timestamp": "2021-05-06T21:15:45.427Z", + "SignatureVersion": "1", + "Signature": "Ui5s4uVgcMr5fjGmePCMgmi14Dx9oS8hIpjXXiQo+xZPgsHkUayz7dEeGmMGGt45l8blmZTZEbxJG+HVGfIUmQGRqoimwiLm+mIAaNIN/BV76FVFcQUIkORX8gYN0a4RS3HU8/ElrKFK8Iz0zpxJdjwxa3xPCDwu+dTotiLTJxSouvg8MmkkDnq758a8vZ9WK2PaOlZiZ3m8Mv2ZvLrozZ/DAAz48HSad6Mymhit82RpGCUxy4SDwXVlP/nLB01AS11Gp2HowJR8NXyStrZYzzQEc+PebITaExyikgTMiVhRHkmb7JrtZPpgZu2daQsSooqpwyIzb6pvgwu9W54jkw==", + "SigningCertURL": "https://sns.us-east-1.amazonaws.com/SimpleNotificationService-010a507c1833636cd94bdb98bd93083a.pem", + "UnsubscribeURL": "https://sns.us-east-1.amazonaws.com/?Action=Unsubscribe&SubscriptionArn=arn:aws:sns:us-east-1:798276211865:archive-testing-demo-backup-use-1:e7a9a9f5-792e-48a6-9ec8-40f7f5a8f600" + }) + + message_wo_records = json.dumps( + {"Type": "Notification", + "MessageId": "9d0691d2-ae9c-58f9-a9f4-c8dcf05d87be", + "TopicArn": "arn:aws:sns:us-east-1:798276211865:archive-testing-demo-backup-use-1", + "Subject": "Amazon S3 Notification", + "Message": "{}", + "Timestamp": "2021-05-06T21:15:45.427Z", + "SignatureVersion": "1", + "Signature": "Ui5s4uVgcMr5fjGmePCMgmi14Dx9oS8hIpjXXiQo+xZPgsHkUayz7dEeGmMGGt45l8blmZTZEbxJG+HVGfIUmQGRqoimwiLm+mIAaNIN/BV76FVFcQUIkORX8gYN0a4RS3HU8/ElrKFK8Iz0zpxJdjwxa3xPCDwu+dTotiLTJxSouvg8MmkkDnq758a8vZ9WK2PaOlZiZ3m8Mv2ZvLrozZ/DAAz48HSad6Mymhit82RpGCUxy4SDwXVlP/nLB01AS11Gp2HowJR8NXyStrZYzzQEc+PebITaExyikgTMiVhRHkmb7JrtZPpgZu2daQsSooqpwyIzb6pvgwu9W54jkw==", + "SigningCertURL": "https://sns.us-east-1.amazonaws.com/SimpleNotificationService-010a507c1833636cd94bdb98bd93083a.pem", + "UnsubscribeURL": "https://sns.us-east-1.amazonaws.com/?Action=Unsubscribe&SubscriptionArn=arn:aws:sns:us-east-1:798276211865:archive-testing-demo-backup-use-1:e7a9a9f5-792e-48a6-9ec8-40f7f5a8f600" + }) + + @mock_sqs + def setUp(self): + print("Set it up!") + + self.s3_utils = S3Utils(**self.config_dict) + self.sqs_consumer = SqsConsumer(**self.config_dict) + + def tearDown(self): + print("Tear it down!") + + @mock_sqs + def test_connect(self): + queue_name = 'test' + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.config_dict['s3_region']) + expQueue = sqs_resource.create_queue(QueueName=queue_name) + queue = self.sqs_consumer.connect(sqs_resource, queue_name) + + self.assertEqual(expQueue.url, queue.url) + + # Kind of pointless since we catch every exception this doesn't fail when it should.... 
+ @mock_sqs + def test_receive_messages_no_records(self): + mock_cb = MagicMock() + + # Create the mock queue beforehand and set SqsConsumer's 'sqs_url' to the mock's URL + queue_name = 'test_queue' + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.config_dict['s3_region']) + sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url + + # Send a test message lacking Records field + sqs_client = self.s3_utils.connect('client', 'sqs' , self.config_dict['s3_region']) + sqs_client.send_message( + QueueUrl=sqs_queue_url, + MessageBody= self.message_wo_records + ) + queue = sqs_resource.Queue(queue_name) + + self.sqs_consumer.receive_messages(queue, 1, mock_cb) + + # Verify callback function was called once with expected message attributes + mock_cb.assert_not_called() + + @mock_sqs + def test_receive_messages_fails_invalid_sqs_max_polls(self): + with self.assertRaises(ValueError): + self.sqs_consumer.receive_messages(MagicMock(), 0, MagicMock()) + + @mock_sqs + def test_receive_messages_polls_msgs_expected_times(self): + mock_cb = MagicMock() + queue = MagicMock() + + sqs_max_polls = 2 + self.sqs_consumer.receive_messages(queue, sqs_max_polls, mock_cb) + + # Verify polling called expected times + self.assertEqual(queue.receive_messages.call_count, sqs_max_polls) + + @mock_sqs + def test_receive_messages_callback_occurs(self): + mock_cb = MagicMock() + + # Create the mock queue beforehand and set SqsConsumer's 'sqs_url' to the mock's URL + queue_name = 'test_queue' + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.config_dict['s3_region']) + sqs_queue_url = sqs_resource.create_queue(QueueName=queue_name).url + + # Send a test message + sqs_client = self.s3_utils.connect('client', 'sqs' , self.config_dict['s3_region']) + sqs_client.send_message( + QueueUrl=sqs_queue_url, + MessageBody= self.message + ) + queue = sqs_resource.Queue(queue_name) + + self.sqs_consumer.receive_messages(queue, 1, mock_cb) + + # Verify callback function was called once with expected message attributes + mock_cb.assert_called_with(self.records[0], ANY) + + @mock_sqs + def test_happy_path(self): + mock_cb = MagicMock() + + # Create the mock queue beforehand and set SqsConsumer's 'sqs_url' to the mock's URL + queue_name = 'test_queue' + sqs_resource = self.s3_utils.connect('resource', 'sqs', self.config_dict['s3_region']) + queue = self.sqs_consumer.connect(sqs_resource, queue_name) #sqs_resource.create_queue(QueueName=queue_name) + + # Send a test message + sqs_client = self.s3_utils.connect('client', 'sqs' , self.config_dict['s3_region']) + sqs_client.send_message( + QueueUrl=queue.url, + MessageBody= self.message + ) + + self.sqs_consumer.receive_messages(queue, 1, mock_cb) + + # Verify callback function was called once with expected message attributes + mock_cb.assert_called_with(self.records[0], ANY) + + # An example using external send/receive methods + @unittest.skip + @mock_sqs + def test_write_message_valid(self): + "Test the write_message method with a valid message" + sqs_client = self.s3_utils.connect('client', 'sqs' , self.config_dict['s3_region']) + sqs = self.s3_utils.connect('resource', 'sqs', self.config_dict['s3_region']) + queue = sqs.create_queue(QueueName='test-skype-sender') + self.sqs_consumer.sqs_url = queue.url + skype_message = 'Testing with a valid message' + channel = 'test' + expected_message = str({'msg':f'{skype_message}', 'channel':channel}) + message = str({'msg':f'{skype_message}', 'channel':channel}) + queue.send_message(MessageBody=(message)) + + 
sqs_messages = queue.receive_messages() + print('Message: %s'%sqs_messages) + print('Message0: %s'%sqs_messages[0]) + assert sqs_messages[0].body == expected_message, 'Message in skype-sender does not match expected' + print(f'The message in skype-sender SQS matches what we sent') + assert len(sqs_messages) == 1, 'Expected exactly one message in SQS' + print(f'\nExactly one message in skype-sender SQS') + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/onestop-python-client/tests/utils.py b/onestop-python-client/test/utils.py similarity index 83% rename from onestop-python-client/tests/utils.py rename to onestop-python-client/test/utils.py index 2f1e6d5..fc124fb 100644 --- a/onestop-python-client/tests/utils.py +++ b/onestop-python-client/test/utils.py @@ -15,7 +15,8 @@ def create_delete_message(region, bucket, key): "Message": '''{ "Records": [{ "eventVersion": "2.1", "eventSource": "aws:s3", "awsRegion": "''' + region + '''", - "eventTime": "2020-12-14T20:56:08.725Z", "eventName": "ObjectRemoved:Delete", + "eventTime": "2020-12-14T20:56:08.725Z", + "eventName": "ObjectRemoved:Delete", "userIdentity": {"principalId": "AX8TWPQYA8JEM"}, "requestParameters": {"sourceIPAddress": "65.113.158.185"}, "responseElements": {"x-amz-request-id": "D8059E6A1D53597A", @@ -25,7 +26,11 @@ def create_delete_message(region, bucket, key): "bucket": {"name": "''' + bucket + '''", "ownerIdentity": {"principalId": "AX8TWPQYA8JEM"}, "arn": "arn:aws:s3:::''' + bucket + '''"}, - "object": {"key": "''' + key + '''", "sequencer": "005FD7D1765F04D8BE"} + "object": {"key": "''' + key + '''", + "sequencer": "005FD7D1765F04D8BE", + "eTag": "44d2452e8bc2c8013e9c673086fbab7a", + "size": 1385, + "versionId": "q6ls_7mhqUbfMsoYiQSiADnHBZQ3Fbzf"} } }] }''', diff --git a/onestop-python-client/tests/KafkaPublisherTest.py b/onestop-python-client/tests/KafkaPublisherTest.py deleted file mode 100644 index 7d992ae..0000000 --- a/onestop-python-client/tests/KafkaPublisherTest.py +++ /dev/null @@ -1,25 +0,0 @@ -import unittest - -import json - -from onestop.KafkaPublisher import KafkaPublisher - -class KafkaPublisherTest(unittest.TestCase): - kp = None - - def setUp(self): - print("Set it up!") - self.kp = KafkaPublisher("../config/kafka-publisher-config-dev.yml") - - def tearDown(self): - print("Tear it down!") - - def test_parse_config(self): - self.assertFalse(self.kp.conf['brokers']==None) - - def test_publish_collection(self): - print("Publish collection") - # Integration test TBD - -if __name__ == '__main__': - unittest.main() \ No newline at end of file diff --git a/onestop-python-client/tests/SqsHandlersTest.py b/onestop-python-client/tests/SqsHandlersTest.py deleted file mode 100644 index 12323ef..0000000 --- a/onestop-python-client/tests/SqsHandlersTest.py +++ /dev/null @@ -1,89 +0,0 @@ -import json -import unittest -import boto3 - -from moto import mock_s3 -from moto import mock_sqs -from tests.utils import abspath_from_relative, create_delete_message -from onestop.WebPublisher import WebPublisher -from onestop.util.S3Utils import S3Utils -from onestop.util.S3MessageAdapter import S3MessageAdapter -from onestop.util.SqsConsumer import SqsConsumer -from onestop.util.SqsHandlers import create_delete_handler - - -class SqsHandlerTest(unittest.TestCase): - wp = None - su = None - s3ma = None - sqs = None - wp_config = abspath_from_relative(__file__, "../config/web-publisher-config-local.yml") - aws_config = abspath_from_relative(__file__, "../config/aws-util-config-dev.yml") - cred_config 
= abspath_from_relative(__file__, "../config/credentials-template.yml") - csb_config = abspath_from_relative(__file__, "../config/csb-data-stream-config.yml") - - collection_uuid = '5b58de08-afef-49fb-99a1-9c5d5c003bde' - payloadDict = { - "fileInformation": { - "name": "OR_ABI-L1b-RadF-M6C13_G16_s20192981730367_e20192981740087_c20192981740157.nc", - "size": 30551050, - "checksums": [{ - "algorithm": "SHA1", - "value": "bf4c5b58f8d5f9445f7b277f988e5861184f775a" - }], - "format": "NetCDF" - }, - "relationships": [{ - "type": "COLLECTION", - "id": collection_uuid - }], - "fileLocations": { - "s3://noaa-goes16/ABI-L1b-RadF/2019/298/17/OR_ABI-L1b-RadF-M6C13_G16_s20192981730367_e20192981740087_c20192981740157.nc": { - "uri": "s3://noaa-goes16/ABI-L1b-RadF/2019/298/17/OR_ABI-L1b-RadF-M6C13_G16_s20192981730367_e20192981740087_c20192981740157.nc", - "type": "ACCESS", - "deleted": "false", - "restricted": "false", - "asynchronous": "false", - "locality": "us-east-2", - "lastModified": 1572025823000, - "serviceType": "Amazon:AWS:S3", - "optionalAttributes": {} - } - } - } - - def setUp(self): - print("Set it up!") - self.wp = WebPublisher(self.wp_config, self.cred_config) - self.su = S3Utils(self.aws_config, self.cred_config) - self.s3ma = S3MessageAdapter(self.csb_config, self.su) - - def tearDown(self): - print("Tear it down!") - - @mock_s3 - @mock_sqs - def init_s3(self): - bucket = self.su.conf['s3_bucket'] - key = self.su.conf['s3_key'] - boto_client = self.su.connect("s3", None) - boto_client.create_bucket(Bucket=bucket) - boto_client.put_object(Bucket=bucket, Key=key, Body="foobar") - - sqs_client = boto3.client('sqs', region_name=self.su.conf['s3_region']) - sqs_queue = sqs_client.create_queue(QueueName=self.su.conf['sqs_name']) - self.sqs = SqsConsumer(self.aws_config, self.cred_config) - message = create_delete_message(self.su.conf['s3_region'], bucket, key) - sqs_client.send_message(QueueUrl=sqs_queue['QueueUrl'], MessageBody=json.dumps(message)) - return sqs_queue['QueueUrl'] - - def delete_handler_wrapper(self, recs): - handler = create_delete_handler(self.wp) - result = handler(recs) - self.assertTrue(result) - - @mock_sqs - def test_delete_handler(self): - mock_queue_url = self.init_s3() - sqs_queue = boto3.resource('sqs', region_name=self.su.conf['s3_region']).Queue(mock_queue_url) - self.sqs.receive_messages(sqs_queue, self.su.conf['sqs_max_polls'], self.delete_handler_wrapper) diff --git a/onestop-python-client/tests/extractor/CsbExtractorTest.py b/onestop-python-client/tests/extractor/CsbExtractorTest.py deleted file mode 100644 index 7dbbc9e..0000000 --- a/onestop-python-client/tests/extractor/CsbExtractorTest.py +++ /dev/null @@ -1,95 +0,0 @@ -import unittest -from onestop.extract.CsbExtractor import CsbExtractor -from onestop.util.S3Utils import S3Utils -from tests.utils import abspath_from_relative - - -class CsbExtractorTest(unittest.TestCase): - - # def setUp(self): - # print("Set it up!") - # file_name = '../data/file4.csv' - # self.csb_extractor = CsbExtractor(file_name) - - def setUp(self): - print("Set it up!") - key = "public/NESDIS/CSB/file4.csv" - self.su = S3Utils( abspath_from_relative( __file__, "../../config/aws-util-config-dev.yml" ), - abspath_from_relative(__file__, "../../config/credentials.yml") ) - self.csb_extractor = CsbExtractor(self.su, key) - - def tearDown(self): - print("Tear it down!") - - def test_is_csv(self): - csv_str = '.csv' - self.assertTrue(self.csb_extractor.is_csv(self.csb_extractor.file_name)) - - - def 
test_get_geospatial_temporal_bounds(self): - bounds_dict = self.csb_extractor.get_spatial_temporal_bounds('LON', 'LAT', 'TIME') - coords = bounds_dict["geospatial"] - print(str(coords)) - self.assertEqual(coords[0], -96.847995) - self.assertEqual(coords[1], 29.373065) - self.assertEqual(coords[2], -92.747995) - self.assertEqual(coords[3], 33.373065) - - date_rng = bounds_dict["temporal"] - self.assertEqual(date_rng[0], '2018-04-10T14:00:06.000Z' ) - self.assertEqual(date_rng[1], '2020-04-10T14:00:06.000Z' ) - - - def test_get_min_lon(self): - bounds_dict = self.csb_extractor.get_spatial_temporal_bounds('LON', 'LAT', 'TIME') - coords = bounds_dict["geospatial"] - min_lon = coords[0] - self.assertEqual(min_lon, -96.847995) - - - def test_get_max_datetime(self): - bounds_dict = self.csb_extractor.get_spatial_temporal_bounds('LON', 'LAT', 'TIME') - date_rng = bounds_dict["temporal"] - end_date = date_rng[1] - self.assertEqual(end_date, '2020-04-10T14:00:06.000Z') - - - def test_get_min_datetime(self): - bounds_dict = self.csb_extractor.get_spatial_temporal_bounds('LON', 'LAT', 'TIME') - date_rng = bounds_dict["temporal"] - begin_date = date_rng[0] - self.assertEqual(begin_date, '2018-04-10T14:00:06.000Z') - - - def test_extract_coords(self): - bounds_dict = self.csb_extractor.get_spatial_temporal_bounds('LON', 'LAT', 'TIME') - coords = bounds_dict["geospatial"] - - min_lon = coords[0] - min_lat = coords[1] - max_lon = coords[2] - max_lat = coords[3] - - coords = self.csb_extractor.extract_coords(max_lon, max_lat, min_lon, min_lat) - result = [[ - -94.847995, - 29.373065 - ], - [ - -96.847995, - 29.373065 - ], - [ - -94.847995, - 33.373065 - ], - [ - -92.747995, - 29.383065 - ] - ] - self.assertEqual(coords, result) - - -if __name__ == '__main__': - unittest.main() \ No newline at end of file diff --git a/onestop-python-client/tests/util/IntegrationTest.py b/onestop-python-client/tests/util/IntegrationTest.py deleted file mode 100644 index 381e4d7..0000000 --- a/onestop-python-client/tests/util/IntegrationTest.py +++ /dev/null @@ -1 +0,0 @@ -#TBD \ No newline at end of file diff --git a/onestop-python-client/tests/util/S3MessageAdapterTest.py b/onestop-python-client/tests/util/S3MessageAdapterTest.py deleted file mode 100644 index 41a8f9d..0000000 --- a/onestop-python-client/tests/util/S3MessageAdapterTest.py +++ /dev/null @@ -1,84 +0,0 @@ -import unittest -from moto import mock_s3 -from tests.utils import abspath_from_relative -from onestop.util.S3Utils import S3Utils -from onestop.util.S3MessageAdapter import S3MessageAdapter - -class S3MessageAdapterTest(unittest.TestCase): - s3ma = None - - recs1 = \ - [{ - 'eventVersion': '2.1', - 'eventSource': 'aws:s3', - 'awsRegion': 'us-east-1', - 'eventTime': '2020-11-10T00:44:20.642Z', - 'eventName': 'ObjectCreated:Put', - 'userIdentity': {'principalId': 'AWS:AIDAUDW4MV7I5RW5LQJIO'}, - 'requestParameters': {'sourceIPAddress': '65.113.158.185'}, - 'responseElements': {'x-amz-request-id': '7D394F43C682BB87', 'x-amz-id-2': 'k2Yn5BGg7DM5fIEAnwv5RloBFLYERjGRG3mT+JsPbdX033USr0eNObqkHiw3m3x+BQ17DD4C0ErB/VdhYt2Az01LJ4mQ/aqS'}, - 's3': {'s3SchemaVersion': '1.0', 'configurationId': 'csbS3notification', - 'bucket': {'name': 'nesdis-ncei-csb-dev', - 'ownerIdentity': {'principalId': 'A3PGJENIF5D10L'}, - 'arn': 'arn:aws:s3:::nesdis-ncei-csb-dev'}, - 'object': {'key': 'csv/file1.csv', 'size': 1385, - 'eTag': '44d2452e8bc2c8013e9c673086fbab7a', - 'versionId': 'q6ls_7mhqUbfMsoYiQSiADnHBZQ3Fbzf', - 'sequencer': '005FA9E26498815778'} - } - }] - - recs2 = \ - [{ - 
'eventVersion': '2.1', - 'eventSource': 'aws:s3', - 'awsRegion': 'us-east-1', - 'eventTime': '2020-11-10T00:44:20.642Z', - 'eventName': 'ObjectCreated:Put', - 'userIdentity': {'principalId': 'AWS:AIDAUDW4MV7I5RW5LQJIO'}, - 'requestParameters': {'sourceIPAddress': '65.113.158.185'}, - 'responseElements': {'x-amz-request-id': '7D394F43C682BB87', 'x-amz-id-2': 'k2Yn5BGg7DM5fIEAnwv5RloBFLYERjGRG3mT+JsPbdX033USr0eNObqkHiw3m3x+BQ17DD4C0ErB/VdhYt2Az01LJ4mQ/aqS'}, - 's3': {'s3SchemaVersion': '1.0', 'configurationId': 'csbS3notification', - 'bucket': {'name': 'nesdis-ncei-csb-dev', - 'ownerIdentity': {'principalId': 'A3PGJENIF5D10L'}, - 'arn': 'arn:aws:s3:::nesdis-ncei-csb-dev'}, - 'object': {'key': 'csv/file2.csv', 'size': 1386, - 'eTag': '44d2452e8bc2c8013e9c673086fbab7a', - 'versionId': 'q6ls_7mhqUbfMsoYiQSiADnHBZQ3Fbzf', - 'sequencer': '005FA9E26498815778'} - } - }] - - def setUp(self): - print("Set it up!") - self.s3_utils = S3Utils(abspath_from_relative(__file__, "../../config/aws-util-config-dev.yml"), - abspath_from_relative(__file__, "../../config/credentials-template.yml")) - self.s3ma = S3MessageAdapter(abspath_from_relative(__file__, "../../config/csb-data-stream-config-template.yml"), - self.s3_utils) - - def tearDown(self): - print("Tear it down!") - - def test_parse_config(self): - self.assertFalse(self.s3ma.conf['collection_id']==None) - - - @mock_s3 - def test_transform(self): - s3 = self.s3_utils.connect('s3', self.s3_utils.conf['s3_region']) - location = {'LocationConstraint': self.s3_utils.conf['s3_region']} - bucket = 'nesdis-ncei-csb-dev' - key = 'csv/file1.csv' - key2 = 'csv/file2.csv' - s3.create_bucket(Bucket=bucket, CreateBucketConfiguration=location) - s3.put_object(Bucket=bucket, Key=key, Body="body") - s3.put_object(Bucket=bucket, Key=key2, Body="body") - - payload = self.s3ma.transform(self.recs1) - print(payload) - - payload = self.s3ma.transform(self.recs2) - print(payload) - self.assertTrue(payload!=None) - - diff --git a/onestop-python-client/tests/util/S3UtilsTest.py b/onestop-python-client/tests/util/S3UtilsTest.py deleted file mode 100644 index 34850ad..0000000 --- a/onestop-python-client/tests/util/S3UtilsTest.py +++ /dev/null @@ -1,209 +0,0 @@ -import csv -import unittest -import uuid -from moto import mock_s3 -from moto import mock_glacier - -from tests.utils import abspath_from_relative -from onestop.util.S3Utils import S3Utils - -class S3UtilsTest(unittest.TestCase): - su = None - - def setUp(self): - print("Set it up!") - self.su = S3Utils(abspath_from_relative(__file__, "../../config/aws-util-config-dev.yml"), - abspath_from_relative(__file__, "../../config/credentials.yml")) - - def tearDown(self): - print("Tear it down!") - # Remove files from bucket - - def test_parse_config(self): - self.assertFalse(self.su.conf['sqs_url']==None) - - @mock_s3 - def test_get_uuid_metadata(self): - boto_client = self.su.connect("s3_resource", None) - s3_key = "csv/file1.csv" - bucket = self.su.conf['s3_bucket'] - region = self.su.conf['s3_region'] - location = {'LocationConstraint': region} - boto_client.create_bucket(Bucket=bucket, CreateBucketConfiguration=location) - obj_uuid = str(uuid.uuid4()) - boto_client.Object(bucket, s3_key).put(Bucket=bucket, Key=s3_key, Body="my_body", Metadata={'object-uuid': obj_uuid}) - - self.assertFalse(self.su.get_uuid_metadata(boto_client, bucket, s3_key) == None) - - @mock_s3 - def test_add_uuid_metadata(self): - region = self.su.conf['s3_region'] - boto_client = self.su.connect("s3_resource", region) - - s3_key = "csv/file1.csv" - 
bucket = self.su.conf['s3_bucket'] - - location = {'LocationConstraint': region} - boto_client.create_bucket(Bucket=bucket, CreateBucketConfiguration=location) - boto_client.Object(bucket, s3_key).put(Bucket=bucket, Key=s3_key, Body="my_body") - - self.assertTrue(self.su.add_uuid_metadata(boto_client, bucket, s3_key)) - - @mock_s3 - def test_add_file_s3(self): - boto_client = self.su.connect("s3", None) - local_file = abspath_from_relative(__file__, "../data/file4.csv") - s3_key = "csv/file4.csv" - bucket = self.su.conf['s3_bucket'] - region = self.su.conf['s3_region'] - location = {'LocationConstraint': region} - boto_client.create_bucket(Bucket=bucket, CreateBucketConfiguration=location) - overwrite = True - - self.assertTrue(self.su.upload_s3(boto_client, local_file, bucket, s3_key, overwrite)) - - def test_get_csv_s3(self): - boto_client = self.su.connect("session", None) - s3_key = "csv/file1.csv" - bucket = self.su.conf['s3_bucket'] - sm_open_file = self.su.get_csv_s3(boto_client, bucket, s3_key) - - # print("reading csv:" + line.decode('utf-8')) - csv_reader = csv.DictReader(sm_open_file) - for row in csv_reader: - print(str(row["LON"])) - - def test_read_bytes_s3(self): - boto_client = self.su.connect("s3", None) - s3_key = "csv/file1.csv" - bucket = self.su.conf['s3_bucket'] - self.assertTrue(self.su.read_bytes_s3(boto_client, bucket, s3_key)) - - @mock_s3 - def test_add_files(self): - boto_client = self.su.connect("s3", None) - local_files = ["file1_s3.csv", "file2.csv", "file3.csv"] - bucket = self.su.conf['s3_bucket'] - region = self.su.conf['s3_region'] - location = {'LocationConstraint': region} - boto_client.create_bucket(Bucket=bucket, CreateBucketConfiguration=location) - overwrite = True - s3_file = None - for file in local_files: - local_file = abspath_from_relative(__file__, "../data/" + file) - s3_file = "csv/" + file - self.assertTrue(self.su.upload_s3(boto_client, local_file, bucket, s3_file, overwrite)) - - @mock_s3 - @mock_glacier - def test_s3_cross_region(self): - print('Cross Region Vault Upload ------------- ') - key = "csv/file1.csv" - # grabs te region and bucket name from the config file - region = self.su.conf['s3_region'] - bucket = self.su.conf['s3_bucket'] - - # makes connection to low level s3 client - s3 = self.su.connect('s3', region) - location = {'LocationConstraint': region} - s3.create_bucket(Bucket=bucket, CreateBucketConfiguration=location) - s3.put_object(Bucket=bucket, Key=key, Body="body") - - # Reads object data and stores it into a variable - file_data = self.su.read_bytes_s3(s3, bucket, key) - - # Redirecting upload to vault in second region - glacier = self.su.connect("glacier", self.su.conf['s3_region2']) - vault_name = self.su.conf['vault_name'] - glacier.create_vault(vaultName=vault_name) - print('vault name: ' + str(vault_name)) - print('region name: ' + str(self.su.conf['s3_region2'])) - print('-------file data---------') - print(file_data) - response = self.su.upload_archive(glacier, vault_name, file_data) - - self.assertTrue(response['archiveId']!=None) - - @mock_s3 - @mock_glacier - def test_s3_to_glacier(self): - """ - Changes the storage class of an object from S3 to Glacier - Requires the configure and credential locations as parameters as well as the key of the object - """ - - print("S3 to Glacier---------") - key = "csv/file1_s3.csv" - # grabs te region and bucket name from the config file - region = self.su.conf['s3_region'] - bucket = self.su.conf['s3_bucket'] - - # Create boto3 low level api connection - s3 = 
self.su.connect('s3', region) - location = {'LocationConstraint': region} - s3.create_bucket(Bucket=bucket, CreateBucketConfiguration=location) - s3.put_object(Bucket=bucket, Key=key, Body="body") - - # Using the S3 util class invoke the change of storage class - response = self.su.s3_to_glacier(s3, bucket, key) - print(response['ResponseMetadata']['HTTPHeaders']['x-amz-storage-class']) - # Assert 'x-amz-storage-class': 'GLACIER' - - self.assertTrue(response['ResponseMetadata']['HTTPHeaders']['x-amz-storage-class'] == "GLACIER") - - @mock_s3 - def test_s3_restore(self): - """ - Uses high level api to restore object from glacier to s3 - """ - - region = self.su.conf['s3_region2'] - bucket = self.su.conf['s3_bucket'] - key = "csv/file1_s3.csv" - days = 3 - - # use high level api - s3 = self.su.connect('s3_resource', region) - location = {'LocationConstraint': region} - s3.create_bucket(Bucket=bucket, CreateBucketConfiguration=location) - s3.Object(bucket, key).put(Bucket=bucket, Key=key, Body="body") - - self.assertTrue(self.su.s3_restore(s3, bucket, key, days) != None) - - @mock_glacier - def test_retrieve_inventory(self): - """ - Initiates job for archive retrieval. Takes 3-5 hours to complete - """ - - # Using glacier api initiates job and returns archive results - # Connect to your glacier vault for retrieval - glacier = self.su.connect("glacier", self.su.conf['s3_region2']) - vault_name = self.su.conf['vault_name'] - glacier.create_vault(vaultName=vault_name) - - - response = self.su.retrieve_inventory(glacier, vault_name) - self.assertTrue(response['jobId']!= None) - - ''' - Excluding for now because it's an asynchronous test - def test_retrieve_inventory_results(self, jobid): - """ - Once the job has been completed, use the job id to retrieve archive results - """ - - # Connect to your glacier vault for retrieval - glacier = self.su.connect("glacier", self.su.conf['region']) - vault_name = self.su.conf['vault_name'] - - # Retrieve the job results - inventory = self.su.retrieve_inventory_results(vault_name, glacier, jobid) - - self.assertTrue(inventory != None) - ''' - - - -if __name__ == '__main__': - unittest.main() \ No newline at end of file diff --git a/onestop-python-client/tests/util/SqsConsumerTest.py b/onestop-python-client/tests/util/SqsConsumerTest.py deleted file mode 100644 index 4d6be77..0000000 --- a/onestop-python-client/tests/util/SqsConsumerTest.py +++ /dev/null @@ -1,34 +0,0 @@ -import unittest -import boto3 -from moto import mock_sqs -from tests.utils import abspath_from_relative -from onestop.util.SqsConsumer import SqsConsumer - -class SqsConsumerTest(unittest.TestCase): - sc = None - - def setUp(self): - print("Set it up!") - self.sc = SqsConsumer(abspath_from_relative(__file__, "../../config/aws-util-config-dev.yml"), - abspath_from_relative(__file__, "../../config/credentials-template.yml")) - - def tearDown(self): - print("Tear it down!") - - def test_parse_config(self): - self.assertFalse(self.sc.conf['sqs_url']==None) - - @mock_sqs - def test_poll_messages(self): - # Create the mock queue beforehand and set its mock URL as the 'sqs_url' config value for SqsConsumer - boto_session = boto3.Session(aws_access_key_id=self.sc.cred['sandbox']['access_key'], - aws_secret_access_key=self.sc.cred['sandbox']['secret_key']) - sqs_session = boto_session.resource('sqs', region_name=self.sc.conf['s3_region']) - res = sqs_session.create_queue(QueueName="test_queue") - self.sc.conf['sqs_url'] = res.url - queue = self.sc.connect() - self.sc.receive_messages(queue, 
self.sc.conf['sqs_max_polls'], lambda *args, **kwargs: None)
-
-
-if __name__ == '__main__':
-    unittest.main()
\ No newline at end of file
diff --git a/scripts/README.md b/scripts/README.md
index e0276cb..4773928 100644
--- a/scripts/README.md
+++ b/scripts/README.md
@@ -1,60 +1,112 @@
-#Scripts
+# Using onestop-python-client
 ## Table of Contents
-* [Quickstart](#quickstart)
-* [Kubectl Pod Verification](#kubectl-pod-verification)
-* [Load Data](#load-data)
-* [Updating Containers](#updating-containers)
+* [Setup](#setup)
+  * [Helm](#helm)
+    * [Use Helm to Create a Script Container](#use-helm-to-create-a-script-container)
+    * [Using Helm Config File](#using-helm-config-file)
+    * [Helm Pulling of Image](#helm-pulling-of-image)
+    * [Startup Helm Script Container](#startup-helm-script-container)
+  * [Manually Setup Environment](#manually-setup-environment)
+* [Building](#building)
+  * [Rebuilding Code or Scripts](#rebuilding-code-or-scripts)
+  * [Rebuilding Containers](#rebuilding-containers)
+* [Load Data into OneStop](#load-data-into-onestop)
+  * [onestop-test-data repository](#onestop-test-data-repositoryhttpsgithubcomcedardevsonestop-test-data)
+  * [osim-deployment repository](#osim-deployment-repositoryhttpsgithubcomcedardevsosim-deployment)
+* [OneStop Quickstart](https://cedardevs.github.io/onestop/developer/quickstart)
-This directory contains scripts that use the onestop-python-library to send data to a OneStop.
-
-## Quickstart
-- Install conda (miniconda works).
-- Restart terminal or source files to recognize conda commands.
-- Create a new conda environment and activate it
-  - `conda create -n onestop-clients python=3`
-  - `conda activate onestop-clients`
-  - `pip install setuptools`
-
-- Install any libraries needed by your sme script
-  - Ex: `pip install PyYaml`
-
-- Build the latest onestop-python-client
-  - `pip uninstall onestop-python-client-cedardevs`
-  - `pip install ./onestop-python-client` (run from root of this repository)
-
-- Input credentials for helm in the file `helm/onestop-sqs-consumer/values.yaml`
-  - Then:
-  - `helm uninstall sme`
-  - `helm install sme helm/onestop-sqs-consumer`
-
-## Kubectl Pod Verification
-- Verify onestop-client pod is running, copy the pod name.
-  - `kubectl get pods`
-
-- Exec into it
-  - `kubectl exec -it <pod name> -- sh` where the <pod name> is listed in `kubectl get pods`
-
-- Check logs
-  - `kubectl logs <pod name>`
-
-## Load Data
-There are several repositories to aid in loading data into a OneStop. Please read the appropriate repository's readme for accurate and up to date usage information.
+## Setup
+To use onestop-python-client there are two options: helm or manually.
+
+### Helm
+#### Use Helm to Create a Script Container
+We use helm to pull a OneStop-Clients image (specified in `helm/<chart name>/values.yaml`) and deploy a kubernetes container that can communicate with the configured OneStop. It also copies the onestop-python-client and scripts directories into the container.
+
+Those configuration values are in this repo under `helm/<chart name>/values.yaml`. Our helm is configured to create a configuration file in the script container at `/etc/config/config.yml` from the appropriate values.yaml. You can use this or create your own configuration file and put it in the script container. Our scripts are configured to use the command-line parameter `conf`, or to fall back to the helm-generated configuration file when one isn't specified.
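+
+As a rough illustration of that fallback behavior, a script's config loading might look like the sketch below. This is hypothetical code, not the actual scripts: the real argument parsing, default path, and config keys may differ.
+
+```python
+import argparse
+import yaml
+
+# Assumed default: the helm-generated config file path described above.
+DEFAULT_CONF = '/etc/config/config.yml'
+
+def load_config():
+    parser = argparse.ArgumentParser(description='Example config loader (illustrative only)')
+    parser.add_argument('-conf', help='Path to a YAML config file', default=DEFAULT_CONF)
+    args = parser.parse_args()
+    with open(args.conf) as f:
+        # Returns a dict of config values (log_level, registry_base_url, etc.)
+        return yaml.safe_load(f)
+
+if __name__ == '__main__':
+    print(load_config())
+```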
+
+#### Using Helm Config File
+If you are going to use the helm generated configuration file then you should probably edit the conf section in the helm values.yaml file for the container you will have helm create (Ex. `helm/onestop-sqs-consumer/values.yaml`).
+  * *_metadata_type - should be granule or collection, depending on what you are sending/receiving.
+  * schema_registry, registry_base_url, and onestop_base_url - set these to what you are communicating with, especially if you are not on cedar-devs talking to its OneStop.
+  * AWS section - there are several config values for AWS you probably need to change; many are set to testing values.
+  * Kafka section - there is a whole Kafka section that you might need to adjust if you are using Kafka. This is perhaps not the preferred way to submit to OneStop.
+  * log_level - if you are troubleshooting or just want to see more granular logging, set this to DEBUG.
+
+#### Helm Pulling of Image
+When you run the helm install command, helm pulls the specified image from the repository indicated in the helm values.yaml file.
+
+#### Startup Helm Script Container
+The helm install command, run from the root of this repository, will use the charts and configuration information in `helm/onestop-sqs-consumer` to create a container called `sme`:
+  * cd to the root of this repository
+  * `helm uninstall sme`
+  * `helm install sme helm/onestop-sqs-consumer`
+
+To check on the container, run this and look for the pod whose name starts with `sme`:
+
+`kubectl get pods`
+```
+(base) ~/repo/onestop-clients 07:00 PM$ kubectl get pods
+NAME                                        READY   STATUS    RESTARTS   AGE
+sme-onestop-sqs-consumer-5c678675f7-q2s7h   0/1     Pending   0          26s
+```
+If it isn't in a 'Running' state within 10 seconds then something is probably wrong. If it hasn't crashed yet (CrashLoopBackOff state), then it is probably a timeout problem trying to connect to a resource.
 
-- To load data locally you will need a OneStop running locally. This is an example of how to do that, more info in the OneStop repository.
-  - `skaffold dev --status-check false`
-
-- To load test collections from onestop-test-data repository (read the README for more information) to your local OneStop:
-  - `./upload.sh demo http://localhost/onestop/api/registry`
-
-- From the osim-deployment repository there is a staging-scripts directory with scripts for loading some data:
-  - `./copyS3objects.sh -max_files=5 copy-config/archive-testing-demo-csb.sh`

+Once the container is running, which should only be a matter of seconds, you can "ssh" into the container via this command.

-## Updating Containers
-- If the onestop-python-client code changes then run:
-  - `docker build . -t cedardevs/onestop-python-client:latest`

+NOTE: you need the container name listed in the `kubectl get pods` command results for this command:

-- If just the scripts change
-  - `docker build ./scripts/sqs-to-registry -t cedardevs/onestop-s3-handler`
+`kubectl exec --stdin --tty sme-onestop-sqs-consumer-5c678675f7-kmpvn -- /bin/bash`
+
+### Manually Setup Environment
+* Install conda (miniconda works).
+* Restart terminal or source files to recognize conda commands.
+* Create a new conda environment and activate it (not convinced you need this)
+  * `conda create -n onestop-clients python=3`
+  * `conda activate onestop-clients`
+  * `pip install setuptools`
+
+* Install any libraries needed by your script
+  * Ex: `pip install PyYaml`
+
+  `pip install ./onestop-python-client`
+
+  To test the import, try this and it shouldn't give an error:
+
+  ```
+  $ python3
+  >>> import onestop_client
+  ```
+
+## Building
+Building locally is not necessary if you are using the images that we build automatically. Currently, we build images via docker files with the tag 'latest' when *any* commit, even on a branch, is made to github and triggers CircleCI.
+You might want to do this to make code changes, build them, and then run your python script against that pip-installed onestop-python-client locally.
+
+### Rebuilding Code or Scripts
+* Install the latest onestop-python-client into your environment
+
+  `pip uninstall onestop-python-client-cedardevs`
+
+  `pip install ./onestop-python-client` (run from root of this repository)
+
+### Rebuilding Containers
+* If the onestop-python-client code changes then run:
+
+  `docker build . -t cedardevs/onestop-python-client:latest`
+
+* If just the scripts change
+
+  `docker build ./scripts/sqs-to-registry -t cedardevs/onestop-s3-handler`
+
+  `docker build ./scripts/sme/ -t cedardevs/onestop-sme:latest`
+
+## Load Data into OneStop
+There are several repositories to aid in loading data into a OneStop. Please read the appropriate repository's readme for accurate and up to date usage information.
+### [onestop-test-data repository](https://github.com/cedardevs/onestop-test-data)
+  `./upload.sh demo http://localhost/onestop/api/registry`
+### [osim-deployment repository](https://github.com/cedardevs/osim-deployment)
+  From the osim-deployment repository there is a staging-scripts directory with scripts for loading some data:
+
+  `./copyS3objects.sh -max_files=5 copy-config/archive-testing-demo-csb.sh`
diff --git a/scripts/archive_client_integration.py b/scripts/archive_client_integration.py
index 2831045..be672f8 100644
--- a/scripts/archive_client_integration.py
+++ b/scripts/archive_client_integration.py
@@ -1,64 +1,74 @@
 import argparse
-from onestop.util.S3Utils import S3Utils
-
-
-def handler():
-    '''
-    Simultaneously upload files to main bucket 'noaa-nccf-dev' in us-east-2 and glacier in cross region bucket 'noaa-nccf-dev-archive' in us-west-2.
-
-    :return: str
-        Returns response from boto3 indicating if upload was successful.
- ''' - print("Handler...") +import yaml +import os - # config for s3 low level api for us-east-2 - s3 = s3_utils.connect('s3', s3_utils.conf['s3_region']) - bucket_name = s3_utils.conf['s3_bucket'] - - # config for s3 low level api cross origin us-west-2 - s3_cross_region = s3_utils.connect('s3', s3_utils.conf['s3_region2']) - bucket_name_cross_region = s3_utils.conf['s3_bucket2'] - - overwrite = True - - # Add 3 files to bucket - local_files = ["file1.csv", "file2.csv"] - s3_file = None - for file in local_files: - local_file = "tests/data/" + file - # changed the key for testing - s3_file = "public/NESDIS/CSB/" + file - s3_utils.upload_s3(s3, local_file, bucket_name, s3_file, overwrite) +from onestop.util.S3Utils import S3Utils - # Upload file to cross region bucket then transfer to glacier right after - s3_utils.upload_s3(s3_cross_region, local_file, bucket_name_cross_region, s3_file, overwrite) - s3_utils.s3_to_glacier(s3_cross_region, bucket_name_cross_region, s3_file) +config_dict = {} if __name__ == '__main__': + # Example command: python3 archive_client_integration.py -conf /Users/whoever/repo/onestop-clients/scripts/config/combined_template.yml -cred /Users/whoever/repo/onestop-clients/scripts/config/credentials.yml + # python3 archive_client_integration.py -cred /Users/whoever/repo/onestop-clients/scripts/config/credentials.yml parser = argparse.ArgumentParser(description="Launches archive client integration") - parser.add_argument('-conf', dest="conf", required=True, + # Set default config location to the Helm mounted pod configuration location + parser.add_argument('-conf', dest="conf", required=False, default='/etc/config/config.yml', help="AWS config filepath") - parser.add_argument('-cred', dest="cred", required=True, help="Credentials filepath") args = vars(parser.parse_args()) - # Get configuration file path locations + # Generate configuration dictionary conf_loc = args.pop('conf') + with open(conf_loc) as f: + config_dict.update(yaml.load(f, Loader=yaml.FullLoader)) + + # Get credentials from passed in fully qualified path or ENV. cred_loc = args.pop('cred') + if cred_loc is not None: + with open(cred_loc) as f: + creds = yaml.load(f, Loader=yaml.FullLoader) + registry_username = creds['registry']['username'] + registry_password = creds['registry']['password'] + access_key = creds['sandbox']['access_key'] + access_secret = creds['sandbox']['secret_key'] + else: + print("Using env variables for config parameters") + registry_username = os.environ.get("REGISTRY_USERNAME") + registry_password = os.environ.get("REGISTRY_PASSWORD") + access_key = os.environ.get("ACCESS_KEY") + access_secret = os.environ.get("SECRET_KEY") + + config_dict.update({ + 'registry_username' : registry_username, + 'registry_password' : registry_password, + 'access_key' : access_key, + 'secret_key' : access_secret + }) # Upload a test file to s3 bucket - s3_utils = S3Utils(conf_loc, cred_loc) - - handler() - - - - + s3_utils = S3Utils(**config_dict) - + s3 = s3_utils.connect('client', 's3', config_dict['s3_region']) + # config for s3 low level api cross origin us-west-2 + s3_cross_region = s3_utils.connect('client', 's3', config_dict['s3_region2']) + bucket_name_cross_region = config_dict['s3_bucket2'] + overwrite = True + # Files to upload - TODO: User should change these paths. 
+ local_files = ["/scripts/data/file1.csv", "/scripts/data/file2.csv"] + for file in local_files: + print("Uploading file: %s"%file) + # changed the key for testing + s3_file = "public/NESDIS/CSB/" + file + upload = s3_utils.upload_s3(s3, file, config_dict['s3_bucket'], s3_file, overwrite) + if not upload: + raise Exception("Unknown, upload to s3 failed.") + # Upload file to cross region bucket then transfer to glacier right after + upload = s3_utils.upload_s3(s3_cross_region, file, bucket_name_cross_region, s3_file, overwrite) + if not upload: + raise Exception("Unknown, upload to s3 failed.") + s3_utils.s3_to_glacier(s3_cross_region, bucket_name_cross_region, s3_file) diff --git a/scripts/bucket_automation.py b/scripts/bucket_automation.py index a64f11c..5c922ee 100644 --- a/scripts/bucket_automation.py +++ b/scripts/bucket_automation.py @@ -1,7 +1,12 @@ import argparse import json +import os +import yaml + from onestop.util.S3Utils import S3Utils +config_dict = {} + def handler(): ''' Creates bucket with defined key paths @@ -10,43 +15,42 @@ def handler(): Returns boto3 response indicating if bucket creation was successful ''' # connect to low level api - s3 = s3_utils.connect("s3", s3_utils.conf['s3_region']) + s3 = s3_utils.connect('client', 's3', config_dict['s3_region']) # use s3_resource api to check if the bucket exists - s3_resource = s3_utils.connect("s3_resource", s3_utils.conf['s3_region']) + s3_resource = s3_utils.connect('resource', 's3', config_dict['s3_region']) # Create bucket name bucket_name = "noaa-nccf-dev" - # checks to see if the bucket is already created, if it isn't create yet then it will create the bucket, set bucket policy, and create key paths + # Create bucket policy + bucket_policy = { + "Version": "2012-10-17", + "Id": "noaa-nccf-dev-policy", + "Statement": [ + { + "Sid": "PublicRead", + "Effect": "Allow", + "Principal": "*", + "Action": "s3:GetObject", + "Resource": f'arn:aws:s3:::{bucket_name}/public/*' + }] + } + # Convert the policy from JSON dict to string + bucket_policy_str = json.dumps(bucket_policy) + + # checks to see if the bucket is already created, if it isn't create it, then it will create the bucket, set bucket policy, and create key paths if not s3_resource.Bucket(bucket_name) in s3_resource.buckets.all(): """ - Create bucket - need to specify bucket location for every region except us-east-1 -> https://github.com/aws/aws-cli/issues/2603 """ s3.create_bucket(Bucket=bucket_name, - CreateBucketConfiguration={'LocationConstraint': s3_utils.conf['s3_region']}, + CreateBucketConfiguration={'LocationConstraint': config_dict['s3_region']}, ObjectLockEnabledForBucket=True) - # Create bucket policy - bucket_policy = { - "Version": "2012-10-17", - "Id": "noaa-nccf-dev-policy", - "Statement": [ - { - "Sid": "PublicRead", - "Effect": "Allow", - "Principal": "*", - "Action": "s3:GetObject", - "Resource": f'arn:aws:s3:::{bucket_name}/public/*' - }] - } - - # Convert the policy from JSON dict to string - bucket_policy = json.dumps(bucket_policy) - # Set new bucket policy - s3.put_bucket_policy(Bucket=bucket_name, Policy=bucket_policy) + s3.put_bucket_policy(Bucket=bucket_name, Policy=bucket_policy_str) """ - Create Public Key Paths @@ -86,6 +90,9 @@ def handler(): s3.put_object(Bucket=bucket_name, Body='', Key='private/OMAO/') s3.put_object(Bucket=bucket_name, Body='', Key='private/OAR/') + else: + #Set bucket policy + s3.put_bucket_policy(Bucket=bucket_name, Policy=bucket_policy_str) # Set CORS bucket config cors_config = { @@ -109,12 +116,6 @@ def 
handler(): } s3.put_bucket_cors(Bucket=bucket_name, CORSConfiguration=cors_config) - # Convert the policy from JSON dict to string - bucket_policy = json.dumps(bucket_policy) - - #Set new bucket policy - s3.put_bucket_policy(Bucket=bucket_name, Policy=bucket_policy) - """ - Set ACL for public read """ @@ -131,18 +132,42 @@ def handler(): if __name__ == '__main__': parser = argparse.ArgumentParser(description="Launches e2e test") - parser.add_argument('-conf', dest="conf", required=True, + # Set default config location to the Helm mounted pod configuration location + parser.add_argument('-conf', dest="conf", required=False, default='/etc/config/config.yml', help="AWS config filepath") - parser.add_argument('-cred', dest="cred", required=True, help="Credentials filepath") args = vars(parser.parse_args()) - # Get configuration file path locations + # Generate configuration dictionary conf_loc = args.pop('conf') + with open(conf_loc) as f: + config_dict.update(yaml.load(f, Loader=yaml.FullLoader)) + + # Get credentials from passed in fully qualified path or ENV. cred_loc = args.pop('cred') + if cred_loc is not None: + with open(cred_loc) as f: + creds = yaml.load(f, Loader=yaml.FullLoader) + registry_username = creds['registry']['username'] + registry_password = creds['registry']['password'] + access_key = creds['sandbox']['access_key'] + access_secret = creds['sandbox']['secret_key'] + else: + print("Using env variables for config parameters") + registry_username = os.environ.get("REGISTRY_USERNAME") + registry_password = os.environ.get("REGISTRY_PASSWORD") + access_key = os.environ.get("ACCESS_KEY") + access_secret = os.environ.get("SECRET_KEY") + + config_dict.update({ + 'registry_username' : registry_username, + 'registry_password' : registry_password, + 'access_key' : access_key, + 'secret_key' : access_secret + }) # Create S3Utils instance - s3_utils = S3Utils(conf_loc, cred_loc) + s3_utils = S3Utils(**config_dict) handler() \ No newline at end of file diff --git a/scripts/config/aws-util-config-dev.yml b/scripts/config/aws-util-config-dev.yml index e054f49..9102be0 100644 --- a/scripts/config/aws-util-config-dev.yml +++ b/scripts/config/aws-util-config-dev.yml @@ -1,5 +1,4 @@ # Example config values for osim client -log_level: INFO # AWS config values sqs_url: https://sqs.us-east-2.amazonaws.com/798276211865/cloud-archive-client-sqs diff --git a/scripts/config/aws-util-config-test.yml b/scripts/config/aws-util-config-test.yml index 6aac07a..9de4618 100644 --- a/scripts/config/aws-util-config-test.yml +++ b/scripts/config/aws-util-config-test.yml @@ -1,5 +1,4 @@ # Example config values for osim client -log_level: DEBUG # AWS config values sqs_url: 'test-queue' diff --git a/scripts/config/csb-data-stream-config.yml b/scripts/config/csb-data-stream-config.yml index 1556ab9..f110852 100644 --- a/scripts/config/csb-data-stream-config.yml +++ b/scripts/config/csb-data-stream-config.yml @@ -1,12 +1,15 @@ -log_level: INFO +# COLLECTION or GRANULE +kafka_consumer_metadata_type: COLLECTION +kafka_publisher_metadata_type: COLLECTION +s3_message_adapter_metadata_type: COLLECTION + format: csv headers: UNIQUE_ID,FILE_UUID,LON,LAT,DEPTH,TIME,PLATFORM_NAME,PROVIDER -type: COLLECTION collection_id: fdb56230-87f4-49f2-ab83-104cfd073177 -psi_registry_url: https://internal-a683c98a66fb011eaa4230e0d5e5657f-369075387.us-east-1.elb.amazonaws.com +registry_base_url: https://internal-a683c98a66fb011eaa4230e0d5e5657f-369075387.us-east-1.elb.amazonaws.com access_bucket: 
https://archive-testing-demo.s3-us-east-2.amazonaws.com #access_bucket: https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com -file_identifier_prefix: "gov.noaa.ncei.csb:" +file_id_prefix: "gov.noaa.ncei.csb:" prefixMap: NESDIS/CSB: 'fdb56230-87f4-49f2-ab83-104cfd073177' diff --git a/scripts/config/kafka-publisher-config-dev.yml b/scripts/config/kafka-publisher-config-dev.yml index 85a66f3..8a94bf3 100644 --- a/scripts/config/kafka-publisher-config-dev.yml +++ b/scripts/config/kafka-publisher-config-dev.yml @@ -1,5 +1,4 @@ # Example config values for osim client -log_level: DEBUG # COLLECTION or GRANULE metadata_type: GRANULE @@ -7,8 +6,8 @@ metadata_type: GRANULE # Kafka config values brokers: onestop-dev-cp-kafka:9092 schema_registry: http://onestop-dev-cp-schema-registry:8081 -collection_topic_produce: psi-granules-by-collection -granule_topic_produce: psi-granule-parsed +collection_topic_publish: psi-granules-by-collection +granule_topic_publish: psi-granule-parsed collection_topic_consume: psi-collection-input-unknown granule_topic_consume: psi-granule-input-unknown group_id: sme-test diff --git a/scripts/config/web-publisher-config-dev.yml b/scripts/config/web-publisher-config-dev.yml index 9b08391..387d252 100644 --- a/scripts/config/web-publisher-config-dev.yml +++ b/scripts/config/web-publisher-config-dev.yml @@ -1,5 +1,4 @@ # Example config values for osim client -log_level: INFO # COLLECTION or GRANULE metadata_type: granule diff --git a/scripts/config/web-publisher-config-local.yml b/scripts/config/web-publisher-config-local.yml index 32db955..3ce7d88 100644 --- a/scripts/config/web-publisher-config-local.yml +++ b/scripts/config/web-publisher-config-local.yml @@ -1,5 +1,4 @@ # Example config values for osim client -log_level: INFO # COLLECTION or GRANULE metadata_type: granule diff --git a/scripts/launch_delete_handler.py b/scripts/launch_delete_handler.py index 7bb3983..6d000d4 100644 --- a/scripts/launch_delete_handler.py +++ b/scripts/launch_delete_handler.py @@ -1,79 +1,59 @@ -import json -import boto3 import argparse -from moto import mock_s3 -from moto import mock_sqs -from tests.utils import create_delete_message +import os +import yaml + from onestop.WebPublisher import WebPublisher from onestop.util.S3Utils import S3Utils from onestop.util.SqsConsumer import SqsConsumer from onestop.util.SqsHandlers import create_delete_handler - -def mock_init_s3(s3u): - """ Sets up bucket, object, SQS queue, and delete message. 
- - Assumes there are additional keys passed in via config - - :param s3u: S3Utils object - :return: URL of the mock queue created in SQS - """ - boto_client = s3u.connect("s3", None) - bucket = s3u.conf['s3_bucket'] - region = s3u.conf['s3_region'] - key = s3u.conf['s3_key'] - boto_client.create_bucket(Bucket=bucket) - boto_client.put_object(Bucket=bucket, Key=key, Body="foobar") - - sqs_client = boto3.client('sqs', region_name=region) - sqs_queue = sqs_client.create_queue(QueueName=s3u.conf['sqs_name']) - message = create_delete_message(region, bucket, key) - sqs_client.send_message(QueueUrl=sqs_queue['QueueUrl'], MessageBody=json.dumps(message)) - return sqs_queue['QueueUrl'] - +config_dict = {} if __name__ == '__main__': # All command-line arguments have defaults that use test data, with AWS mocking set to true parser = argparse.ArgumentParser(description="Launches SQS delete test") - parser.add_argument('--aws-conf', dest="aws_conf", required=False, default="config/aws-util-config-test.yml", + # Set default config location to the Helm mounted pod configuration location + parser.add_argument('-conf', dest="conf", required=False, default='/etc/config/config.yml', help="AWS config filepath") - parser.add_argument('--osim-conf', dest="osim_conf", required=False, default="config/web-publisher-config-local.yml", - help="OSIM config filepath") - parser.add_argument('-mock', dest="mock", required=False, default=True, help="Use mock AWS or real values") - - parser.add_argument('-cred', dest="cred", required=False, default="config/credentials-template.yml", + parser.add_argument('-cred', dest="cred", required=True, help="Credentials filepath") args = vars(parser.parse_args()) - wp_config = args.pop('osim_conf') - aws_config = args.pop('aws_conf') - cred_config = args.pop('cred') - use_mocks = args.pop('mock') - - web_publisher = WebPublisher(wp_config, cred_config) - s3_utils = S3Utils(aws_config, cred_config) - sqs_consumer = SqsConsumer(aws_config, cred_config) - - if use_mocks is True: - mock_1 = mock_s3() - mock_2 = mock_sqs() - mock_1.start() - mock_2.start() - mock_queue_url = mock_init_s3(s3_utils) - # Need to override the config value here so that sqs_consumer.connect will use the correct url for the queue - sqs_consumer.conf['sqs_url'] = mock_queue_url - - sqs_max_polls = s3_utils.conf['sqs_max_polls'] + # Generate configuration dictionary + conf_loc = args.pop('conf') + with open(conf_loc) as f: + config_dict.update(yaml.load(f, Loader=yaml.FullLoader)) + + # Get credentials from passed in fully qualified path or ENV. 
+ cred_loc = args.pop('cred') + if cred_loc is not None: + with open(cred_loc) as f: + creds = yaml.load(f, Loader=yaml.FullLoader) + registry_username = creds['registry']['username'] + registry_password = creds['registry']['password'] + access_key = creds['sandbox']['access_key'] + access_secret = creds['sandbox']['secret_key'] + else: + print("Using env variables for config parameters") + registry_username = os.environ.get("REGISTRY_USERNAME") + registry_password = os.environ.get("REGISTRY_PASSWORD") + access_key = os.environ.get("ACCESS_KEY") + access_secret = os.environ.get("SECRET_KEY") + + config_dict.update({ + 'registry_username' : registry_username, + 'registry_password' : registry_password, + 'access_key' : access_key, + 'secret_key' : access_secret + }) + + web_publisher = WebPublisher(**config_dict) + s3_utils = S3Utils(**config_dict) + sqs_consumer = SqsConsumer(**config_dict) + + sqs_max_polls = config_dict['sqs_max_polls'] delete_handler = create_delete_handler(web_publisher) + s3_resource = s3_utils.connect('resource', 'sqs', config_dict['s3_region']) + queue = sqs_consumer.connect(s3_resource, config_dict['sqs_name']) - queue = sqs_consumer.connect() - try: - sqs_consumer.receive_messages(queue, sqs_max_polls, delete_handler) - if use_mocks is True: - mock_1.stop() - mock_2.stop() - except Exception as e: - print("Message queue consumption failed: {}".format(e)) - if use_mocks is True: - mock_1.stop() - mock_2.stop() + sqs_consumer.receive_messages(queue, sqs_max_polls, delete_handler) diff --git a/scripts/launch_e2e.py b/scripts/launch_e2e.py index 2d5b79b..2af68c3 100644 --- a/scripts/launch_e2e.py +++ b/scripts/launch_e2e.py @@ -1,69 +1,73 @@ import argparse import json import os +import yaml + from onestop.util.SqsConsumer import SqsConsumer from onestop.util.S3Utils import S3Utils from onestop.util.S3MessageAdapter import S3MessageAdapter from onestop.WebPublisher import WebPublisher -from onestop.extract.CsbExtractor import CsbExtractor from onestop.schemas.util.jsonEncoder import EnumEncoder +from onestop.util.ClientLogger import ClientLogger +config_dict = {} -def handler(recs): +def handler(rec, log_level): ''' Processes metadata information from sqs message triggered by S3 event and uploads to registry through web publisher (https). Also uploads s3 object to glacier. 
- :param recs: dict + :param rec: dict sqs message triggered by s3 event :return: str IM registry response and boto3 glacier response ''' - print("Handler...") - - # Now get boto client for object-uuid retrieval - object_uuid = None - bucket = None - print(recs) - if recs is None: - print("No records retrieved") + logger = ClientLogger.get_logger('launch_e2e.handler', log_level, False) + logger.info('In Handler') + + # If record exists try to get object-uuid retrieval + logger.debug('Record:%s'%rec) + if rec is None: + logger.info('No record retrieved, doing nothing.') + return + + logger.debug('Record: %s'%rec) + bucket = rec['s3']['bucket']['name'] + s3_key = rec['s3']['object']['key'] + logger.info("Getting uuid") + s3_resource = s3_utils.connect('resource', 's3', None) + object_uuid = s3_utils.get_uuid_metadata(s3_resource, bucket, s3_key) + if object_uuid is not None: + logger.info('Retrieved object-uuid: %s'% object_uuid) else: - rec = recs[0] - print(rec) - bucket = rec['s3']['bucket']['name'] - s3_key = rec['s3']['object']['key'] - print("Getting uuid") - # High-level api - s3_resource = s3_utils.connect("s3_resource", None) - object_uuid = s3_utils.get_uuid_metadata(s3_resource, bucket, s3_key) - if object_uuid is not None: - print("Retrieved object-uuid: " + object_uuid) - else: - print("Adding uuid") - s3_utils.add_uuid_metadata(s3_resource, bucket, s3_key) - - im_message = s3ma.transform(recs) + logger.info('UUID not found, adding uuid to bucket=%s key=%s'%(bucket, s3_key)) + s3_utils.add_uuid_metadata(s3_resource, bucket, s3_key) + s3ma = S3MessageAdapter(**config_dict) + im_message = s3ma.transform(rec) + logger.debug('S3MessageAdapter.transform: %s'%im_message) json_payload = json.dumps(im_message.to_dict(), cls=EnumEncoder) + logger.debug('S3MessageAdapter.transform.json dump: %s'%json_payload) - print(json_payload) - - + wp = WebPublisher(**config_dict) registry_response = wp.publish_registry("granule", object_uuid, json_payload, "POST") - #print(registry_response.json()) + logger.debug('publish_registry response: %s'%registry_response.json()) # Upload to archive file_data = s3_utils.read_bytes_s3(s3_client, bucket, s3_key) - glacier = s3_utils.connect("glacier", s3_utils.conf['s3_region']) - vault_name = s3_utils.conf['vault_name'] - + glacier = s3_utils.connect('client', 'glacier', config_dict['s3_region']) + vault_name = config_dict['vault_name'] resp_dict = s3_utils.upload_archive(glacier, vault_name, file_data) + logger.debug('Upload to cloud, Response: %s'%resp_dict) + if resp_dict == None: + logger.error('Error uploading to s3 archive, see prior log statements.') + return - print("archiveLocation: " + resp_dict['location']) - print("archiveId: " + resp_dict['archiveId']) - print("sha256: " + resp_dict['checksum']) + logger.info('upload archived location: %s'% resp_dict['location']) + logger.info('archiveId: %s'% resp_dict['archiveId']) + logger.info('sha256: %s'% resp_dict['checksum']) addlocPayload = { "fileLocations": { @@ -80,97 +84,60 @@ def handler(recs): json_payload = json.dumps(addlocPayload, indent=2) # Send patch request next with archive location registry_response = wp.publish_registry("granule", object_uuid, json_payload, "PATCH") - + logger.debug('publish to registry response: %s'% registry_response) + logger.info('Finished publishing to registry.') if __name__ == '__main__': - """ - parser = argparse.ArgumentParser(description="Launches e2e test") - parser.add_argument('-conf', dest="conf", required=True, - help="AWS config filepath") - 
parser.add_argument('-cred', dest="cred", required=True, - help="Credentials filepath") - args = vars(parser.parse_args()) - # Get configuration file path locations - conf_loc = args.pop('conf') - cred_loc = args.pop('cred') - # Upload a test file to s3 bucket - s3_utils = S3Utils(conf_loc, cred_loc) - # Low-level api ? Can we just use high level revisit me! - s3 = s3_utils.connect("s3", None) - registry_user = os.environ.get("REGISTRY_USERNAME") - registry_pwd = os.environ.get("REGISTRY_PASSWORD") - print(registry_user) - access_key = os.environ.get("AWS_ACCESS") - access_secret = os.environ.get("AWS_SECRET") - print(access_key) - - # High-level api - s3_resource = s3_utils.connect("s3_resource", None) - bucket = s3_utils.conf['s3_bucket'] - overwrite = True - sqs_max_polls = s3_utils.conf['sqs_max_polls'] - # Add 3 files to bucket - local_files = ["file1.csv", "file4.csv"] - s3_file = None - for file in local_files: - local_file = "tests/data/" + file - s3_file = "csv/" + file - s3_utils.upload_s3(s3, local_file, bucket, s3_file, overwrite) - # Receive s3 message and MVM from SQS queue - sqs_consumer = SqsConsumer(conf_loc, cred_loc) - s3ma = S3MessageAdapter("scripts/config/csb-data-stream-config.yml", s3_utils) - # Retrieve data from s3 object - #csb_extractor = CsbExtractor() - wp = WebPublisher("config/web-publisher-config-dev.yml", cred_loc) - queue = sqs_consumer.connect() - try: - debug = False - sqs_consumer.receive_messages(queue, sqs_max_polls, handler) - except Exception as e: - print("Message queue consumption failed: {}".format(e)) - """ - parser = argparse.ArgumentParser(description="Launches e2e test") - parser.add_argument('-conf', dest="conf", required=True, + parser.add_argument('-conf', dest="conf", required=False, default='/etc/config/config.yml', help="AWS config filepath") - parser.add_argument('-cred', dest="cred", required=True, help="Credentials filepath") args = vars(parser.parse_args()) - # Get configuration file path locations + # Generate configuration dictionary conf_loc = args.pop('conf') - cred_loc = args.pop('cred') - - # Upload a test file to s3 bucket - s3_utils = S3Utils(conf_loc, cred_loc) + with open(conf_loc) as f: + config_dict.update(yaml.load(f, Loader=yaml.FullLoader)) - # Low-level api ? Can we just use high level revisit me! - s3_client = s3_utils.connect("s3", None) - - bucket = s3_utils.conf['s3_bucket'] - - sqs_max_polls = s3_utils.conf['sqs_max_polls'] - - # Add 3 files to bucket + # Get credentials from passed in fully qualified path or ENV. 
+ cred_loc = args.pop('cred') + if cred_loc is not None: + with open(cred_loc) as f: + creds = yaml.load(f, Loader=yaml.FullLoader) + registry_username = creds['registry']['username'] + registry_password = creds['registry']['password'] + access_key = creds['sandbox']['access_key'] + access_secret = creds['sandbox']['secret_key'] + else: + print("Using env variables for config parameters") + registry_username = os.environ.get("REGISTRY_USERNAME") + registry_password = os.environ.get("REGISTRY_PASSWORD") + access_key = os.environ.get("ACCESS_KEY") + access_secret = os.environ.get("SECRET_KEY") + + config_dict.update({ + 'registry_username' : registry_username, + 'registry_password' : registry_password, + 'access_key' : access_key, + 'secret_key' : access_secret + }) + + s3_utils = S3Utils(**config_dict) + s3_client = s3_utils.connect('client', 's3', config_dict['s3_region']) + + # Upload test files to s3 bucket local_files = ["file1.csv", "file4.csv"] s3_file = None for file in local_files: - local_file = "data/" + file + local_file = "scripts/data/" + file # s3_file = "csv/" + file - s3_file = "NESDIS/CSB/" + file - if not s3_utils.upload_s3(s3_client, local_file, bucket, s3_file, True): + s3_file = "public/" + file + if not s3_utils.upload_s3(s3_client, local_file, config_dict['s3_bucket'], s3_file, True): exit("Error setting up for e2e: The test files were not uploaded to the s3 bucket therefore the tests cannot continue.") # Receive s3 message and MVM from SQS queue - sqs_consumer = SqsConsumer(conf_loc, cred_loc) - s3ma = S3MessageAdapter("config/csb-data-stream-config.yml", s3_utils) - wp = WebPublisher("config/web-publisher-config-dev.yml", cred_loc) - - queue = sqs_consumer.connect() - try: - debug = False - sqs_consumer.receive_messages(queue, sqs_max_polls, handler) - - except Exception as e: - print("Message queue consumption failed: {}".format(e)) + sqs_consumer = SqsConsumer(**config_dict) + sqs_resource = s3_utils.connect('resource', 'sqs', config_dict['s3_region']) + queue = sqs_consumer.connect(sqs_resource, config_dict['sqs_name']) + sqs_consumer.receive_messages(queue, config_dict['sqs_max_polls'], handler) diff --git a/scripts/launch_kafka_publisher.py b/scripts/launch_kafka_publisher.py index f4a853d..85283c2 100644 --- a/scripts/launch_kafka_publisher.py +++ b/scripts/launch_kafka_publisher.py @@ -1,17 +1,21 @@ import argparse +import yaml + from onestop.KafkaPublisher import KafkaPublisher if __name__ == '__main__': ''' Uploads collection to Kafka collection topic ''' - parser = argparse.ArgumentParser(description="Launches KafkaPublisher to publish kafkda topics") - parser.add_argument('-conf', dest="conf", required=True, + parser = argparse.ArgumentParser(description="Launches KafkaPublisher to publish kafka topics") + parser.add_argument('-conf', dest="conf", required=False, default='/etc/config/config.yml', help="Config filepath") args = vars(parser.parse_args()) conf_loc = args.pop('conf') + with open(conf_loc) as f: + conf = yaml.load(f, Loader=yaml.FullLoader) # "discovery": # { @@ -22,13 +26,19 @@ # FileIdentifier: gov.noaa.ngdc.mgg.dem:yaquina_bay_p210_30m collection_uuid = '3ee5976e-789a-41d5-9cae-d51e7b92a247' content_dict = {'discovery': {'title': 'My Extra New Title!', - 'fileIdentifier': 'gov.noaa.osim2.mgg.dem:yaquina_bay_p210_30m' + 'fileIdentifier': 'gov.noaa.osim2.mgg.dem:yaquina_bay_p210_30m', + "links": [ + { + "linkFunction": "download", "linkName": "Amazon S3", "linkProtocol": "HTTPS", + "linkUrl": 
"https://s3.amazonaws.com/nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2" + } + ] } } # method one of POST, PUT, PATCH, DELETE method = 'POST' #Update - kafka_publisher = KafkaPublisher(conf_loc) + kafka_publisher = KafkaPublisher(**conf) metadata_producer = kafka_publisher.connect() kafka_publisher.publish_collection(metadata_producer, collection_uuid, content_dict, method) diff --git a/scripts/launch_pyconsumer.py b/scripts/launch_pyconsumer.py index f9dbcf6..1ad7282 100644 --- a/scripts/launch_pyconsumer.py +++ b/scripts/launch_pyconsumer.py @@ -1,85 +1,108 @@ +import argparse import os +import yaml +import json + from onestop.util.SqsConsumer import SqsConsumer from onestop.util.S3Utils import S3Utils from onestop.util.S3MessageAdapter import S3MessageAdapter from onestop.WebPublisher import WebPublisher +from onestop.util.ClientLogger import ClientLogger +from onestop.schemas.util.jsonEncoder import EnumEncoder +from botocore.exceptions import ClientError +config_dict = {} -def handler(recs): +def handler(rec, log_level): ''' Processes metadata information from sqs message triggered by S3 event and uploads to registry through web publisher (https). Utilizes helm for credentials and conf. - :param recs: dict + :param rec: dict sqs message triggered by s3 event :return: str IM registry response ''' - print("Handling message...") + logger = ClientLogger.get_logger('launch_pyconsumer.handler', log_level, False) + logger.info('In Handler') # Now get boto client for object-uuid retrieval object_uuid = None - if recs is None: - print("No records retrieved") - else: - rec = recs[0] - bucket = rec['s3']['bucket']['name'] - s3_key = rec['s3']['object']['key'] + if rec is None: + logger.info('No record retrieved, doing nothing.') + return - # Fetch the object to get the uuid + bucket = rec['s3']['bucket']['name'] + s3_key = rec['s3']['object']['key'] + logger.debug('Rec: %s'%rec) + # Fetch the object to get the uuid + logger.info("Getting uuid") + s3_resource = s3_utils.connect('resource', 's3', None) + try: object_uuid = s3_utils.get_uuid_metadata(s3_resource, bucket, s3_key) + except ClientError as e: + logger.error(e) + return - if object_uuid is not None: - print("Retrieved object-uuid: " + object_uuid) - else: - print("Adding uuid") - s3_utils.add_uuid_metadata(s3_resource, bucket, s3_key) + if object_uuid is not None: + logger.info('Retrieved object-uuid: %s'% object_uuid) + else: + logger.info('UUID not found, adding uuid to bucket=%s key=%s'%(bucket, s3_key)) + s3_utils.add_uuid_metadata(s3_resource, bucket, s3_key) # Convert s3 message to IM message - s3ma = S3MessageAdapter(conf_loc, s3_utils) - json_payload = s3ma.transform(recs) + s3ma = S3MessageAdapter(**config_dict) + im_message = s3ma.transform(rec) + logger.debug('S3MessageAdapter.transform: %s'%im_message) + json_payload = json.dumps(im_message.to_dict(), cls=EnumEncoder) + logger.debug('S3MessageAdapter.transform.json dump: %s'%json_payload) #Send the message to Onestop - wp = WebPublisher(conf_loc, cred_loc) - registry_response = wp.publish_registry("granule", object_uuid, json_payload.serialize(), "POST") - print("RESPONSE: ") - print(registry_response.json()) + wp = WebPublisher(**config_dict) + registry_response = wp.publish_registry("granule", object_uuid, json_payload, "POST") + logger.debug('publish_registry response: %s'%registry_response.json()) if __name__ == '__main__': - conf_loc = "/etc/config/config.yml" - cred_loc = "creds.yml" - - registry_user = 
os.environ.get("REGISTRY_USERNAME") - registry_pwd = os.environ.get("REGISTRY_PASSWORD") - access_key = os.environ.get("ACCESS_KEY") - access_secret = os.environ.get("SECRET_KEY") - - f = open(cred_loc, "w+") - -#write creds to a file to avoid changing the python library - s = """sandbox: - access_key: {key} - secret_key: {secret} - -registry: - username: {user} - password: {pw} - """.format(key=access_key, secret=access_secret, user=registry_user, pw=registry_pwd) - f.write(s) - f.close() - r = open(cred_loc, "r") - - # # Receive s3 message and MVM from SQS queue - s3_utils = S3Utils(conf_loc, cred_loc) - sqs_max_polls = s3_utils.conf['sqs_max_polls'] - sqs_consumer = SqsConsumer(conf_loc, cred_loc) - queue = sqs_consumer.connect() - - try: - debug = False - # # Pass in the handler method - sqs_consumer.receive_messages(queue, sqs_max_polls, handler) - - except Exception as e: - print("Message queue consumption failed: {}".format(e)) + parser = argparse.ArgumentParser(description="Launches e2e test") + parser.add_argument('-conf', dest="conf", required=False, default='/etc/config/config.yml', + help="AWS config filepath") + parser.add_argument('-cred', dest="cred", required=True, + help="Credentials filepath") + args = vars(parser.parse_args()) + + # Generate configuration dictionary + conf_loc = args.pop('conf') + with open(conf_loc) as f: + config_dict.update(yaml.load(f, Loader=yaml.FullLoader)) + + # Get credentials from passed in fully qualified path or ENV. + cred_loc = args.pop('cred') + if cred_loc is not None: + with open(cred_loc) as f: + creds = yaml.load(f, Loader=yaml.FullLoader) + registry_username = creds['registry']['username'] + registry_password = creds['registry']['password'] + access_key = creds['sandbox']['access_key'] + access_secret = creds['sandbox']['secret_key'] + else: + print("Using env variables for config parameters") + registry_username = os.environ.get("REGISTRY_USERNAME") + registry_password = os.environ.get("REGISTRY_PASSWORD") + access_key = os.environ.get("ACCESS_KEY") + access_secret = os.environ.get("SECRET_KEY") + + config_dict.update({ + 'registry_username' : registry_username, + 'registry_password' : registry_password, + 'access_key' : access_key, + 'secret_key' : access_secret + }) + + s3_utils = S3Utils(**config_dict) + + # Receive s3 message and MVM from SQS queue + sqs_consumer = SqsConsumer(**config_dict) + sqs_resource = s3_utils.connect('resource', 'sqs', config_dict['s3_region']) + queue = sqs_consumer.connect(sqs_resource, config_dict['sqs_name']) + sqs_consumer.receive_messages(queue, config_dict['sqs_max_polls'], handler) diff --git a/scripts/sme/Dockerfile b/scripts/sme/Dockerfile index d4b48fa..19051c3 100644 --- a/scripts/sme/Dockerfile +++ b/scripts/sme/Dockerfile @@ -1,6 +1,8 @@ +# Expect this to copy the scripts directory over and install onestop-python-client. FROM cedardevs/onestop-python-client:latest -COPY . . 
+ +# Install additional python libraries needed by scripts RUN pip install argparse RUN pip install psycopg2 -#ENTRYPOINT [ "python" ,"scripts/sme/sme.py", "-cmd consume", "-b localhost:9092", "-s http://localhost:8081", "-t psi-collection-extractor-to" , "-g sme-test", "-o earliest" ] + CMD tail -f /dev/null diff --git a/scripts/sme/sme.py b/scripts/sme/sme.py index 6509aa3..9dd92d8 100644 --- a/scripts/sme/sme.py +++ b/scripts/sme/sme.py @@ -1,6 +1,7 @@ -import argparse import json import os +import yaml +import argparse from onestop.extract.CsbExtractor import CsbExtractor from onestop.KafkaConsumer import KafkaConsumer @@ -9,13 +10,15 @@ from onestop.schemas.geojsonSchemaClasses.org.cedar.schemas.avro.geojson.point import Point from onestop.schemas.geojsonSchemaClasses.point_type import PointType from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.parsed_record import ParsedRecord -from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.checksum_algorithm import ChecksumAlgorithm from onestop.schemas.psiSchemaClasses.org.cedar.schemas.avro.psi.temporal_bounding import TemporalBounding -from onestop.schemas.util.jsonEncoder import EnumEncoder, as_enum, EnumEncoderValue +from onestop.schemas.util.jsonEncoder import as_enum, EnumEncoderValue from onestop.KafkaPublisher import KafkaPublisher -from spatial import script_generation, postgres_insert +#from spatial import script_generation, postgres_insert +from onestop.util.ClientLogger import ClientLogger -def handler(key, value): +config_dict = {} + +def handler(key, value, log_level = 'INFO'): ''' Consumes message from psi-input-unknown, extracts geospatial data, uploads new payload to parsed-record topic in kafka, and uploads geospatial data to Postgres @@ -27,99 +30,158 @@ def handler(key, value): :return: str returns response message from kafka ''' - print('Key:', key) - print('Value: ' ,value) # Grabs the contents of the message and turns the dict string into a dictionary using json.loads - try: - content_dict = json.loads(value['content'], object_hook=as_enum) - - parsed_record = ParsedRecord().from_dict(content_dict) - - # Geospatial Extraction - # Extract the bucket key for csb_extractor object initialization - bucket_key = content_dict['discovery']['links'][0]['linkUrl'].split('.com/')[1] - - csb_extractor = CsbExtractor(su, bucket_key) - if csb_extractor.is_csv(bucket_key): - geospatial = csb_extractor.get_spatial_temporal_bounds('LON', 'LAT', 'TIME') - begin_date, end_date = geospatial['temporal'][0], geospatial['temporal'][1] - max_lon, max_lat, min_lon, min_lat = geospatial['geospatial'][2], geospatial['geospatial'][3], \ - geospatial['geospatial'][0], geospatial['geospatial'][1] - coords = csb_extractor.extract_coords(max_lon, max_lat, min_lon, min_lat) - - # Create spatial bounding types based on the given coords - pointType = PointType('Point') - point = Point(coordinates=coords[0], type=pointType) - - # Create temp bounding obj - tempBounding = TemporalBounding(beginDate=begin_date, endDate=end_date) - - # Update parsed record object with geospatial data - parsed_record.discovery.temporalBounding = tempBounding - parsed_record.discovery.spatialBounding = point - - """ - # Insert data into postgres - script = script_generation(coords[0], key) - postgres_insert(script) - """ - - # update content dict - parsed_record.type = value['type'] - content_dict = parsed_record.to_dict() - # reformat Relationship field - relationship_type = content_dict['relationships'][0]['type']['type'] - 
content_dict['relationships'][0]['type'] = relationship_type - - # reformat File Locations - filelocation_type = content_dict['fileLocations']['type']['type'] - content_dict['fileLocations']['type'] = filelocation_type - + logger = ClientLogger.get_logger('sme.handler', log_level, False) + logger.info('In Handler: key=%s value=%s'%(key, value)) + + # This is an example for testing purposes. + test_value = { + "type": "granule", + "content": "{" + "\"discovery\": {\n " + "\"fileIdentifier\": \"92ade5dc-946d-11ea-abe4-0242ac120004\",\n " + "\"links\": [\n {\n " + "\"linkFunction\": \"download\",\n " + "\"linkName\": \"Amazon S3\",\n " + "\"linkProtocol\": \"HTTPS\",\n " + "\"linkUrl\": \"https://s3.amazonaws.com/nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\"\n " + "}\n ],\n " + "\"parentIdentifier\": \"0fad03df-0805-434a-86a6-7dc42d68480f\",\n " + "\"spatialBounding\": null,\n " + "\"temporalBounding\": {\n " + "\"beginDate\": \"2020-05-12T16:20:15.158Z\", \n " + "\"endDate\": \"2020-05-12T16:21:51.494Z\"\n " + "},\n " + "\"title\": \"HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\"\n " + "},\n " + "\"fileInformation\": {\n " + "\"checksums\": [{" + "\"algorithm\": \"MD5\"," + "\"value\": \"44d2452e8bc2c8013e9c673086fbab7a\"" + "}]\n, " + "\"optionalAttributes\":{}, " + "\"format\": \"HSD\",\n " + "\"name\": \"HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\",\n " + "\"size\": 208918\n " + "},\n " + "\"fileLocations\": {\n " + "\"s3://nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\": {\n" + "\"optionalAttributes\":{}, " + "\"uri\":\"//nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\", " + "\"asynchronous\": false,\n " + "\"deleted\": false,\n " + "\"lastModified\": 1589300890000,\n " + "\"locality\": \"us-east-1\",\n " + "\"restricted\": false,\n " + "\"serviceType\": \"Amazon:AWS:S3\",\n " + "\"type\": {\"__enum__\": \"FileLocationType.INGEST\"},\n " + "\"uri\": \"s3://nesdis-incoming-data/Himawari-8/AHI-L1b-Japan/2020/05/12/1620/HS_H08_20200512_1620_B05_JP01_R20_S0101.DAT.bz2\"\n " + "}\n " + "},\n " + "\"relationships\": [\n {\n " + "\"id\": \"0fad03df-0805-434a-86a6-7dc42d68480f\",\n " + "\"type\": {\"__enum__\": \"RelationshipType.COLLECTION\"} }\n ]\n " + "}", + "contentType": "application/json", + "method": "PUT", + "source": "unknown", + "operation": "ADD" + } + logger.debug('content: %s'%value['content']) + content_dict = json.loads(value['content'], object_hook=as_enum) # this can fail if input values fail to map to avro ENUM values. + logger.debug('content_dict: %s'%content_dict) + parsed_record = ParsedRecord.from_dict(content_dict) # or ParsedRecord(**content_dict) # this can fail if input values fail to map to avro class values. 
+ + # Geospatial Extraction + bucket_key = content_dict['discovery']['links'][0]['linkUrl'].split('.com/')[1] + logger.info("Bucket key="+bucket_key) + if CsbExtractor.is_csv(bucket_key): + logger.info('Extracting geospatial information') + sm_open_file = su.get_csv_s3(su.connect("session", None), config_dict['s3_bucket'], bucket_key) + geospatial = CsbExtractor.get_spatial_temporal_bounds(sm_open_file, 'LON', 'LAT', 'TIME') + begin_date, end_date = geospatial['temporal'][0], geospatial['temporal'][1] + max_lon, max_lat, min_lon, min_lat = geospatial['geospatial'][2], geospatial['geospatial'][3], \ + geospatial['geospatial'][0], geospatial['geospatial'][1] + coords = CsbExtractor.extract_coords(sm_open_file, max_lon, max_lat, min_lon, min_lat) + + # Create spatial bounding types based on the given coords + pointType = PointType('Point') + point = Point(coordinates=coords[0], type=pointType) content_dict['discovery']['spatialBounding']['type'] = pointType.value - # Transform content_dict to appropiate payload - # cls=EnumEncoderValue argument looks for instances of Enum classes and extracts only the value of the Enum - content_dict = json.dumps(content_dict, cls=EnumEncoderValue) - content_dict = json.loads(content_dict) - - # Produce new information to kafka - kafka_publisher = KafkaPublisher("scripts/config/kafka-publisher-config-dev.yml") - metadata_producer = kafka_publisher.connect() - collection_id = parsed_record.relationships[0].id - kafka_publisher.publish_granule(metadata_producer, collection_id, collection_id, content_dict) - - except: - print('Invalid Format') - + # Create temp bounding obj + logger.debug('beginDate=%s endDate=%s'%(begin_date, end_date)) + tempBounding = TemporalBounding(beginDate=begin_date, endDate=end_date) + + # Update parsed record object with geospatial data + parsed_record.discovery.temporalBounding = tempBounding + parsed_record.discovery.spatialBounding = point + + """ + # Insert data into postgres + script = script_generation(coords[0], key) + postgres_insert(script) + """ + else: + logger.info('Record not CSV - Skipping extracting geospatial information') + + # update content dict + parsed_record.type = value['type'] + content_dict = parsed_record.to_dict() + + # Transform content_dict to appropriate payload + # cls=EnumEncoderValue argument looks for instances of Enum classes and extracts only the value of the Enum + content_dict = json.dumps(content_dict, cls=EnumEncoderValue) + content_dict = json.loads(content_dict) + + # Produce new information to publish to kafka, TODO: Be wary of cyclical publish/consuming here, since the consumer calls this handler. 
+ kafka_publisher = KafkaPublisher(**config_dict) + metadata_producer = kafka_publisher.connect() + collection_id = parsed_record.relationships[0].id + kafka_publisher.publish_granule(metadata_producer, collection_id, content_dict) if __name__ == '__main__': - # This is where helm will mount the config - conf_loc = "/etc/config/config.yml" - # this is where we are about to write the cred yaml - cred_loc = "creds.yml" - - registry_user = os.environ.get("REGISTRY_USERNAME") - registry_pwd = os.environ.get("REGISTRY_PASSWORD") - access_key = os.environ.get("ACCESS_KEY") - access_secret = os.environ.get("SECRET_KEY") - - f = open(cred_loc, "w+") - - # TODO revisit this when we make a standard that all scripts will follow - # write creds to a file to avoid changing the python library - s = """ - sandbox: - access_key: {key} - secret_key: {secret} - registry: - username: {user} - password: {pw} - """.format(key=access_key, secret=access_secret, user=registry_user, pw=registry_pwd) - f.write(s) - f.close() - r = open(cred_loc, "r") - - su = S3Utils(conf_loc, cred_loc) - kafka_consumer = KafkaConsumer(conf_loc) + # Example command: python3 sme.py -conf /Users/whoever/repo/onestop-clients/scripts/config/combined_template.yml -cred /Users/whoever/repo/onestop-clients/scripts/config/credentials.yml + # python3 archive_client_integration.py -cred /Users/whoever/repo/onestop-clients/scripts/config/credentials.yml + parser = argparse.ArgumentParser(description="Launches sme test") + # Set default config location to the Helm mounted pod configuration location + parser.add_argument('-conf', dest="conf", required=False, default='/etc/config/config.yml', + help="AWS config filepath") + parser.add_argument('-cred', dest="cred", required=True, + help="Credentials filepath") + args = vars(parser.parse_args()) + + # Generate configuration dictionary + conf_loc = args.pop('conf') + with open(conf_loc) as f: + config_dict.update(yaml.load(f, Loader=yaml.FullLoader)) + + # Get credentials from passed in fully qualified path or ENV. 
+ cred_loc = args.pop('cred') + if cred_loc is not None: + with open(cred_loc) as f: + creds = yaml.load(f, Loader=yaml.FullLoader) + registry_username = creds['registry']['username'] + registry_password = creds['registry']['password'] + access_key = creds['sandbox']['access_key'] + access_secret = creds['sandbox']['secret_key'] + else: + print("Using env variables for config parameters") + registry_username = os.environ.get("REGISTRY_USERNAME") + registry_password = os.environ.get("REGISTRY_PASSWORD") + access_key = os.environ.get("ACCESS_KEY") + access_secret = os.environ.get("SECRET_KEY") + + config_dict.update({ + 'registry_username' : registry_username, + 'registry_password' : registry_password, + 'access_key' : access_key, + 'secret_key' : access_secret + }) + + su = S3Utils(**config_dict) + + kafka_consumer = KafkaConsumer(**config_dict) metadata_consumer = kafka_consumer.connect() - kafka_consumer.consume(metadata_consumer, lambda k, v: handler(k, v)) \ No newline at end of file +# handler('', '', config_dict['log_level']) # For testing purposes + kafka_consumer.consume(metadata_consumer, handler) \ No newline at end of file diff --git a/scripts/sme/smeFunc.py b/scripts/sme/smeFunc.py index 2e11d51..f07c7b6 100644 --- a/scripts/sme/smeFunc.py +++ b/scripts/sme/smeFunc.py @@ -1,8 +1,11 @@ +import yaml import argparse -import json from onestop.KafkaConsumer import KafkaConsumer +from onestop.util.ClientLogger import ClientLogger -def handler(key,value): +config_dict = {} + +def handler(key, value, log_level = 'INFO'): ''' Prints key, value pair of items in topic @@ -13,9 +16,10 @@ def handler(key,value): :return: None ''' - print(key) - print('VALUE-------') - print(value) + logger = ClientLogger.get_logger('smeFunc.handler', log_level, False) + logger.info('In Handler') + logger.info('key=%s value=%s'%(key, value)) + """ if (value['type'] == 'collection' or not bool(value['fileInformation'])): print(value['discovery']['fileIdentifier']) @@ -25,51 +29,20 @@ def handler(key,value): if __name__ == '__main__': - - kafka_consumer = KafkaConsumer("scripts/config/kafka-publisher-config-dev.yml") - kafka_consumer.granule_topic = 'psi-granule-parsed' + # Example command: python3 smeFunc.py -conf /Users/whoever/repo/onestop-clients/scripts/config/combined_template.yml + # python3 smeFunc.py + parser = argparse.ArgumentParser(description="Launches smeFunc test") + # Set default config location to the Helm mounted pod configuration location + parser.add_argument('-conf', dest="conf", required=False, default='/etc/config/config.yml', + help="AWS config filepath") + args = vars(parser.parse_args()) + + # Generate configuration dictionary + conf_loc = args.pop('conf') + with open(conf_loc) as f: + config_dict.update(yaml.load(f, Loader=yaml.FullLoader)) + + kafka_consumer = KafkaConsumer(**config_dict) + kafka_consumer.granule_topic_consume = 'psi-granule-parsed' metadata_consumer = kafka_consumer.connect() - kafka_consumer.consume(metadata_consumer, lambda k, v: handler(k, v)) - """ - parser = argparse.ArgumentParser(description="Allows smeFunc to produce or consume messagges from kafkda topics") - parser.add_argument('-cmd', dest="command", required=True, - help="Command (produce/consume)") - - parser.add_argument('-b', dest="bootstrap.servers", required=True, - help="Bootstrap broker(s) (host[:port])") - parser.add_argument('-s', dest="schema.registry.url", required=True, - help="Schema Registry (http(s)://host[:port]") - parser.add_argument('-t', dest="topic", required=True, - help="Topic 
name") - parser.add_argument('-g', dest="group.id", required=False, - help="Consumer group") - parser.add_argument('-o', dest="auto.offset.reset", required=False, - help="offset") - - config = vars(parser.parse_args()) - topic = config.pop('topic') - cmd = config.pop('command') - - if (cmd=="consume"): - consume(config, topic, lambda k, v: handler(k, v)) - - - if (cmd=="produce"): - - #Example content - value = { - "type": "collection", - "content": "Update!", - "contentType": "application/json", - "method": "PUT", - "source": "unknown", - "operation": "ADD" - } - - key = "3ee5976e-789a-41d5-9cae-d51e7b92a247" - - data = {key: value} - - produce(config, topic, data) - """ - + kafka_consumer.consume(metadata_consumer, handler) diff --git a/scripts/sqs-to-registry/Dockerfile b/scripts/sqs-to-registry/Dockerfile index 9db0598..985421d 100644 --- a/scripts/sqs-to-registry/Dockerfile +++ b/scripts/sqs-to-registry/Dockerfile @@ -1,10 +1,9 @@ +# Expect this to copy the scripts directory over and install onestop-python-client. FROM cedardevs/onestop-python-client:latest -COPY . . + #required by the sme script, not our library RUN pip install argparse -#I should not have to do this, since it is done in the base image -#RUN pip install -r ./onestop-python-client/requirements.txt ENTRYPOINT [ "python" ] CMD [ "s3_notification_handler.py" ] #CMD tail -f /dev/null \ No newline at end of file diff --git a/scripts/sqs-to-registry/config/e2e.yml b/scripts/sqs-to-registry/config/e2e.yml index 4c2c800..a2bdcfc 100644 --- a/scripts/sqs-to-registry/config/e2e.yml +++ b/scripts/sqs-to-registry/config/e2e.yml @@ -14,14 +14,16 @@ s3_bucket2: noaa-nccf-dev-archive #CSB stream config format: csv headers: UNIQUE_ID,FILE_UUID,LON,LAT,DEPTH,TIME,PLATFORM_NAME,PROVIDER -type: COLLECTION collection_id: fdb56230-87f4-49f2-ab83-104cfd073177 access_bucket: https://archive-testing-demo.s3-us-east-2.amazonaws.com #access_bucket: https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com file_identifier_prefix: "gov.noaa.ncei.csb:" # COLLECTION or GRANULE -metadata_type: granule +kafka_consumer_metadata_type: GRANULE +kafka_publisher_metadata_type: GRANULE +s3_message_adapter_metadata_type: COLLECTION + registry_base_url: http://onestop-registry:80 onestop_base_url: http://onestop-search:8080 diff --git a/scripts/sqs-to-registry/s3_notification_handler.py b/scripts/sqs-to-registry/s3_notification_handler.py index 2b26ab5..23f4165 100644 --- a/scripts/sqs-to-registry/s3_notification_handler.py +++ b/scripts/sqs-to-registry/s3_notification_handler.py @@ -1,55 +1,53 @@ import os import yaml +import json + from onestop.util.SqsConsumer import SqsConsumer from onestop.util.S3Utils import S3Utils from onestop.util.S3MessageAdapter import S3MessageAdapter from onestop.WebPublisher import WebPublisher from onestop.util.SqsHandlers import create_delete_handler from onestop.util.SqsHandlers import create_upload_handler +from onestop.util.ClientLogger import ClientLogger -from datetime import date import argparse +config_dict = {} -def handler(recs): - print("Handling message...") - # Now get boto client for object-uuid retrieval - object_uuid = None +def handler(rec, log_level): + logger = ClientLogger.get_logger('s3_notification_handler.handler', log_level, False) + logger.info('In Handler') - if recs is None: - print("No records retrieved" + date.today()) - else: - rec = recs[0] - print(rec) - if 'ObjectRemoved' in rec['eventName']: - print("SME - calling delete handler") - print(rec['eventName']) - delete_handler(recs) - else: - 
print("SME - calling upload handler") - upload_handler(recs) - #copy_handler(recs) + if rec is None: + logger.info('No record retrieved, doing nothing.') + return + logger.info('Record:%s'%rec) -if __name__ == '__main__': - - parser = argparse.ArgumentParser(description="Launch SQS to Registry consumer") - parser.add_argument('-conf', dest="conf", required=False, - help="Config filepath") + if 'ObjectRemoved' in rec['eventName']: + delete_handler(rec) + else: + upload_handler(rec) - parser.add_argument('-cred', dest="cred", required=False, +if __name__ == '__main__': + # Example command: python3 archive_client_integration.py -conf /Users/whoever/repo/onestop-clients/scripts/config/combined_template.yml -cred /Users/whoever/repo/onestop-clients/scripts/config/credentials.yml + # python3 archive_client_integration.py -cred /Users/whoever/repo/onestop-clients/scripts/config/credentials.yml + parser = argparse.ArgumentParser(description="Launches archive client integration") + # Set default config location to the Helm mounted pod configuration location + parser.add_argument('-conf', dest="conf", required=False, default='/etc/config/config.yml', + help="AWS config filepath") + parser.add_argument('-cred', dest="cred", required=True, help="Credentials filepath") - args = vars(parser.parse_args()) - cred_loc = args.pop('cred') - #credentials from either file or env - registry_username = None - registry_password = None - access_key = None - access_secret = None + # Generate configuration dictionary + conf_loc = args.pop('conf') + with open(conf_loc) as f: + config_dict.update(yaml.load(f, Loader=yaml.FullLoader)) + # Get credentials from passed in fully qualified path or ENV. + cred_loc = args.pop('cred') if cred_loc is not None: with open(cred_loc) as f: creds = yaml.load(f, Loader=yaml.FullLoader) @@ -64,60 +62,68 @@ def handler(recs): access_key = os.environ.get("ACCESS_KEY") access_secret = os.environ.get("SECRET_KEY") - # default config location mounted in pod - if args.pop('conf') is None: - conf_loc = "/etc/config/config.yml" - else: - conf_loc = args.pop('conf') - - conf = None - with open(conf_loc) as f: - conf = yaml.load(f, Loader=yaml.FullLoader) - - #TODO organize the config - #System - log_level = conf['log_level'] - sqs_max_polls = conf['sqs_max_polls'] - - #Destination - registry_base_url = conf['registry_base_url'] - onestop_base_url = conf['onestop_base_url'] + config_dict.update({ + 'registry_username' : registry_username, + 'registry_password' : registry_password, + 'access_key' : access_key, + 'secret_key' : access_secret + }) + sqs_consumer = SqsConsumer(**config_dict) - #Source - access_bucket = conf['access_bucket'] - sqs_url = conf['sqs_url'] - s3_region = conf['s3_region'] - s3_bucket2 = conf['s3_bucket2'] - s3_region2 = conf['s3_region2'] + wp = WebPublisher(**config_dict) + s3_utils = S3Utils(**config_dict) - #Onestop related - prefix_map = conf['prefixMap'] - file_id_prefix = conf['file_identifier_prefix'] - file_format = conf['format'] - headers = conf['headers'] - type = conf['type'] - - sqs_consumer = SqsConsumer(access_key, access_secret, s3_region, sqs_url, log_level) - - wp = WebPublisher(registry_base_url=registry_base_url, username=registry_username, password=registry_password, - onestop_base_url=onestop_base_url, log_level=log_level) - - s3_utils = S3Utils(access_key, access_secret, log_level) - s3ma = S3MessageAdapter(access_bucket, prefix_map, format, headers, type, file_id_prefix, log_level) + s3ma = S3MessageAdapter(**config_dict) delete_handler = 
diff --git a/serverless/conf.py b/serverless/conf.py
index b41eb0b..26ef3cd 100644
--- a/serverless/conf.py
+++ b/serverless/conf.py
@@ -3,6 +3,6 @@ HEADERS = 'UNIQUE_ID,FILE_UUID,LON,LAT,DEPTH,TIME,PLATFORM_NAME,PROVIDER'
 TYPE = 'COLLECTION'
 COLLECTION_ID = 'fdb56230-87f4-49f2-ab83-104cfd073177'
-PSI_REGISTRY_URL = 'http://internal-a683c98a66fb011eaa4230e0d5e5657f-369075387.us-east-1.elb.amazonaws.com'
+REGISTRY_BASE_URL = 'http://internal-a683c98a66fb011eaa4230e0d5e5657f-369075387.us-east-1.elb.amazonaws.com'
 ACCESS_BUCKET = 'https://odp-noaa-nesdis-ncei-test.s3-us-west-2.amazonaws.com'
 FILE_IDENTIFIER_PREFIX = 'gov.noaa.ncei.csb:'
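
Renaming PSI_REGISTRY_URL to REGISTRY_BASE_URL keeps the serverless config consistent with the registry_base_url key used elsewhere; the lambda below only changes the constant it dereferences. A rough sketch of how a caller might exercise the renamed constant, assuming the /metadata/granule path shown in the lambda; the payload and the use of requests.post are illustrative, not the lambda's actual body:

    import json
    import requests

    import conf  # serverless/conf.py after the rename

    registry_url = conf.REGISTRY_BASE_URL + "/metadata/granule"

    # Placeholder payload; the real granule schema is not shown in this diff.
    payload = {"example": "granule metadata goes here"}

    response = requests.post(registry_url, data=json.dumps(payload),
                             headers={"Content-Type": "application/json"})
    print(response.status_code)
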
diff --git a/serverless/lambda_function.py b/serverless/lambda_function.py
index abe8fb7..3b6cd97 100644
--- a/serverless/lambda_function.py
+++ b/serverless/lambda_function.py
@@ -9,7 +9,7 @@ def lambda_handler(event, context):
-    registry_url = conf.PSI_REGISTRY_URL + "/metadata/granule"
+    registry_url = conf.REGISTRY_BASE_URL + "/metadata/granule"
 
     for rec in event['Records']: