@@ -1,21 +1,24 @@
+# test rebuild lambdas
+
 """
 Script to call the GovNotify API to retrieve data and write to S3.
 Retrieved data is written to S3 Landing as a json string and parquet file.
 Data is then normalised and written to s3 Raw for use by analysts.
 Both zones are crawled so that data is exposed in the Glue data catalog.
 """

-from datetime import datetime
-from io import BytesIO
 import json
 import logging
+from datetime import datetime
+from io import BytesIO
 from os import getenv

-from botocore.exceptions import ClientError
 import boto3
-from notifications_python_client.notifications import NotificationsAPIClient
-from notifications_python_client.errors import HTTPError
 import pandas as pd
+from botocore.exceptions import ClientError
+from notifications_python_client.errors import HTTPError
+from notifications_python_client.notifications import NotificationsAPIClient
+

 # Set up logging
 logging.basicConfig(level=logging.INFO)
@@ -29,7 +32,7 @@ def initialize_s3_client():
     Returns:
         boto3.client: S3 client instance.
     """
-    return boto3.client('s3')
+    return boto3.client("s3")


 def get_api_secret(api_secret_name, region_name):
@@ -58,9 +61,7 @@ def get_response(query):
     try:
         response = query
     except HTTPError as e:
-        logger.error(
-            f"Error requesting response from {query}: {e}"
-        )
+        logger.error(f"Error requesting response from {query}: {e}")
         raise
     return response

@@ -98,7 +99,7 @@ def json_to_parquet(response, label):
     """
     df = pd.DataFrame.from_dict(response[label])
     parquet_buffer = BytesIO()
-    df.to_parquet(parquet_buffer, index=False, engine='pyarrow')
+    df.to_parquet(parquet_buffer, index=False, engine="pyarrow")
     return parquet_buffer


@@ -113,12 +114,12 @@ def json_to_parquet_normalised(response, label):
     data = json.loads(response)
     df = pd.json_normalize(data[label], max_level=1)
     parquet_buffer = BytesIO()
-    df.to_parquet(parquet_buffer, index=False, engine='pyarrow')
+    df.to_parquet(parquet_buffer, index=False, engine="pyarrow")
     return parquet_buffer


 def prepare_json(response):
-    return json.dumps(response).encode('utf-8')
+    return json.dumps(response).encode("utf-8")


 def add_date_partition_key_to_s3_prefix(s3_prefix):
@@ -129,8 +130,8 @@ def add_date_partition_key_to_s3_prefix(s3_prefix):

 def lambda_handler(event, context):
     logger.info("Set up S3 client...")
-    s3_client = boto3.client('s3')
-    glue_client = boto3.client('glue')
+    s3_client = boto3.client("s3")
+    glue_client = boto3.client("glue")

     api_secret_name = getenv("API_SECRET_NAME")
     region_name = getenv("AWS_REGION")
@@ -148,41 +149,62 @@ def lambda_handler(event, context):
     client = initialise_notification_client(api_key)

     # GovNotify queries to retrieve
-    api_queries = ['notifications', 'received_text_messages']
+    api_queries = ["notifications", "received_text_messages"]
     api_queries_dict = {
-        'notifications': {'query': client.get_all_notifications(include_jobs=True),
-                          'file_name': 'notifications'},
-        'received_text_messages': {'query': client.get_received_texts(),
-                                   'file_name': 'received_text_messages'}
+        "notifications": {
+            "query": client.get_all_notifications(include_jobs=True),
+            "file_name": "notifications",
+        },
+        "received_text_messages": {
+            "query": client.get_received_texts(),
+            "file_name": "received_text_messages",
+        },
     }

     logger.info("Get API responses...")
     for api_query in api_queries:
-        query = api_queries_dict.get(api_query).get('query')
+        query = api_queries_dict.get(api_query).get("query")
         response = get_response(query)
-        file_name = api_queries_dict.get(api_query).get('file_name')
+        file_name = api_queries_dict.get(api_query).get("file_name")

-        output_folder_json = add_date_partition_key_to_s3_prefix(f'{output_folder}{file_name}/json/')
-        output_folder_parquet = add_date_partition_key_to_s3_prefix(f'{output_folder}{file_name}/parquet/')
+        output_folder_json = add_date_partition_key_to_s3_prefix(
+            f"{output_folder}{file_name}/json/"
+        )
+        output_folder_parquet = add_date_partition_key_to_s3_prefix(
+            f"{output_folder}{file_name}/parquet/"
+        )

         # convert response to json formatted string
         json_str = prepare_json(response=response)

         # Upload the json string to landing only
-        upload_to_s3(output_s3_bucket_landing, s3_client, json_str, f'{output_folder_json}{file_name}.json')
+        upload_to_s3(
+            output_s3_bucket_landing,
+            s3_client,
+            json_str,
+            f"{output_folder_json}{file_name}.json",
+        )

         # Upload parquet buffer to both S3 landing and raw; run crawler
         parquet_buffer_landing = json_to_parquet(response=response, label=file_name)
         parquet_buffer_landing.seek(0)
-        s3_client.upload_fileobj(parquet_buffer_landing, output_s3_bucket_landing,
-                                 f'{output_folder_parquet}{file_name}.parquet')
-        glue_client.start_crawler(Name=f'{crawler_landing}{file_name}')
+        s3_client.upload_fileobj(
+            parquet_buffer_landing,
+            output_s3_bucket_landing,
+            f"{output_folder_parquet}{file_name}.parquet",
+        )
+        glue_client.start_crawler(Name=f"{crawler_landing}{file_name}")

-        parquet_buffer_raw = json_to_parquet_normalised(response=json_str, label=file_name)
+        parquet_buffer_raw = json_to_parquet_normalised(
+            response=json_str, label=file_name
+        )
         parquet_buffer_raw.seek(0)
-        s3_client.upload_fileobj(parquet_buffer_raw, output_s3_bucket_raw,
-                                 f'{output_folder_parquet}{file_name}.parquet')
-        glue_client.start_crawler(Name=f'{crawler_raw}{file_name}')
+        s3_client.upload_fileobj(
+            parquet_buffer_raw,
+            output_s3_bucket_raw,
+            f"{output_folder_parquet}{file_name}.parquet",
+        )
+        glue_client.start_crawler(Name=f"{crawler_raw}{file_name}")

     logger.info("Job finished")

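For orientation, a minimal, self-contained sketch of the serialise-and-normalise path the handler runs for each query, mirroring prepare_json() and json_to_parquet_normalised() above. The sample payload and its field names are illustrative only, not the real GovNotify response schema:

```python
import json
from io import BytesIO

import pandas as pd

# Illustrative payload shaped like a "notifications" response; the real
# GovNotify API returns far more fields than this.
response = {
    "notifications": [
        {"id": "abc-123", "type": "sms", "status": "delivered"},
        {"id": "def-456", "type": "email", "status": "created"},
    ]
}

# Landing zone: the response serialised as a UTF-8 JSON string (prepare_json).
json_str = json.dumps(response).encode("utf-8")

# Raw zone: flatten nested fields one level deep and write Parquet into an
# in-memory buffer (json_to_parquet_normalised).
data = json.loads(json_str)
df = pd.json_normalize(data["notifications"], max_level=1)
parquet_buffer = BytesIO()
df.to_parquet(parquet_buffer, index=False, engine="pyarrow")
parquet_buffer.seek(0)  # rewind so upload_fileobj would read from the start
```

The seek(0) matters: to_parquet leaves the buffer positioned at its end, so uploading without rewinding would send an empty object, which is why the handler rewinds both buffers before calling upload_fileobj.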