diff --git a/data/docker-compose.yml b/data/docker-compose.yml index e3707af2..12d56903 100644 --- a/data/docker-compose.yml +++ b/data/docker-compose.yml @@ -7,6 +7,8 @@ services: environment: - GOOGLE_APPLICATION_CREDENTIALS=/app/service-account-key.json - VACANT_LOTS_DB + - POSTGRES_PASSWORD + - POSTGRES_PORT=5434 - CLEAN_GREEN_GOOGLE_KEY - PYTHONUNBUFFERED=1 - GOOGLE_CLOUD_BUCKET_NAME diff --git a/data/src/classes/backup_archive_database.py b/data/src/classes/backup_archive_database.py index 94302971..0d0c51f4 100644 --- a/data/src/classes/backup_archive_database.py +++ b/data/src/classes/backup_archive_database.py @@ -4,16 +4,17 @@ from datetime import datetime, timedelta import sqlalchemy as sa +from sqlalchemy import inspect + +from classes.featurelayer import google_cloud_bucket from config.config import ( log_level, max_backup_schema_days, - tiles_file_id_prefix, tile_file_backup_directory, + tiles_file_id_prefix, ) from config.psql import conn, local_engine, url from data_utils.utils import mask_password -from sqlalchemy import inspect -from classes.featurelayer import google_cloud_bucket log.basicConfig(level=log_level) @@ -125,7 +126,10 @@ def is_backup_schema_exists(self) -> bool: def backup_tiles_file(self): """backup the main tiles file to a timestamped copy in the backup/ folder in GCP""" - bucket = google_cloud_bucket() + bucket = google_cloud_bucket(require_write_access=True) + if not bucket: + log.warning("No GCP bucket found. Skipping tiles file backup.") + return count: int = 0 for blob in bucket.list_blobs(prefix=tiles_file_id_prefix): suffix: str = "_" + self.timestamp_string diff --git a/data/src/classes/bucket_manager.py b/data/src/classes/bucket_manager.py new file mode 100644 index 00000000..045571aa --- /dev/null +++ b/data/src/classes/bucket_manager.py @@ -0,0 +1,68 @@ +import logging as log +import os + +from google.cloud import storage + +from config.config import log_level + +log.basicConfig(level=log_level) + + +class GCSBucketManager: + """ + A manager for interacting with a Google Cloud Storage bucket. + + This class initializes a bucket client using Application Default Credentials, + an optional service account key, or falls back to an anonymous client for read-only access. + """ + + def __init__( + self, bucket_name: str = None, credential_path: str = None, client=None + ): + """ + Initialize the GCSBucketManager. + + Args: + bucket_name (str): Name of the bucket. Defaults to the environment variable + 'GOOGLE_CLOUD_BUCKET_NAME' or "cleanandgreenphl" if not set. + credential_path (str): Optional path to a service account credentials file. + client: Optional storage. Client instance for dependency injection in testing. + """ + self.bucket_name = bucket_name or os.getenv( + "GOOGLE_CLOUD_BUCKET_NAME", "cleanandgreenphl" + ) + + self.read_only = False + + if client is not None: + self._client = client + else: + self._client = self._init_client(credential_path) + + self.bucket = self._client.bucket(self.bucket_name) + + def _init_client(self, credential_path: str = None): + """ + Attempt to initialize the storage client using a credential file, application default + credentials or fall back to anonymous/read-only. 
+ """ + project_name = os.getenv("GOOGLE_CLOUD_PROJECT", "clean-and-green-philly") + credentials_path = credential_path or "/app/service-account-key.json" + is_credentials_file = os.path.exists(credentials_path) + + if is_credentials_file: + os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_path + + try: + # This will use application default credentials if GOOGLE_APPLICATION_CREDENTIALS is not set + if is_credentials_file: + print(f"Using service account key at {credentials_path}") + else: + print("Using application default credentials") + return storage.Client(project=project_name) + + except Exception as e: + log.warning(f"Failed to initialize client with service account key: {e}") + log.warning("Falling back to anonymous client (read-only mode)") + self.read_only = True + return storage.Client.create_anonymous_client() diff --git a/data/src/classes/diff_report.py b/data/src/classes/diff_report.py index 102d0352..def32159 100644 --- a/data/src/classes/diff_report.py +++ b/data/src/classes/diff_report.py @@ -5,6 +5,8 @@ import subprocess from email.mime.text import MIMEText +from slack_sdk import WebClient + from classes.backup_archive_database import backup_schema_name from classes.featurelayer import google_cloud_bucket from config.config import ( @@ -16,7 +18,6 @@ ) from config.psql import conn, url from data_utils.utils import mask_password -from slack_sdk import WebClient log.basicConfig(level=log_level) @@ -213,20 +214,30 @@ def compare_table(self, diff_table: DiffTable) -> str: output = complete_process.stdout.decode() return re.sub(r"\nExtra-Info:.*", "", output, flags=re.DOTALL) - def send_report_to_slack(self): + def send_report_to_slack(self, slack_token=None): """ post the summary report to the slack channel if configured. """ - if report_to_slack_channel: - token = os.environ["CAGP_SLACK_API_TOKEN"] - client = WebClient(token=token) - - # Send a message - client.chat_postMessage( - channel=report_to_slack_channel, - text=self.report, - username="CAGP Diff Bot", + token = slack_token or os.getenv("CAGP_SLACK_API_TOKEN") + if not report_to_slack_channel: + log.warning( + "Skipping Slack reporting. Configure report_to_slack_channel in config.py to enable." ) + return + if not token: + log.warning( + "Skipping Slack reporting. Configure CAGP_SLACK_API_TOKEN in environment to enable." 
+            )
+            return
+
+        client = WebClient(token=token)
+
+        # Send a message
+        client.chat_postMessage(
+            channel=report_to_slack_channel,
+            text=self.report,
+            username="CAGP Diff Bot",
+        )
 
     def email_report(self):
         """
diff --git a/data/src/classes/featurelayer.py b/data/src/classes/featurelayer.py
index 4eb30e3b..487846fb 100644
--- a/data/src/classes/featurelayer.py
+++ b/data/src/classes/featurelayer.py
@@ -7,6 +7,11 @@
 import pandas as pd
 import requests
 import sqlalchemy as sa
+from esridump.dumper import EsriDumper
+from google.cloud import storage
+from shapely import Point, wkb
+
+from classes.bucket_manager import GCSBucketManager
 from config.config import (
     FORCE_RELOAD,
     USE_CRS,
@@ -15,31 +20,22 @@
     write_production_tiles_file,
 )
 from config.psql import conn, local_engine
-from esridump.dumper import EsriDumper
-from google.cloud import storage
-from google.cloud.storage.bucket import Bucket
-from shapely import Point, wkb
 
 log.basicConfig(level=log_level)
 
 
-def google_cloud_bucket() -> Bucket:
-    """Build the google cloud bucket with name configured in your environ or default of cleanandgreenphl
-
-    Returns:
-        Bucket: the gcp bucket
+def google_cloud_bucket(require_write_access: bool = False) -> storage.Bucket | None:
+    """
+    Initialize a Google Cloud Storage bucket client using Application Default Credentials.
+    If write access is required but the client has only read-only access, None is returned.
+    Args:
+        require_write_access (bool): Whether the bucket must be writable. Defaults to False.
     """
-    credentials_path = os.path.expanduser("/app/service-account-key.json")
-
-    if not os.path.exists(credentials_path):
-        raise FileNotFoundError(f"Credentials file not found at {credentials_path}")
-
-    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_path
-    bucket_name = os.getenv("GOOGLE_CLOUD_BUCKET_NAME", "cleanandgreenphl")
-    project_name = os.getenv("GOOGLE_CLOUD_PROJECT", "clean-and-green-philly")
-    storage_client = storage.Client(project=project_name)
-    return storage_client.bucket(bucket_name)
+    bucket_manager = GCSBucketManager()
+    if require_write_access and bucket_manager.read_only:
+        return None
+    return bucket_manager.bucket
 
 
 class FeatureLayer:
@@ -58,7 +54,6 @@ def __init__(
         from_xy=False,
         use_wkb_geom_field=None,
         cols: list[str] = None,
-        bucket: Bucket = None,
     ):
         self.name = name
         self.esri_rest_urls = (
@@ -75,7 +70,6 @@ def __init__(
         self.psql_table = name.lower().replace(" ", "_")
         self.input_crs = "EPSG:4326" if not from_xy else USE_CRS
         self.use_wkb_geom_field = use_wkb_geom_field
-        self.bucket = bucket or google_cloud_bucket()
 
         inputs = [self.esri_rest_urls, self.carto_sql_queries, self.gdf]
         non_none_inputs = [i for i in inputs if i is not None]
@@ -330,7 +324,13 @@ def build_and_publish(self, tiles_file_id_prefix: str) -> None:
         df_no_geom.to_parquet(temp_parquet)
 
         # Upload Parquet to Google Cloud Storage
-        blob_parquet = self.bucket.blob(f"{tiles_file_id_prefix}.parquet")
+        bucket = google_cloud_bucket(require_write_access=True)
+        if bucket is None:
+            print(
+                "Skipping Parquet and PMTiles upload due to read-only bucket access."
+            )
+            return
+        blob_parquet = bucket.blob(f"{tiles_file_id_prefix}.parquet")
         try:
             blob_parquet.upload_from_filename(temp_parquet)
             parquet_size = os.stat(temp_parquet).st_size
@@ -399,7 +399,7 @@ def build_and_publish(self, tiles_file_id_prefix: str) -> None:
 
         # Upload PMTiles to Google Cloud Storage
         for file in write_files:
-            blob = self.bucket.blob(file)
+            blob = bucket.blob(file)
             try:
                 blob.upload_from_filename(temp_merged_pmtiles)
                 print(f"PMTiles upload successful for {file}!")
diff --git a/data/src/classes/slack_error_reporter.py b/data/src/classes/slack_error_reporter.py
index 1c443d4b..e0d02bd4 100644
--- a/data/src/classes/slack_error_reporter.py
+++ b/data/src/classes/slack_error_reporter.py
@@ -1,16 +1,19 @@
 import os
+
 from slack_sdk import WebClient
 
 
-def send_error_to_slack(error_message: str) -> None:
+def send_error_to_slack(error_message: str, slack_token: str | None = None) -> None:
     """Send error message to Slack."""
-    token: str | None = os.getenv("CAGP_SLACK_API_TOKEN")  # token can be None
-    if token:
-        client = WebClient(token=token)
-        client.chat_postMessage(
-            channel="clean-and-green-philly-back-end",  # Replace with actual Slack channel ID
-            text=error_message,
-            username="Backend Error Reporter",
-        )
-    else:
-        raise ValueError("Slack API token not found in environment variables.")
+    token = slack_token or os.getenv("CAGP_SLACK_API_TOKEN")
+    if not token:
+        print("Slack API token not found in environment variables.")
+        print("Skipping error report to Slack.")
+        return
+
+    client = WebClient(token=token)
+    client.chat_postMessage(
+        channel="clean-and-green-philly-back-end",  # Replace with actual Slack channel ID
+        text=error_message,
+        username="Backend Error Reporter",
+    )
diff --git a/data/src/config/config.py b/data/src/config/config.py
index 4b240cf2..a9d2fe56 100644
--- a/data/src/config/config.py
+++ b/data/src/config/config.py
@@ -1,9 +1,12 @@
 import logging
 from pathlib import Path
 
-FORCE_RELOAD = False
+FORCE_RELOAD = True
 """ During the data load, whether to query the various GIS API services for the data to load into the postgres tables. If True, will query the API services, backup the database, reload the database and report on data differences. If false will read the data from postgres."""
 
+BACKUP_SCHEMA = False
+""" Whether to backup the database schema before loading the data in script.py.
""" + USE_CRS = "EPSG:2272" """ the standard geospatial code for Pennsylvania South (ftUS) """ diff --git a/data/src/config/psql.py b/data/src/config/psql.py index f4818363..2f21924a 100644 --- a/data/src/config/psql.py +++ b/data/src/config/psql.py @@ -1,12 +1,36 @@ import os -from config.config import is_docker from sqlalchemy import create_engine -url: str = ( - os.environ["VACANT_LOTS_DB"].replace("localhost", "host.docker.internal") - if is_docker() - else os.environ["VACANT_LOTS_DB"] -) +from config.config import is_docker + + +def get_db_url(): + # Detect if running in Cloud Run: + is_cloud_run = "K_SERVICE" in os.environ or "CLOUD_RUN_JOB" in os.environ + + # Use host.docker.internal when running locally in Docker + # except when running in Cloud Run + host = "localhost" + if is_docker() and not is_cloud_run: + host = "host.docker.internal" + + if os.getenv("VACANT_LOTS_DB"): + # Use the provided database URL + url = os.getenv("VACANT_LOTS_DB") + url = url.replace("localhost", host) + else: + # Use the specified port, pw, db and user to construct the URL + pw = os.environ["POSTGRES_PASSWORD"] + port = os.getenv("POSTGRES_PORT", "5432") + db = os.getenv("POSTGRES_DB", "vacantlotdb") + user = os.getenv("POSTGRES_USER", "postgres") + url: str = f"postgresql://{user}:{pw}@{host}:{port}/{db}" + print(f"Set database url to: postgresql://{user}:****@{host}:{port}/{db}") + return url + + +url = get_db_url() + local_engine = create_engine(url) conn = local_engine.connect() diff --git a/data/src/new_etl/classes/bucket_manager.py b/data/src/new_etl/classes/bucket_manager.py new file mode 100644 index 00000000..045571aa --- /dev/null +++ b/data/src/new_etl/classes/bucket_manager.py @@ -0,0 +1,68 @@ +import logging as log +import os + +from google.cloud import storage + +from config.config import log_level + +log.basicConfig(level=log_level) + + +class GCSBucketManager: + """ + A manager for interacting with a Google Cloud Storage bucket. + + This class initializes a bucket client using Application Default Credentials, + an optional service account key, or falls back to an anonymous client for read-only access. + """ + + def __init__( + self, bucket_name: str = None, credential_path: str = None, client=None + ): + """ + Initialize the GCSBucketManager. + + Args: + bucket_name (str): Name of the bucket. Defaults to the environment variable + 'GOOGLE_CLOUD_BUCKET_NAME' or "cleanandgreenphl" if not set. + credential_path (str): Optional path to a service account credentials file. + client: Optional storage. Client instance for dependency injection in testing. + """ + self.bucket_name = bucket_name or os.getenv( + "GOOGLE_CLOUD_BUCKET_NAME", "cleanandgreenphl" + ) + + self.read_only = False + + if client is not None: + self._client = client + else: + self._client = self._init_client(credential_path) + + self.bucket = self._client.bucket(self.bucket_name) + + def _init_client(self, credential_path: str = None): + """ + Attempt to initialize the storage client using a credential file, application default + credentials or fall back to anonymous/read-only. 
+ """ + project_name = os.getenv("GOOGLE_CLOUD_PROJECT", "clean-and-green-philly") + credentials_path = credential_path or "/app/service-account-key.json" + is_credentials_file = os.path.exists(credentials_path) + + if is_credentials_file: + os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_path + + try: + # This will use application default credentials if GOOGLE_APPLICATION_CREDENTIALS is not set + if is_credentials_file: + print(f"Using service account key at {credentials_path}") + else: + print("Using application default credentials") + return storage.Client(project=project_name) + + except Exception as e: + log.warning(f"Failed to initialize client with service account key: {e}") + log.warning("Falling back to anonymous client (read-only mode)") + self.read_only = True + return storage.Client.create_anonymous_client() diff --git a/data/src/new_etl/classes/data_diff.py b/data/src/new_etl/classes/data_diff.py index 3dc40bfb..59ab9b90 100644 --- a/data/src/new_etl/classes/data_diff.py +++ b/data/src/new_etl/classes/data_diff.py @@ -1,7 +1,8 @@ -from slack_sdk import WebClient +import os + import pandas as pd +from slack_sdk import WebClient from sqlalchemy import text -import os from config.psql import conn @@ -115,27 +116,31 @@ def generate_diff(self): self.summary_text = "\n".join(summary_lines) - def send_to_slack(self, channel="clean-and-green-philly-pipeline"): + def send_to_slack( + self, channel="clean-and-green-philly-pipeline", slack_token=None + ): """ Sends the diff summary to a Slack channel. Args: channel (str): The Slack channel to post the message to. """ - token = os.getenv("CAGP_SLACK_API_TOKEN") - if token: - client = WebClient(token=token) - try: - client.chat_postMessage( - channel=channel, - text=f"*Data Difference Report*\n\n{self.summary_text}", - username="Diff Reporter", - ) - print("Diff report sent to Slack successfully.") - except Exception as e: - print(f"Failed to send diff report to Slack: {e}") - else: - raise ValueError("Slack API token not found in environment variables.") + token = slack_token or os.getenv("CAGP_SLACK_API_TOKEN") + if not token: + print("Slack API token not found in environment variables.") + print("Skipping sending diff report to Slack.") + return + + client = WebClient(token=token) + try: + client.chat_postMessage( + channel=channel, + text=f"*Data Difference Report*\n\n{self.summary_text}", + username="Diff Reporter", + ) + print("Diff report sent to Slack successfully.") + except Exception as e: + print(f"Failed to send diff report to Slack: {e}") def run(self, send_to_slack=True, slack_channel="clean-and-green-philly-pipeline"): """ diff --git a/data/src/new_etl/classes/featurelayer.py b/data/src/new_etl/classes/featurelayer.py index 4cc26d28..b64c4201 100644 --- a/data/src/new_etl/classes/featurelayer.py +++ b/data/src/new_etl/classes/featurelayer.py @@ -9,7 +9,6 @@ import requests import sqlalchemy as sa from google.cloud import storage -from google.cloud.storage.bucket import Bucket from shapely import wkb from tqdm import tqdm @@ -21,29 +20,26 @@ write_production_tiles_file, ) from config.psql import conn, local_engine +from new_etl.classes.bucket_manager import GCSBucketManager from new_etl.database import to_postgis_with_schema from new_etl.loaders import load_carto_data, load_esri_data log.basicConfig(level=log_level) -def google_cloud_bucket() -> Bucket: - """Build the google cloud bucket with name configured in your environ or default of cleanandgreenphl - - Returns: - Bucket: the gcp bucket +def 
google_cloud_bucket(require_write_access: bool = False) -> storage.Bucket | None: """ + Initialize a Google Cloud Storage bucket client using Application Default Credentials. + If a writable bucket is requested and the user does not have write access None is returned. - credentials_path = os.path.expanduser("/app/service-account-key.json") - if not os.path.exists(credentials_path): - raise FileNotFoundError(f"Credentials file not found at {credentials_path}") - - os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_path - bucket_name = os.getenv("GOOGLE_CLOUD_BUCKET_NAME", "cleanandgreenphl") - project_name = os.getenv("GOOGLE_CLOUD_PROJECT", "clean-and-green-philly") + Args: + require_write_access (bool): Whether it is required that the bucket should be writable. Defaults to False. + """ - storage_client = storage.Client(project=project_name) - return storage_client.bucket(bucket_name) + bucket_manager = GCSBucketManager() + if require_write_access and bucket_manager.read_only: + return None + return bucket_manager.bucket class FeatureLayer: @@ -149,6 +145,7 @@ def load_data(self): ] # Save GeoDataFrame to PostgreSQL and configure it as a hypertable + print("Saving GeoDataFrame to PostgreSQL...") to_postgis_with_schema(self.gdf, self.psql_table, conn) except Exception as e: @@ -358,6 +355,11 @@ def build_and_publish(self, tiles_file_id_prefix: str) -> None: f"{temp_merged_pmtiles} is {file_size} bytes in size but should be at least {min_tiles_file_size_in_bytes}. Therefore, we are not uploading any files to the GCP bucket. The file may be corrupt or incomplete." ) + bucket = google_cloud_bucket(require_write_access=True) + if bucket is None: + print("Skipping PMTiles upload due to read-only bucket access.") + return + # Upload PMTiles to Google Cloud Storage bucket = google_cloud_bucket() for file in write_files: diff --git a/data/src/new_etl/classes/slack_reporters.py b/data/src/new_etl/classes/slack_reporters.py index 6ae08517..97ba2d3d 100644 --- a/data/src/new_etl/classes/slack_reporters.py +++ b/data/src/new_etl/classes/slack_reporters.py @@ -1,12 +1,15 @@ -from sqlalchemy import text import os -from slack_sdk import WebClient import pandas as pd +from slack_sdk import WebClient +from sqlalchemy import text def send_dataframe_profile_to_slack( - df: pd.DataFrame, df_name: str, channel="clean-and-green-philly-pipeline" + df: pd.DataFrame, + df_name: str, + channel="clean-and-green-philly-pipeline", + slack_token: str | None = None, ): """ Profiles a DataFrame and sends the QC profile summary to a Slack channel. @@ -15,7 +18,13 @@ def send_dataframe_profile_to_slack( df (pd.DataFrame): The DataFrame to profile. df_name (str): The name of the DataFrame being profiled. channel (str): The Slack channel to post the message to. + slack_token (str): The Slack API token. If not provided, it will be read from the environment. 
""" + token = slack_token or os.getenv("CAGP_SLACK_API_TOKEN") + if not token: + print("Slack API token not found in environment variables.") + print("Skipping QC profile report to Slack.") + return # Step 1: Profile the DataFrame profile_summary = {} @@ -58,27 +67,33 @@ def send_dataframe_profile_to_slack( message += f" - `{col}`: {unique_count} unique values\n" # Step 3: Send to Slack - token = os.getenv("CAGP_SLACK_API_TOKEN") - if token: - client = WebClient(token=token) - try: - client.chat_postMessage( - channel=channel, - text=message, - username="QC Reporter", - ) - print(f"QC profile for `{df_name}` sent to Slack successfully.") - except Exception as e: - print(f"Failed to send QC profile for `{df_name}` to Slack: {e}") - else: - raise ValueError("Slack API token not found in environment variables.") + client = WebClient(token=token) + try: + client.chat_postMessage( + channel=channel, + text=message, + username="QC Reporter", + ) + print(f"QC profile for `{df_name}` sent to Slack successfully.") + except Exception as e: + print(f"Failed to send QC profile for `{df_name}` to Slack: {e}") -def send_pg_stats_to_slack(conn): +def send_pg_stats_to_slack(conn, slack_token: str | None = None): """ Report total sizes for all hypertables using hypertable_detailed_size and send the result to a Slack channel. + + Args: + conn: SQLAlchemy connection to the PostgreSQL database. + slack_token (str): The Slack API token. If not provided, it will be read from the environment. """ + token = slack_token or os.getenv("CAGP_SLACK_API_TOKEN") + if not token: + print("Slack API token not found in environment variables.") + print("Skipping PostgreSQL stats report to Slack.") + return + # Step 1: Get all hypertable names hypertable_query = """ SELECT hypertable_name @@ -115,20 +130,19 @@ def send_pg_stats_to_slack(conn): message += f"- {size['hypertable']}: {total_size}\n" # Step 4: Send to Slack - token = os.getenv("CAGP_SLACK_API_TOKEN") - if token: - client = WebClient(token=token) - client.chat_postMessage( - channel="clean-and-green-philly-pipeline", - text=message, - username="PG Stats Reporter", - ) - else: - raise ValueError("Slack API token not found in environment variables.") + client = WebClient(token=token) + client.chat_postMessage( + channel="clean-and-green-philly-pipeline", + text=message, + username="PG Stats Reporter", + ) def send_diff_report_to_slack( - diff_summary: str, report_url: str, channel="clean-and-green-philly-pipeline" + diff_summary: str, + report_url: str, + channel="clean-and-green-philly-pipeline", + slack_token: str | None = None, ): """ Sends a difference report summary to a Slack channel. @@ -137,41 +151,48 @@ def send_diff_report_to_slack( diff_summary (str): The summary of differences to post. report_url (str): The URL to the detailed difference report. channel (str): The Slack channel to post the message to. + slack_token (str): The Slack API token. If not provided, it will be read from the environment. 
""" print( f"send_diff_report_to_slack called with:\ndiff_summary:\n{diff_summary}\nreport_url: {report_url}" ) + token = slack_token or os.getenv("CAGP_SLACK_API_TOKEN") + if not token: + print("Slack API token not found in environment variables.") + print("Skipping diff report to Slack.") + return # Step 1: Format the message message = f"*Data Difference Report*\n\n{diff_summary}\n\nDetailed report: <{report_url}|View Report>" print(f"Formatted Slack message:\n{message}") # Step 2: Send the message to Slack - token = os.getenv("CAGP_SLACK_API_TOKEN") - if token: - client = WebClient(token=token) - try: - client.chat_postMessage( - channel=channel, - text=message, - username="Diff Reporter", - ) - print("Diff report sent to Slack successfully.") - except Exception as e: - print(f"Failed to send diff report to Slack: {e}") - else: - raise ValueError("Slack API token not found in environment variables.") + client = WebClient(token=token) + try: + client.chat_postMessage( + channel=channel, + text=message, + username="Diff Reporter", + ) + print("Diff report sent to Slack successfully.") + except Exception as e: + print(f"Failed to send diff report to Slack: {e}") -def send_error_to_slack(error_message: str) -> None: +def send_error_to_slack(error_message: str, slack_token: str | None = None) -> None: """Send error message to Slack.""" - token: str | None = os.getenv("CAGP_SLACK_API_TOKEN") # token can be None - if token: - client = WebClient(token=token) - client.chat_postMessage( - channel="clean-and-green-philly-back-end", # Replace with actual Slack channel ID - text=error_message, - username="Backend Error Reporter", - ) - else: - raise ValueError("Slack API token not found in environment variables.") + + token = slack_token or os.getenv("CAGP_SLACK_API_TOKEN") + if not token: + print("Slack API token not found in environment variables.") + print("Skipping error report to Slack.") + print("Error message:") + print(error_message) + return + + client = WebClient(token=token) + client.chat_postMessage( + channel="clean-and-green-philly-back-end", # Replace with actual Slack channel ID + text=error_message, + username="Backend Error Reporter", + ) diff --git a/data/src/script.py b/data/src/script.py index 32a525cb..0e85b692 100644 --- a/data/src/script.py +++ b/data/src/script.py @@ -1,8 +1,10 @@ import time +import traceback from classes.backup_archive_database import BackupArchiveDatabase from classes.diff_report import DiffReport -from config.config import FORCE_RELOAD, tiles_file_id_prefix +from classes.slack_error_reporter import send_error_to_slack +from config.config import BACKUP_SCHEMA, FORCE_RELOAD, tiles_file_id_prefix from config.psql import conn from data_utils.access_process import access_process from data_utils.city_owned_properties import city_owned_properties @@ -15,10 +17,10 @@ from data_utils.gun_crimes import gun_crimes from data_utils.imm_dang_buildings import imm_dang_buildings from data_utils.l_and_i import l_and_i -from data_utils.owner_type import owner_type from data_utils.nbhoods import nbhoods from data_utils.negligent_devs import negligent_devs from data_utils.opa_properties import opa_properties +from data_utils.owner_type import owner_type from data_utils.park_priority import park_priority from data_utils.phs_properties import phs_properties from data_utils.ppr_properties import ppr_properties @@ -29,10 +31,12 @@ from data_utils.unsafe_buildings import unsafe_buildings from data_utils.vacant_properties import vacant_properties + import traceback from 
classes.slack_error_reporter import send_error_to_slack + try: services = [ city_owned_properties, @@ -60,7 +64,7 @@ # backup sql schema if we are reloading data backup: BackupArchiveDatabase = None - if FORCE_RELOAD: + if FORCE_RELOAD and BACKUP_SCHEMA: # first archive any remaining backup that may exist from a previous run that errored backup = BackupArchiveDatabase() if backup.is_backup_schema_exists(): diff --git a/data/src/streetview.py b/data/src/streetview.py index cd35763b..384e6e51 100644 --- a/data/src/streetview.py +++ b/data/src/streetview.py @@ -9,6 +9,10 @@ # Configure Google bucket = google_cloud_bucket() +# when switching over to new_etl, this will need to be updated to use the new bucket manager +# bucket = google_cloud_bucket(require_write_access=True) +# if bucket is None: +# do a dry run or exit key = os.environ["CLEAN_GREEN_GOOGLE_KEY"] bucket_name = bucket.name diff --git a/data/src/test/test_slack_error_reporter.py b/data/src/test/test_slack_error_reporter.py index 4a30653b..9b79a323 100644 --- a/data/src/test/test_slack_error_reporter.py +++ b/data/src/test/test_slack_error_reporter.py @@ -6,8 +6,8 @@ sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/..") from classes.slack_error_reporter import ( - send_error_to_slack, -) # Ensure correct file import + send_error_to_slack, # Ensure correct file import +) class TestSlackNotifier(unittest.TestCase): @@ -15,7 +15,8 @@ class TestSlackNotifier(unittest.TestCase): "classes.slack_error_reporter.WebClient.chat_postMessage" ) # Correct patching @patch( - "classes.slack_error_reporter.os.getenv", return_value="mock_slack_token" + "classes.slack_error_reporter.os.getenv", + return_value="mock_slack_token", ) # Correct patching def test_send_error_to_slack(self, _mock_getenv, mock_slack_post): """Test that Slack error reporting is triggered correctly.""" @@ -23,7 +24,7 @@ def test_send_error_to_slack(self, _mock_getenv, mock_slack_post): error_message = "Test error message" # Call the Slack notification function - send_error_to_slack(error_message) + send_error_to_slack(error_message, slack_token="test_token") # Verify the Slack API call was made with the correct parameters mock_slack_post.assert_called_once_with( @@ -40,11 +41,6 @@ def test_send_error_to_slack(self, _mock_getenv, mock_slack_post): ) # Simulate missing Slack token def test_no_error_no_slack_message(self, _mock_getenv, mock_slack_post): """Test that Slack notification is not triggered if there's no error.""" - - # Call the Slack notification function (with no valid token) - with self.assertRaises(ValueError): - send_error_to_slack("Test error message") - # Ensure Slack's chat_postMessage was not called due to missing token mock_slack_post.assert_not_called()
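
Reviewer note (not part of the patch): a minimal usage sketch of the write-access gate this PR adds around GCS uploads. It assumes the new google_cloud_bucket(require_write_access=True) helper from data/src/classes/featurelayer.py; the upload_report() function and the paths passed to it are hypothetical examples, not code in this repository.

    # Hypothetical caller, for illustration only.
    from classes.featurelayer import google_cloud_bucket


    def upload_report(local_path: str, blob_name: str) -> bool:
        # Returns False instead of raising when only anonymous/read-only access is available.
        bucket = google_cloud_bucket(require_write_access=True)
        if bucket is None:
            print(f"Read-only bucket access; skipping upload of {blob_name}.")
            return False
        blob = bucket.blob(blob_name)
        blob.upload_from_filename(local_path)
        return True


    upload_report("tmp/report.txt", "backup/report.txt")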