|
# * WHAT: Inspect select_sql_from_dict or pd.read_sql usage
# * WHY: Redshift or S3 → DataFrame conversion
# * Migration Planning: Equivalent logic would move into
#   prebuilt_bigquery_components.py using BigQuery query components.


# Imports

# Component Decorators
# Consider bigquery_query_job_op (a Google-managed prebuilt component that does not require the @component decorator)
@component(
    base_image="python:<placeholder>",
    packages_to_install=["placeholder for packages"]
)
def _read_from_redshift(
    sql_client,
    sql: str,
    params: Optional[dict] = None,
    chunksize: Optional[int] = None,
) -> pd.DataFrame:
    """Read data from Redshift using whatever SQL access object is available.

    Exploration helper: prefers the client's own ``select_sql_from_dict``
    API when present; otherwise falls back to ``pandas.read_sql`` against
    the client's raw ``conn`` attribute.

    WHERE: _read_from_redshift (placeholder for reading from BigQuery)
    WHAT:  example patterns using sql_client.select_sql_from_dict or pandas.read_sql
    WHY:   Redshift is columnar and can be expensive to pull; record
           trade-offs and auth considerations.

    Parameters
    ----------
    sql_client : object
        SQL access object. Either exposes ``select_sql_from_dict(dict)``
        or carries a DB-API connection at ``.conn`` — TODO confirm the
        exact client contract against the caller.
    sql : str
        Query text to execute.
    params : dict, optional
        Bind parameters for the query; an empty mapping is substituted
        when omitted (for the dict-based client path).
    chunksize : int, optional
        Accepted for interface parity but currently UNUSED — the full
        result set is always materialized. TODO: wire through to
        ``pd.read_sql`` if streaming reads are ever needed (note that
        would change the return type to an iterator for callers that set it).

    Returns
    -------
    pd.DataFrame
        Query results, or an empty DataFrame when the read fails.
    """
    try:
        if hasattr(sql_client, "select_sql_from_dict"):
            # Project-specific client API: takes a single dict payload.
            query_payload = {"sql": sql, "params": params or {}}
            df = sql_client.select_sql_from_dict(query_payload)
        else:
            # Generic DB-API fallback via pandas.
            df = pd.read_sql(sql, sql_client.conn, params=params)
    except Exception:
        # Broad catch is intentional: lab/exploration code must not crash.
        # Log the full traceback and degrade to an empty frame so downstream
        # steps can proceed with a best-effort result.
        LOG.exception("Redshift read failed; returning empty DataFrame for lab fallback")
        df = pd.DataFrame()
    return df
0 commit comments