Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/components/custom_registry_components.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,6 @@
env_prefix="[PROD]"
)
"""
* WHAT: Inspect boto3.upload_file usage
* WHY: Durable storage pattern in AWS
* Migration Planning: Replace with GCS client logic inside a custom registry component.
26 changes: 2 additions & 24 deletions src/components/custom_training_components.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,2 @@
"""
Custom Training Components
=========================
Custom-built components for model training.

Author: MLOps Team
Maintained by: ML Engineering Team

Components:
- train_model_op: Train logistic regression model from BigQuery data

Usage:
from components.custom_training_components import train_model_op

train_task = train_model_op(
train_data=bq_train_task.outputs["destination_table"],
feature_columns=['Age', 'BMI', 'PlasmaGlucose'], # <-- Passed as parameter
target_column='Diabetic',
reg_rate=0.05,
project_id=project_id,
bq_location=region,
env_prefix="[DEV]"
)
"""
#* WHAT: Identify boto3/joblib/ds_utils imports
#* WHY: These libraries enable artifact persistence in S3
39 changes: 39 additions & 0 deletions src/components/model_input_ingest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#* WHAT: Inspect select_sql_from_dict or pd.read_sql usage
#* WHY: Redshift or S3 → DataFrame conversion
#* Migration Planning: Equivalent logic would move into
# prebuilt_bigquery_components.py using BigQuery query components.


# Imports

# Component Decorators
# Consider bigquery_query_job_op ((Google Managed Prebuilt Component that does not require the @component decorator))
@component(
    base_image="python:<placeholder>",
    packages_to_install=["placeholder for packages"]
)
def _read_from_redshift(sql_client, sql: str, params: dict = None, chunksize: Optional[int] = None) -> pd.DataFrame:
    """
    Exploration helper to read data from Redshift using available SQL access object.

    WHERE: _read_from_redshift ((Placeholder for reading from BigQuery))
    WHAT: example patterns using sql_client.select_sql_from_dict or pandas.read_sql
    WHY: Redshift is columnar and can be expensive to pull; record trade-offs and auth considerations

    Args:
        sql_client: Project SQL access object. Either exposes a
            ``select_sql_from_dict(dict)`` helper, or carries a raw DBAPI
            connection on ``.conn`` usable by ``pandas.read_sql``.
        sql: SQL query text to execute.
        params: Optional bind parameters forwarded to the query.
        chunksize: Optional rows-per-fetch. Previously declared but ignored;
            now honored on the ``pandas.read_sql`` path. Default ``None``
            preserves the original single-shot read behavior.

    Returns:
        pd.DataFrame with the query results, or an empty DataFrame on any
        read failure (deliberate best-effort lab fallback — see except block).
    """
    try:
        if hasattr(sql_client, "select_sql_from_dict"):
            # Project helper takes a dict spec; it has no chunked-read mode.
            q = {"sql": sql, "params": params or {}}
            df = sql_client.select_sql_from_dict(q)
        elif chunksize:
            # BUG FIX: `chunksize` was accepted but never used. read_sql with
            # chunksize yields an iterator of frames, so concatenate to keep
            # the declared DataFrame return type. ignore_index matches the
            # default RangeIndex a single-shot read would produce.
            chunks = pd.read_sql(sql, sql_client.conn, params=params, chunksize=chunksize)
            df = pd.concat(chunks, ignore_index=True)
        else:
            df = pd.read_sql(sql, sql_client.conn, params=params)
    except Exception:
        # Broad catch is deliberate: this is an exploration/lab helper that
        # must degrade to an empty frame rather than fail the pipeline run.
        LOG.exception("Redshift read failed; returning empty DataFrame for lab fallback")
        df = pd.DataFrame()
    return df

# Intermediate solution. Considering Prebuilt or Custom components
#* WHERE: stage_table_to_s3() in ingest_model.py
#* WHAT: Inspect UNLOAD vs client-side upload patterns
#* WHY: Efficiency vs cost trade-offs in Redshift
#* Migration Planning: Replace with BigQuery export jobs inside
# prebuilt_bigquery_components.py or custom_data_quality_components.py.
Loading