|
# * WHAT: Inspect select_sql_from_dict or pd.read_sql usage
# * WHY: Redshift or S3 → DataFrame conversion
# * Migration Planning: Equivalent logic would move into
#   prebuilt_bigquery_components.py using BigQuery query components.


# Imports

# Component Decorators
# Consider bigquery_query_job_op (a Google-managed prebuilt component that does not require the @component decorator)
@component(
    base_image="python:<placeholder>",
    packages_to_install=["placeholder for packages"]
)
def _read_from_redshift(
    sql_client,
    sql: str,
    params: Optional[dict] = None,
    chunksize: Optional[int] = None,
) -> pd.DataFrame:
    """Read data from Redshift using whatever SQL access object is available.

    Exploration helper: prefers the client's own ``select_sql_from_dict``
    API when present; otherwise falls back to ``pandas.read_sql`` against
    the client's raw ``conn`` attribute.

    WHERE: _read_from_redshift (placeholder for reading from BigQuery)
    WHAT:  example patterns using sql_client.select_sql_from_dict or pandas.read_sql
    WHY:   Redshift is columnar and can be expensive to pull; record
           trade-offs and auth considerations.

    Parameters
    ----------
    sql_client : object
        SQL access object. Either exposes ``select_sql_from_dict(dict)``
        or carries a DB-API connection at ``.conn`` — TODO confirm the
        exact client contract against the caller.
    sql : str
        Query text to execute.
    params : dict, optional
        Bind parameters for the query; an empty mapping is substituted
        when omitted (for the dict-based client path).
    chunksize : int, optional
        Accepted for interface parity but currently UNUSED — the full
        result set is always materialized. TODO: wire through to
        ``pd.read_sql`` if streaming reads are ever needed (note that
        would change the return type to an iterator for callers that set it).

    Returns
    -------
    pd.DataFrame
        Query results, or an empty DataFrame when the read fails.
    """
    try:
        if hasattr(sql_client, "select_sql_from_dict"):
            # Project-specific client API: takes a single dict payload.
            query_payload = {"sql": sql, "params": params or {}}
            df = sql_client.select_sql_from_dict(query_payload)
        else:
            # Generic DB-API fallback via pandas.
            df = pd.read_sql(sql, sql_client.conn, params=params)
    except Exception:
        # Broad catch is intentional: lab/exploration code must not crash.
        # Log the full traceback and degrade to an empty frame so downstream
        # steps can proceed with a best-effort result.
        LOG.exception("Redshift read failed; returning empty DataFrame for lab fallback")
        df = pd.DataFrame()
    return df
0 commit comments