1919
2020from typing import TYPE_CHECKING , ClassVar , NoReturn
2121
22+ import pandas as pd
23+ import pandas_gbq
2224from airbyte_api .models import DestinationBigquery
25+ from google .oauth2 .service_account import Credentials
2326
2427from airbyte ._processors .sql .bigquery import BigQueryConfig , BigQuerySqlProcessor
2528from airbyte .caches .base import (
2629 CacheBase ,
2730)
28- from airbyte .constants import DEFAULT_ARROW_MAX_CHUNK_SIZE
2931from airbyte .destinations ._translate_cache_to_dest import (
3032 bigquery_cache_to_destination_configuration ,
3133)
3234
3335
3436if TYPE_CHECKING :
37+ from collections .abc import Iterator
38+
3539 from airbyte .shared .sql_processor import SqlProcessorBase
3640
3741
@@ -48,21 +52,35 @@ def paired_destination_config(self) -> DestinationBigquery:
4852 """Return a dictionary of destination configuration values."""
4953 return bigquery_cache_to_destination_configuration (cache = self )
5054
51- def get_arrow_dataset (
55+ def _read_to_pandas_dataframe (
5256 self ,
53- stream_name : str ,
54- * ,
55- max_chunk_size : int = DEFAULT_ARROW_MAX_CHUNK_SIZE ,
56- ) -> NoReturn :
57- """Raises NotImplementedError; BigQuery doesn't support `pd.read_sql_table`.
58-
59- See: https://github.com/airbytehq/PyAirbyte/issues/165
60- """
61- raise NotImplementedError (
62- "BigQuery doesn't currently support to_arrow"
63- "Please consider using a different cache implementation for these functionalities."
57+ table_name : str ,
58+ chunksize : int | None = None ,
59+ ** kwargs ,
60+ ) -> pd .DataFrame | Iterator [pd .DataFrame ]:
61+ # Pop unused kwargs, maybe not the best way to do this
62+ kwargs .pop ("con" , None )
63+ kwargs .pop ("schema" , None )
64+
65+ # Read the table using pandas_gbq
66+ credentials = Credentials .from_service_account_file (self .credentials_path )
67+ result = pandas_gbq .read_gbq (
68+ f"{ self .project_name } .{ self .dataset_name } .{ table_name } " ,
69+ project_id = self .project_name ,
70+ credentials = credentials ,
71+ ** kwargs ,
6472 )
6573
74+ # Cast result to DataFrame if it's not already a DataFrame
75+ if not isinstance (result , pd .DataFrame ):
76+ result = pd .DataFrame (result )
77+
78+ # Return chunks as iterator if chunksize is provided
79+ if chunksize is not None :
80+ return (result [i : i + chunksize ] for i in range (0 , len (result ), chunksize ))
81+
82+ return result
83+
6684
6785# Expose the Cache class and also the Config class.
6886__all__ = [
0 commit comments