|
33 | 33 | from collections.abc import Callable |
34 | 34 | from contextlib import AbstractContextManager |
35 | 35 |
|
36 | | - from sqlspec.core import SQL, SQLResult |
| 36 | + from sqlspec.builder import QueryBuilder |
| 37 | + from sqlspec.core import SQL, SQLResult, Statement, StatementFilter |
| 38 | + from sqlspec.core.result import ArrowResult |
37 | 39 | from sqlspec.driver import SyncDataDictionaryBase |
| 40 | + from sqlspec.typing import StatementParameters |
38 | 41 |
|
39 | 42 | logger = logging.getLogger(__name__) |
40 | 43 |
|
@@ -758,3 +761,137 @@ def data_dictionary(self) -> "SyncDataDictionaryBase": |
758 | 761 |
|
759 | 762 | self._data_dictionary = BigQuerySyncDataDictionary() |
760 | 763 | return self._data_dictionary |
| 764 | + |
| 765 | + def _storage_api_available(self) -> bool: |
| 766 | + """Check if BigQuery Storage API is available. |
| 767 | +
|
| 768 | + Returns: |
| 769 | + True if Storage API is available and working, False otherwise |
| 770 | + """ |
| 771 | + try: |
| 772 | + from google.cloud import bigquery_storage_v1 # type: ignore[attr-defined] |
| 773 | + |
| 774 | + # Try to create client (will fail if API not enabled or credentials missing) |
| 775 | + _ = bigquery_storage_v1.BigQueryReadClient() |
| 776 | + except ImportError: |
| 777 | + # Package not installed |
| 778 | + return False |
| 779 | + except Exception: |
| 780 | + # API not enabled or permissions issue |
| 781 | + return False |
| 782 | + else: |
| 783 | + return True |
| 784 | + |
def select_to_arrow(
    self,
    statement: "Statement | QueryBuilder",
    /,
    *parameters: "StatementParameters | StatementFilter",
    statement_config: "StatementConfig | None" = None,
    return_format: str = "table",
    native_only: bool = False,
    batch_size: int | None = None,
    arrow_schema: Any = None,
    **kwargs: Any,
) -> "ArrowResult":
    """Execute query and return results as Apache Arrow (BigQuery native with Storage API).

    BigQuery provides native Arrow via Storage API (query_job.to_arrow()).
    Requires google-cloud-bigquery-storage package and API enabled.
    Falls back to dict conversion if Storage API not available.

    Args:
        statement: SQL statement, string, or QueryBuilder
        *parameters: Query parameters or filters
        statement_config: Optional statement configuration override
        return_format: "table" for pyarrow.Table (default), "batch" for a single
            RecordBatch holding all rows of the result
        native_only: If True, raise error if Storage API unavailable (default: False)
        batch_size: Batch size hint (for future streaming implementation); only
            forwarded to the fallback path, unused by the native path
        arrow_schema: Optional pyarrow.Schema for type casting
        **kwargs: Additional keyword arguments

    Returns:
        ArrowResult with native Arrow data (if Storage API available) or converted data

    Raises:
        MissingDependencyError: If pyarrow not installed, or if Storage API not available and native_only=True
        SQLExecutionError: If query execution fails

    Example:
        >>> # Will use native Arrow if Storage API available, otherwise converts
        >>> result = driver.select_to_arrow(
        ...     "SELECT * FROM dataset.users WHERE age > @age",
        ...     {"age": 18},
        ... )
        >>> df = result.to_pandas()

        >>> # Force native Arrow (raises if Storage API unavailable)
        >>> result = driver.select_to_arrow(
        ...     "SELECT * FROM dataset.users", native_only=True
        ... )
    """
    from sqlspec.utils.module_loader import ensure_pyarrow

    ensure_pyarrow()

    # Check Storage API availability
    if not self._storage_api_available():
        if native_only:
            from sqlspec.exceptions import MissingDependencyError

            msg = (
                "BigQuery native Arrow requires Storage API.\n"
                "1. Install: pip install google-cloud-bigquery-storage\n"
                "2. Enable API: https://console.cloud.google.com/apis/library/bigquerystorage.googleapis.com\n"
                "3. Grant permissions: roles/bigquery.dataViewer"
            )
            # The setup instructions travel as the chained cause of the error.
            raise MissingDependencyError(
                package="google-cloud-bigquery-storage", install_package="google-cloud-bigquery-storage"
            ) from RuntimeError(msg)

        # Fallback to conversion path (native_only is necessarily False here)
        result: ArrowResult = super().select_to_arrow(
            statement,
            *parameters,
            statement_config=statement_config,
            return_format=return_format,
            native_only=native_only,
            batch_size=batch_size,
            arrow_schema=arrow_schema,
            **kwargs,
        )
        return result

    # Use native path with Storage API
    import pyarrow as pa

    from sqlspec.core.result import create_arrow_result

    # Prepare statement
    config = statement_config or self.statement_config
    prepared_statement = self.prepare_statement(statement, parameters, statement_config=config, kwargs=kwargs)

    # Get compiled SQL and parameters
    sql, driver_params = self._get_compiled_sql(prepared_statement, config)

    # Execute query using existing _run_query_job method
    with self.handle_database_exceptions():
        query_job = self._run_query_job(sql, driver_params)
        query_job.result()  # Wait for completion

        # Native Arrow via Storage API
        arrow_table = query_job.to_arrow()

        # Apply schema casting if requested
        if arrow_schema is not None:
            arrow_table = arrow_table.cast(arrow_schema)

        # Convert to batch if requested
        if return_format == "batch":
            # A table read from several streams has several chunks, and
            # to_batches() yields one batch per chunk. Combine the chunks
            # first so that taking the first batch does not drop rows.
            # NOTE(review): assumes the combined table fits in one batch;
            # confirm for very large string/binary columns.
            batches = arrow_table.combine_chunks().to_batches()
            if batches:
                arrow_data: Any = batches[0]
            else:
                # Empty result: keep the column names and types instead of
                # returning a schema-less batch.
                schema = arrow_table.schema
                arrow_data = pa.RecordBatch.from_arrays(
                    [pa.array([], type=field.type) for field in schema], schema=schema
                )
        else:
            arrow_data = arrow_table

        # Create ArrowResult
        return create_arrow_result(
            statement=prepared_statement, data=arrow_data, rows_affected=arrow_data.num_rows
        )
0 commit comments