feat: add allow_large_results option to read_gbq_query. Set to False to enable faster queries #1935

Status: Open. Wants to merge 4 commits into main.
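
A minimal usage sketch of the new option, assuming it ships with the signature shown in the diffs below (the query is illustrative):

import bigframes.pandas as bpd

# allow_large_results=False opts into the faster small-results path: the
# result must fit within BigQuery's maximum response size, and (per the
# system test below) it is embedded locally rather than read back from a
# destination table.
df_fast = bpd.read_gbq_query("SELECT 1 AS x", allow_large_results=False)

# The default, True, keeps the existing behavior and supports results of
# any size.
df_big = bpd.read_gbq_query("SELECT 1 AS x", allow_large_results=True)
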
8 changes: 8 additions & 0 deletions bigframes/pandas/io/api.py
@@ -184,6 +184,7 @@ def read_gbq( # type: ignore[overload-overlap]
use_cache: Optional[bool] = ...,
col_order: Iterable[str] = ...,
dry_run: Literal[False] = ...,
allow_large_results: bool = ...,
) -> bigframes.dataframe.DataFrame:
...

@@ -200,6 +201,7 @@ def read_gbq(
use_cache: Optional[bool] = ...,
col_order: Iterable[str] = ...,
dry_run: Literal[True] = ...,
allow_large_results: bool = ...,
) -> pandas.Series:
...

@@ -215,6 +217,7 @@ def read_gbq(
use_cache: Optional[bool] = None,
col_order: Iterable[str] = (),
dry_run: bool = False,
allow_large_results: bool = True,
A contributor commented on the allow_large_results default here:
Should allow_large_results also default to None? This would allow it to inherit its value from ComputeOptions.allow_large_results.
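
A sketch of what that suggestion could look like, assuming ComputeOptions grows an allow_large_results attribute reachable via the global options object (the helper name is hypothetical):

from typing import Optional

import bigframes

def _resolve_allow_large_results(explicit: Optional[bool]) -> bool:
    # Hypothetical helper: an explicit argument wins; None falls back to
    # the session-wide ComputeOptions.allow_large_results setting.
    if explicit is not None:
        return explicit
    inherited = bigframes.options.compute.allow_large_results
    # Treat an unset option as the current default, True.
    return True if inherited is None else inherited
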

) -> bigframes.dataframe.DataFrame | pandas.Series:
_set_default_session_location_if_possible(query_or_table)
return global_session.with_default_session(
@@ -228,6 +231,7 @@ def read_gbq(
use_cache=use_cache,
col_order=col_order,
dry_run=dry_run,
allow_large_results=allow_large_results,
)


@@ -391,6 +395,7 @@ def read_gbq_query( # type: ignore[overload-overlap]
col_order: Iterable[str] = ...,
filters: vendored_pandas_gbq.FiltersType = ...,
dry_run: Literal[False] = ...,
allow_large_results: bool = ...,
) -> bigframes.dataframe.DataFrame:
...

@@ -407,6 +412,7 @@ def read_gbq_query(
col_order: Iterable[str] = ...,
filters: vendored_pandas_gbq.FiltersType = ...,
dry_run: Literal[True] = ...,
allow_large_results: bool = ...,
) -> pandas.Series:
...

@@ -422,6 +428,7 @@ def read_gbq_query(
col_order: Iterable[str] = (),
filters: vendored_pandas_gbq.FiltersType = (),
dry_run: bool = False,
allow_large_results: bool = True,
) -> bigframes.dataframe.DataFrame | pandas.Series:
_set_default_session_location_if_possible(query)
return global_session.with_default_session(
@@ -435,6 +442,7 @@ def read_gbq_query(
col_order=col_order,
filters=filters,
dry_run=dry_run,
allow_large_results=allow_large_results,
)


89 changes: 83 additions & 6 deletions bigframes/session/__init__.py
@@ -394,6 +394,7 @@ def read_gbq( # type: ignore[overload-overlap]
use_cache: Optional[bool] = ...,
col_order: Iterable[str] = ...,
dry_run: Literal[False] = ...,
allow_large_results: bool = ...,
) -> dataframe.DataFrame:
...

@@ -410,6 +411,7 @@ def read_gbq(
use_cache: Optional[bool] = ...,
col_order: Iterable[str] = ...,
dry_run: Literal[True] = ...,
allow_large_results: bool = ...,
) -> pandas.Series:
...

@@ -424,8 +426,8 @@ def read_gbq(
filters: third_party_pandas_gbq.FiltersType = (),
use_cache: Optional[bool] = None,
col_order: Iterable[str] = (),
- dry_run: bool = False
- # Add a verify index argument that fails if the index is not unique.
+ dry_run: bool = False,
+ allow_large_results: bool = True,
) -> dataframe.DataFrame | pandas.Series:
# TODO(b/281571214): Generate prompt to show the progress of read_gbq.
if columns and col_order:
@@ -445,6 +447,7 @@ def read_gbq(
use_cache=use_cache,
filters=filters,
dry_run=dry_run,
allow_large_results=allow_large_results,
)
else:
if configuration is not None:
@@ -551,6 +554,7 @@ def read_gbq_query( # type: ignore[overload-overlap]
col_order: Iterable[str] = ...,
filters: third_party_pandas_gbq.FiltersType = ...,
dry_run: Literal[False] = ...,
allow_large_results: bool = ...,
) -> dataframe.DataFrame:
...

@@ -567,6 +571,7 @@ def read_gbq_query(
col_order: Iterable[str] = ...,
filters: third_party_pandas_gbq.FiltersType = ...,
dry_run: Literal[True] = ...,
allow_large_results: bool = ...,
) -> pandas.Series:
...

@@ -582,6 +587,7 @@ def read_gbq_query(
col_order: Iterable[str] = (),
filters: third_party_pandas_gbq.FiltersType = (),
dry_run: bool = False,
allow_large_results: bool = True,
) -> dataframe.DataFrame | pandas.Series:
"""Turn a SQL query into a DataFrame.

@@ -631,9 +637,48 @@ def read_gbq_query(

See also: :meth:`Session.read_gbq`.

Args:
query (str):
A SQL query to execute.
index_col (Iterable[str] or str, optional):
The column(s) to use as the index for the DataFrame. This can be
a single column name or a list of column names. If not provided,
a default index will be used.
columns (Iterable[str], optional):
The columns to read from the query result. If not
specified, all columns will be read.
configuration (dict, optional):
A dictionary of query job configuration options. See the
BigQuery REST API documentation for a list of available options:
https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query
max_results (int, optional):
The maximum number of rows to retrieve from the query
result. If not specified, all rows will be loaded.
use_cache (bool, optional):
Whether to use cached results for the query. Defaults to ``True``.
Setting this to ``False`` will force a re-execution of the query.
col_order (Iterable[str], optional):
The desired order of columns in the resulting DataFrame. This
parameter is deprecated and will be removed in a future version.
Use ``columns`` instead.
filters (list[tuple], optional):
A list of filters to apply to the data. Filters are specified
as a list of tuples, where each tuple contains a column name,
an operator (e.g., '==', '!='), and a value.
dry_run (bool, optional):
If ``True``, the function will not actually execute the query but
will instead return statistics about the query. Defaults to
``False``.
allow_large_results (bool, optional):
Whether to allow large query results. If ``True``, the query
results can be larger than the maximum response size.
Defaults to ``True``.

Returns:
- bigframes.pandas.DataFrame:
- A DataFrame representing results of the query or table.
+ bigframes.pandas.DataFrame or pandas.Series:
+ A DataFrame representing the result of the query. If ``dry_run``
+ is ``True``, a ``pandas.Series`` containing query statistics is
+ returned.

Raises:
ValueError:
@@ -657,6 +702,7 @@ def read_gbq_query(
use_cache=use_cache,
filters=filters,
dry_run=dry_run,
allow_large_results=allow_large_results,
)
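
As the updated docstring and return annotation describe, dry_run=True returns query statistics as a pandas.Series instead of executing anything; a small sketch (the public dataset and the exact statistic labels are illustrative):

import bigframes.pandas as bpd

stats = bpd.read_gbq_query(
    "SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` LIMIT 10",
    dry_run=True,
)
# No query job is executed; the Series carries statistics such as the
# estimated bytes the query would process.
print(stats)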

@overload
@@ -714,9 +760,40 @@ def read_gbq_table(

See also: :meth:`Session.read_gbq`.

Args:
table_id (str):
The identifier of the BigQuery table to read.
index_col (Iterable[str] or str, optional):
The column(s) to use as the index for the DataFrame. This can be
a single column name or a list of column names. If not provided,
a default index will be used.
columns (Iterable[str], optional):
The columns to read from the table. If not specified, all
columns will be read.
max_results (int, optional):
The maximum number of rows to retrieve from the table. If not
specified, all rows will be loaded.
filters (list[tuple], optional):
A list of filters to apply to the data. Filters are specified
as a list of tuples, where each tuple contains a column name,
an operator (e.g., '==', '!='), and a value.
use_cache (bool, optional):
Whether to use cached results for the query. Defaults to ``True``.
Setting this to ``False`` will force a re-execution of the query.
col_order (Iterable[str], optional):
The desired order of columns in the resulting DataFrame. This
parameter is deprecated and will be removed in a future version.
Use ``columns`` instead.
dry_run (bool, optional):
If ``True``, the function will not actually execute the query but
will instead return statistics about the table. Defaults to
``False``.

Returns:
- bigframes.pandas.DataFrame:
- A DataFrame representing results of the query or table.
+ bigframes.pandas.DataFrame or pandas.Series:
+ A DataFrame representing the contents of the table. If
+ ``dry_run`` is ``True``, a ``pandas.Series`` containing table
+ statistics is returned.

Raises:
ValueError:
32 changes: 32 additions & 0 deletions tests/system/small/test_session.py
@@ -33,6 +33,7 @@
import pytest

import bigframes
import bigframes.core.nodes as nodes
import bigframes.dataframe
import bigframes.dtypes
import bigframes.ml.linear_model
@@ -640,6 +641,37 @@ def test_read_gbq_with_configuration(
assert df.shape == (9, 3)


def test_read_gbq_query_w_allow_large_results(session: bigframes.Session):
if not hasattr(session.bqclient, "default_job_creation_mode"):
pytest.skip("Jobless query only available on newer google-cloud-bigquery.")

query = "SELECT 1"

# Make sure we don't get a cached table.
configuration = {"query": {"useQueryCache": False}}

# Very small results should wrap a local node.
df_false = session.read_gbq(
query,
configuration=configuration,
allow_large_results=False,
)
assert df_false.shape == (1, 1)
roots_false = df_false._get_block().expr.node.roots
assert any(isinstance(node, nodes.ReadLocalNode) for node in roots_false)
assert not any(isinstance(node, nodes.ReadTableNode) for node in roots_false)

# Large results allowed should wrap a table.
df_true = session.read_gbq(
query,
configuration=configuration,
allow_large_results=True,
)
assert df_true.shape == (1, 1)
roots_true = df_true._get_block().expr.node.roots
assert any(isinstance(node, nodes.ReadTableNode) for node in roots_true)


def test_read_gbq_with_custom_global_labels(
session: bigframes.Session, scalars_table_id: str
):
6 changes: 6 additions & 0 deletions third_party/bigframes_vendored/pandas/io/gbq.py
@@ -25,6 +25,7 @@ def read_gbq(
filters: FiltersType = (),
use_cache: Optional[bool] = None,
col_order: Iterable[str] = (),
allow_large_results: bool = True,
):
"""Loads a DataFrame from BigQuery.

@@ -156,6 +157,11 @@
`configuration` to avoid conflicts.
col_order (Iterable[str]):
Alias for columns, retained for backwards compatibility.
allow_large_results (bool, optional):
Whether to allow large query results. If ``True``, the query
results can be larger than the maximum response size. This
option is only applicable when ``query_or_table`` is a query.
Defaults to ``True``.

Raises:
bigframes.exceptions.DefaultIndexWarning:
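
Since the docstring above notes that allow_large_results only applies when query_or_table is a query, a quick illustration (the table path is a public-dataset example):

import bigframes.pandas as bpd

# Query text takes the query path, so allow_large_results applies.
df_query = bpd.read_gbq("SELECT 1 AS x", allow_large_results=False)

# A plain table reference takes the table-read path; the option is
# accepted but has no effect.
df_table = bpd.read_gbq("bigquery-public-data.usa_names.usa_1910_2013")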