
Commit 2b0f0fa

fix: Fix issue with iterating on >10gb dataframes (#949)
Co-authored-by: Tim Sweña (Swast) <[email protected]>
Parent: 1291110

3 files changed: +12 -1 lines


bigframes/core/blocks.py

Lines changed: 3 additions & 1 deletion
@@ -577,7 +577,9 @@ def to_pandas_batches(
         see https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.job.QueryJob#google_cloud_bigquery_job_QueryJob_result"""
         dtypes = dict(zip(self.index_columns, self.index.dtypes))
         dtypes.update(zip(self.value_columns, self.dtypes))
-        _, query_job = self.session._execute(self.expr, ordered=True)
+        _, query_job = self.session._executor.execute(
+            self.expr, ordered=True, use_explicit_destination=True
+        )
         results_iterator = query_job.result(
             page_size=page_size, max_results=max_results
         )
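Why this matters for callers: BigQuery caches query results in an anonymous temporary table capped at roughly 10 GB, so the old self.session._execute(...) path failed once a result set crossed that limit. Routing through the executor with use_explicit_destination=True writes results to a session-owned table instead. A minimal usage sketch of the user-facing effect (the project, dataset, and table names are hypothetical):

import bigframes.pandas as bpd

# Hypothetical source table; assume it holds well over 10 GB of data.
df = bpd.read_gbq("my-project.my_dataset.very_large_table")

# With this fix, iterating in batches works even when the materialized
# result exceeds the ~10 GB cap on anonymous cached result tables.
for batch in df.to_pandas_batches():
    print(batch.shape)  # each batch is a regular pandas DataFrame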

bigframes/session/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -1324,6 +1324,7 @@ def _execute(
         *,
         ordered: bool = True,
         col_id_overrides: Mapping[str, str] = {},
+        use_explicit_destination: bool = False,
     ) -> tuple[bigquery.table.RowIterator, bigquery.QueryJob]:
         return self._executor.execute(
             array_value,
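This hunk only threads the new keyword through Session._execute; the visible context cuts off after array_value, so the remaining arguments are not shown. A sketch of the assumed continuation, presuming the existing keywords are forwarded unchanged:

# Assumed continuation of Session._execute (not shown in the hunk above):
# forward the new flag to the executor alongside the existing options.
return self._executor.execute(
    array_value,
    ordered=ordered,
    col_id_overrides=col_id_overrides,
    use_explicit_destination=use_explicit_destination,
)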

bigframes/session/executor.py

Lines changed: 8 additions & 0 deletions
@@ -102,6 +102,7 @@ def execute(
         *,
         ordered: bool = True,
         col_id_overrides: Mapping[str, str] = {},
+        use_explicit_destination: bool = False,
     ):
         """
         Execute the ArrayValue, storing the result to a temporary session-owned table.
@@ -113,6 +114,13 @@ def execute(
             array_value, ordered=ordered, col_id_overrides=col_id_overrides
         )
         job_config = bigquery.QueryJobConfig()
+        # Use explicit destination to avoid 10GB limit of temporary table
+        if use_explicit_destination:
+            schema = array_value.schema.to_bigquery()
+            destination_table = self.storage_manager.create_temp_table(
+                schema, cluster_cols=[]
+            )
+            job_config.destination = destination_table
         # TODO(swast): plumb through the api_name of the user-facing api that
         # caused this query.
         return self._run_execute_query(
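The executor change is the heart of the fix: when use_explicit_destination is set, the query writes to an explicitly created session temp table via job_config.destination, sidestepping the ~10 GB cap that BigQuery applies to anonymous cached query results. The same pattern expressed in plain google-cloud-bigquery terms, as a minimal sketch (project, dataset, and table names are hypothetical):

from google.cloud import bigquery

client = bigquery.Client()

# Writing results to an explicit destination table lifts the ~10 GB cap
# that applies to anonymous (cached) query result tables.
job_config = bigquery.QueryJobConfig(
    destination="my-project.my_dataset.temp_results",  # hypothetical table
)
job = client.query(
    "SELECT * FROM `my-project.my_dataset.big_table`",  # hypothetical source
    job_config=job_config,
)

# The RowIterator pages through the destination table, so callers can
# stream batches without materializing the full result in memory.
for page in job.result(page_size=10_000).pages:
    for row in page:
        pass  # process each row

Creating the table up front with the result schema (as the diff does via storage_manager.create_temp_table) lets the session own and clean up the destination, rather than relying on BigQuery's anonymous result cache.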
