import bigframes.exceptions as bfe
import bigframes.operations as ops
import bigframes.operations.aggregations as agg_ops
-from bigframes.session import dry_runs
+from bigframes.session import dry_runs, execution_spec
from bigframes.session import executor as executors

# Type constraint for wherever column labels are used
@@ -257,7 +257,10 @@ def shape(self) -> typing.Tuple[int, int]:
        except Exception:
            pass

-        row_count = self.session._executor.execute(self.expr.row_count()).to_py_scalar()
+        row_count = self.session._executor.execute(
+            self.expr.row_count(),
+            execution_spec.ExecutionSpec(promise_under_10gb=True, ordered=False),
+        ).to_py_scalar()
        return (row_count, len(self.value_columns))

    @property
@@ -557,8 +560,17 @@ def to_arrow(
        allow_large_results: Optional[bool] = None,
    ) -> Tuple[pa.Table, Optional[bigquery.QueryJob]]:
        """Run query and download results as a pyarrow Table."""
+        under_10gb = (
+            (not allow_large_results)
+            if (allow_large_results is not None)
+            else not bigframes.options._allow_large_results
+        )
        execute_result = self.session._executor.execute(
-            self.expr, ordered=ordered, use_explicit_destination=allow_large_results
+            self.expr,
+            execution_spec.ExecutionSpec(
+                promise_under_10gb=under_10gb,
+                ordered=ordered,
+            ),
        )
        pa_table = execute_result.to_arrow_table()
@@ -647,8 +659,15 @@ def try_peek(
        self, n: int = 20, force: bool = False, allow_large_results=None
    ) -> typing.Optional[pd.DataFrame]:
        if force or self.expr.supports_fast_peek:
-            result = self.session._executor.peek(
-                self.expr, n, use_explicit_destination=allow_large_results
+            # really, we should just block insane peek values and always assume <10gb
+            under_10gb = (
+                (not allow_large_results)
+                if (allow_large_results is not None)
+                else not bigframes.options._allow_large_results
+            )
+            result = self.session._executor.execute(
+                self.expr,
+                execution_spec.ExecutionSpec(promise_under_10gb=under_10gb, peek=n),
            )
            df = result.to_pandas()
            return self._copy_index_to_pandas(df)
@@ -665,10 +684,18 @@ def to_pandas_batches(

        page_size and max_results determine the size and number of batches,
        see https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.job.QueryJob#google_cloud_bigquery_job_QueryJob_result"""
+
+        under_10gb = (
+            (not allow_large_results)
+            if (allow_large_results is not None)
+            else not bigframes.options._allow_large_results
+        )
        execute_result = self.session._executor.execute(
            self.expr,
-            ordered=True,
-            use_explicit_destination=allow_large_results,
+            execution_spec.ExecutionSpec(
+                promise_under_10gb=under_10gb,
+                ordered=True,
+            ),
        )

        # To reduce the number of edge cases to consider when working with the
@@ -714,10 +741,17 @@ def _materialize_local(
    ) -> Tuple[pd.DataFrame, Optional[bigquery.QueryJob]]:
        """Run query and download results as a pandas DataFrame. Return the total number of results as well."""
        # TODO(swast): Allow for dry run and timeout.
+        under_10gb = (
+            (not materialize_options.allow_large_results)
+            if (materialize_options.allow_large_results is not None)
+            else (not bigframes.options._allow_large_results)
+        )
        execute_result = self.session._executor.execute(
            self.expr,
-            ordered=materialize_options.ordered,
-            use_explicit_destination=materialize_options.allow_large_results,
+            execution_spec.ExecutionSpec(
+                promise_under_10gb=under_10gb,
+                ordered=materialize_options.ordered,
+            ),
        )
        sample_config = materialize_options.downsampling
        if execute_result.total_bytes is not None:
@@ -1598,9 +1632,19 @@ def retrieve_repr_request_results(
            config=executors.CacheConfig(optimize_for="head", if_cached="reuse-strict"),
        )
        head_result = self.session._executor.execute(
-            self.expr.slice(start=None, stop=max_results, step=None)
+            self.expr.slice(start=None, stop=max_results, step=None),
+            execution_spec.ExecutionSpec(
+                promise_under_10gb=True,
+                ordered=True,
+            ),
        )
-        row_count = self.session._executor.execute(self.expr.row_count()).to_py_scalar()
+        row_count = self.session._executor.execute(
+            self.expr.row_count(),
+            execution_spec.ExecutionSpec(
+                promise_under_10gb=True,
+                ordered=False,
+            ),
+        ).to_py_scalar()

        head_df = head_result.to_pandas()
        return self._copy_index_to_pandas(head_df), row_count, head_result.query_job
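
The hunks above show only how ExecutionSpec is consumed, not how it is defined. A minimal sketch of the shape implied by these call sites, assuming bigframes/session/execution_spec.py defines roughly a frozen dataclass with defaults; the field defaults are guesses from which keywords each call site omits, and the real definition may carry more fields (e.g. an explicit destination or caching hints):

import dataclasses
from typing import Optional

@dataclasses.dataclass(frozen=True)
class ExecutionSpec:
    # Caller promises the result fits under ~10 GB, letting the executor
    # skip an explicit destination table (assumed semantics of the flag).
    promise_under_10gb: bool = False
    # Whether row order must be preserved in the result.
    ordered: bool = True
    # If set, only a preview of the first `peek` rows is needed.
    peek: Optional[int] = None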
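
The same three-line resolution of allow_large_results against the session-wide option appears at four call sites in this diff. Purely as an illustration, not something the change itself does, the repeated ternary could be hoisted into a small helper along these lines (hypothetical name, assuming the module-level bigframes import already present in this file):

from typing import Optional

def _resolve_under_10gb(allow_large_results: Optional[bool]) -> bool:
    # An explicit per-call argument wins; otherwise fall back to the
    # session-wide bigframes.options._allow_large_results default.
    if allow_large_results is not None:
        return not allow_large_results
    return not bigframes.options._allow_large_results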