@@ -35,7 +35,6 @@
 import bigframes.core as core
 import bigframes.core.guid as guid
 import bigframes.core.indexes as indexes
-import bigframes.core.joins as joins
 import bigframes.core.joins.name_resolution as join_names
 import bigframes.core.ordering as ordering
 import bigframes.core.utils
@@ -378,7 +377,7 @@ def _to_dataframe(self, result) -> pd.DataFrame:
         """Convert BigQuery data to pandas DataFrame with specific dtypes."""
         dtypes = dict(zip(self.index_columns, self.index_dtypes))
         dtypes.update(zip(self.value_columns, self.dtypes))
-        return self._expr._session._rows_to_dataframe(result, dtypes)
+        return self._expr.session._rows_to_dataframe(result, dtypes)

     def to_pandas(
         self,
@@ -422,7 +421,7 @@ def to_pandas_batches(self):
         dtypes.update(zip(self.value_columns, self.dtypes))
         results_iterator, _ = self._expr.start_query()
         for arrow_table in results_iterator.to_arrow_iterable(
-            bqstorage_client=self._expr._session.bqstoragereadclient
+            bqstorage_client=self._expr.session.bqstoragereadclient
         ):
             df = bigframes.session._io.pandas.arrow_to_pandas(arrow_table, dtypes)
             self._copy_index_to_pandas(df)
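The two hunks above swap the private self._expr._session attribute for a public session accessor on the expression object. A minimal sketch of that pattern, assuming (the class and field layout here are not taken from the bigframes source) that the session is stored privately and exposed read-only:

# Hypothetical sketch: exposing a privately stored session via a read-only property,
# so callers write expr.session instead of reaching into expr._session.
class ArrayValueSketch:
    def __init__(self, session):
        self._session = session  # private storage, set once at construction

    @property
    def session(self):
        # Public read-only view; no setter is defined, so callers cannot rebind it.
        return self._session

With an accessor like this, call sites such as expr.session.bqstoragereadclient no longer depend on a private attribute name.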
@@ -454,7 +453,9 @@ def _compute_and_count(

         results_iterator, query_job = expr.start_query(max_results=max_results)

-        table_size = expr._get_table_size(query_job.destination) / _BYTES_TO_MEGABYTES
+        table_size = (
+            expr.session._get_table_size(query_job.destination) / _BYTES_TO_MEGABYTES
+        )
         fraction = (
             max_download_size / table_size
             if (max_download_size is not None) and (table_size != 0)
@@ -819,7 +820,9 @@ def aggregate_all_and_stack(
         axis: int | str = 0,
         value_col_id: str = "values",
         dropna: bool = True,
-        dtype=pd.Float64Dtype(),
+        dtype: typing.Union[
+            bigframes.dtypes.Dtype, typing.Tuple[bigframes.dtypes.Dtype, ...]
+        ] = pd.Float64Dtype(),
     ) -> Block:
         axis_n = utils.get_axis_number(axis)
         if axis_n == 0:
@@ -829,7 +832,7 @@ def aggregate_all_and_stack(
             result_expr = self.expr.aggregate(aggregations, dropna=dropna).unpivot(
                 row_labels=self.column_labels.to_list(),
                 index_col_ids=["index"],
-                unpivot_columns=[(value_col_id, self.value_columns)],
+                unpivot_columns=tuple([(value_col_id, tuple(self.value_columns))]),
                 dtype=dtype,
             )
             return Block(result_expr, index_columns=["index"], column_labels=[None])
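This hunk, and several below, wrap the list arguments passed to unpivot in tuple(...). A plausible motivation, stated here as an assumption rather than taken from the commit, is immutability: tuples of hashable elements are themselves hashable, so tuple-valued arguments can be compared, cached, or used as dict keys, while lists never can. A standalone illustration:

# Tuples of hashable items are hashable; lists are not.
unpivot_columns = tuple([("values", ("col_a", "col_b"))])
print(hash(unpivot_columns))  # succeeds: usable as a dict or cache key

try:
    hash([("values", ["col_a", "col_b"])])
except TypeError as exc:
    print(exc)  # unhashable type: 'list'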
@@ -841,7 +844,7 @@ def aggregate_all_and_stack(
             stacked_expr = expr_with_offsets.unpivot(
                 row_labels=self.column_labels.to_list(),
                 index_col_ids=[guid.generate_guid()],
-                unpivot_columns=[(value_col_id, self.value_columns)],
+                unpivot_columns=[(value_col_id, tuple(self.value_columns))],
                 passthrough_columns=[*self.index_columns, offset_col],
                 dtype=dtype,
             )
@@ -1029,13 +1032,13 @@ def summarize(
             for col_id in column_ids
         ]
         columns = [
-            (col_id, [f"{col_id}-{stat.name}" for stat in stats])
+            (col_id, tuple(f"{col_id}-{stat.name}" for stat in stats))
             for col_id in column_ids
         ]
         expr = self.expr.aggregate(aggregations).unpivot(
             labels,
-            unpivot_columns=columns,
-            index_col_ids=[label_col_id],
+            unpivot_columns=tuple(columns),
+            index_col_ids=tuple([label_col_id]),
         )
         labels = self._get_labels_for_columns(column_ids)
         return Block(expr, column_labels=labels, index_columns=[label_col_id])
@@ -1342,7 +1345,7 @@ def stack(self, how="left", levels: int = 1):
             passthrough_columns=self.index_columns,
             unpivot_columns=unpivot_columns,
             index_col_ids=added_index_columns,
-            dtype=dtypes,
+            dtype=tuple(dtypes),
             how=how,
         )
         new_index_level_names = self.column_labels.names[-levels:]
@@ -1382,7 +1385,7 @@ def _create_stack_column(
             dtype = self._column_type(input_id)
             input_columns.append(input_id)
             # Input column i is the first one that
-        return input_columns, dtype or pd.Float64Dtype()
+        return tuple(input_columns), dtype or pd.Float64Dtype()

     def _column_type(self, col_id: str) -> bigframes.dtypes.Dtype:
         col_offset = self.value_columns.index(col_id)
@@ -1497,8 +1500,7 @@ def merge(
         sort: bool,
         suffixes: tuple[str, str] = ("_x", "_y"),
     ) -> Block:
-        joined_expr = joins.join_by_column(
-            self.expr,
+        joined_expr = self.expr.join(
             left_join_ids,
             other.expr,
             right_join_ids,
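The merge hunk turns the free function joins.join_by_column(self.expr, ...) into the method call self.expr.join(...), which is what lets the bigframes.core.joins import disappear in the first hunk. A toy sketch of that refactor shape, with every name hypothetical:

# Before: a module-level helper, left operand passed explicitly.
#   joined = joins.join_by_column(left_expr, left_ids, right_expr, right_ids)
# After: a method on the left operand, so the helper-module import goes away.
#   joined = left_expr.join(left_ids, right_expr, right_ids)
class Expr:
    def __init__(self, name: str):
        self.name = name

    def join(self, left_ids, right: "Expr", right_ids, how: str = "left") -> "Expr":
        # Toy body: records what would be joined instead of doing real work.
        return Expr(f"({self.name} {how.upper()} JOIN {right.name} ON {left_ids}={right_ids})")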
@@ -1708,7 +1710,7 @@ def _is_monotonic(
         return result


-def block_from_local(data, session=None) -> Block:
+def block_from_local(data) -> Block:
     pd_data = pd.DataFrame(data)
     columns = pd_data.columns

@@ -1730,7 +1732,7 @@ def block_from_local(data, session=None) -> Block:
     )
     index_ids = pd_data.columns[: len(index_labels)]

-    keys_expr = core.ArrayValue.mem_expr_from_pandas(pd_data, session)
+    keys_expr = core.ArrayValue.from_pandas(pd_data)
     return Block(
         keys_expr,
         column_labels=columns,