File tree Expand file tree Collapse file tree 3 files changed +14
-17
lines changed
tests/benchmark/read_gbq_colab Expand file tree Collapse file tree 3 files changed +14
-17
lines changed Original file line number Diff line number Diff line change 4242 17486432.0 ,
4343 1919625975.0 ,
4444 ],
45- "num_materialized_or_scanned_rows" : [
46- 0.0 ,
47- 6.0 ,
48- 100.0 ,
49- 4955.0 ,
50- 23108.0 ,
51- 139504.0 ,
52- 616341.0 ,
53- 3855698.0 ,
54- 83725698.0 ,
55- 5991998082.0 ,
56- ],
5745 "avg_row_bytes" : [
5846 0.00014346299635435792 ,
5947 0.005370969708923197 ,
@@ -524,10 +512,11 @@ def main():
524512 for i in range (num_percentiles ):
525513 percentile = TABLE_STATS ["percentile" ][i ]
526514 avg_row_bytes_raw = TABLE_STATS ["avg_row_bytes" ][i ]
527- num_rows_raw = TABLE_STATS ["num_materialized_or_scanned_rows " ][i ]
515+ table_bytes_raw = TABLE_STATS ["materialized_or_scanned_bytes " ][i ]
528516
517+ target_table_bytes = max (1 , int (math .ceil (table_bytes_raw )))
529518 target_row_bytes = max (1 , int (math .ceil (avg_row_bytes_raw )))
530- num_rows = max (1 , int (math .ceil (num_rows_raw )))
519+ num_rows = max (1 , int (math .ceil (target_table_bytes / target_row_bytes )))
531520
532521 table_name = f"percentile_{ percentile :02d} "
533522 print (f"\n --- Processing Table: { table_name } ---" )
Original file line number Diff line number Diff line change @@ -44,7 +44,7 @@ def aggregate_output(
4444 df_aggregated = (
4545 df .assign (rounded = df [group_column ].astype ("Int64" ).round (- 9 ))
4646 .groupby ("rounded" )
47- .sum ()
47+ .sum (numeric_only = True )
4848 )
4949
5050 df_aggregated .shape
Original file line number Diff line number Diff line change 1414import pathlib
1515
1616import benchmark .utils as utils
17+ import pytest
1718
1819import bigframes .session
1920
@@ -35,8 +36,15 @@ def filter_output(
3536
3637 # Simulate the user filtering by a column and visualizing those results
3738 df_filtered = df [df ["col_bool_0" ]]
38- df_filtered .shape
39- next (iter (df_filtered .to_pandas_batches (page_size = PAGE_SIZE )))
39+ rows , _ = df_filtered .shape
40+
41+ # It's possible we don't have any pages at all, because the filter may
42+ # have matched no rows.
43+ if rows == 0 :
44+ with pytest .raises (StopIteration ):
45+ next (iter (df_filtered .to_pandas_batches (page_size = PAGE_SIZE )))
46+ else :
47+ next (iter (df_filtered .to_pandas_batches (page_size = PAGE_SIZE )))
4048
4149
4250if __name__ == "__main__" :
You can’t perform that action at this time.
0 commit comments