File tree Expand file tree Collapse file tree 3 files changed +14
-17
lines changed
tests/benchmark/read_gbq_colab Expand file tree Collapse file tree 3 files changed +14
-17
lines changed Original file line number Diff line number Diff line change 42
42
17486432.0 ,
43
43
1919625975.0 ,
44
44
],
45
- "num_materialized_or_scanned_rows" : [
46
- 0.0 ,
47
- 6.0 ,
48
- 100.0 ,
49
- 4955.0 ,
50
- 23108.0 ,
51
- 139504.0 ,
52
- 616341.0 ,
53
- 3855698.0 ,
54
- 83725698.0 ,
55
- 5991998082.0 ,
56
- ],
57
45
"avg_row_bytes" : [
58
46
0.00014346299635435792 ,
59
47
0.005370969708923197 ,
@@ -524,10 +512,11 @@ def main():
524
512
for i in range (num_percentiles ):
525
513
percentile = TABLE_STATS ["percentile" ][i ]
526
514
avg_row_bytes_raw = TABLE_STATS ["avg_row_bytes" ][i ]
527
- num_rows_raw = TABLE_STATS ["num_materialized_or_scanned_rows " ][i ]
515
+ table_bytes_raw = TABLE_STATS ["materialized_or_scanned_bytes " ][i ]
528
516
517
+ target_table_bytes = max (1 , int (math .ceil (table_bytes_raw )))
529
518
target_row_bytes = max (1 , int (math .ceil (avg_row_bytes_raw )))
530
- num_rows = max (1 , int (math .ceil (num_rows_raw )))
519
+ num_rows = max (1 , int (math .ceil (target_table_bytes / target_row_bytes )))
531
520
532
521
table_name = f"percentile_{ percentile :02d} "
533
522
print (f"\n --- Processing Table: { table_name } ---" )
Original file line number Diff line number Diff line change @@ -44,7 +44,7 @@ def aggregate_output(
44
44
df_aggregated = (
45
45
df .assign (rounded = df [group_column ].astype ("Int64" ).round (- 9 ))
46
46
.groupby ("rounded" )
47
- .sum ()
47
+ .sum (numeric_only = True )
48
48
)
49
49
50
50
df_aggregated .shape
Original file line number Diff line number Diff line change 14
14
import pathlib
15
15
16
16
import benchmark .utils as utils
17
+ import pytest
17
18
18
19
import bigframes .session
19
20
@@ -35,8 +36,15 @@ def filter_output(
35
36
36
37
# Simulate the user filtering by a column and visualizing those results
37
38
df_filtered = df [df ["col_bool_0" ]]
38
- df_filtered .shape
39
- next (iter (df_filtered .to_pandas_batches (page_size = PAGE_SIZE )))
39
+ rows , _ = df_filtered .shape
40
+
41
+ # It's possible we don't have any pages at all, since we filtered out all
42
+ # matching rows.
43
+ if rows == 0 :
44
+ with pytest .raises (StopIteration ):
45
+ next (iter (df_filtered .to_pandas_batches (page_size = PAGE_SIZE )))
46
+ else :
47
+ next (iter (df_filtered .to_pandas_batches (page_size = PAGE_SIZE )))
40
48
41
49
42
50
if __name__ == "__main__" :
You can’t perform that action at this time.
0 commit comments