Skip to content

Commit c135cee

Browse files
chore: use bigframes.pandas._read_gbq_colab() in benchmarks (#1942)
Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com>
1 parent a6d30ae commit c135cee

File tree

7 files changed

+28
-38
lines changed

7 files changed

+28
-38
lines changed

tests/benchmark/read_gbq_colab/aggregate_output.py

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -15,19 +15,15 @@
1515

1616
import benchmark.utils as utils
1717

18-
import bigframes.session
18+
import bigframes.pandas as bpd
1919

2020
PAGE_SIZE = utils.READ_GBQ_COLAB_PAGE_SIZE
2121

2222

23-
def aggregate_output(
24-
*, project_id, dataset_id, table_id, session: bigframes.session.Session
25-
):
23+
def aggregate_output(*, project_id, dataset_id, table_id):
2624
# TODO(tswast): Support alternative query if table_id is a local DataFrame,
2725
# e.g. "{local_inline}" or "{local_large}"
28-
df = session._read_gbq_colab(
29-
f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}"
30-
)
26+
df = bpd._read_gbq_colab(f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}")
3127

3228
# Simulate getting the first page, since we'll always do that first in the UI.
3329
df.shape

tests/benchmark/read_gbq_colab/dry_run.py

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -15,20 +15,20 @@
1515

1616
import benchmark.utils as utils
1717

18-
import bigframes.session
18+
import bigframes.pandas
1919

2020

21-
def dry_run(*, project_id, dataset_id, table_id, session: bigframes.session.Session):
21+
def dry_run(*, project_id, dataset_id, table_id):
2222
# TODO(tswast): Support alternative query if table_id is a local DataFrame,
2323
# e.g. "{local_inline}" or "{local_large}"
24-
session._read_gbq_colab(
24+
bigframes.pandas._read_gbq_colab(
2525
f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}",
2626
dry_run=True,
2727
)
2828

2929

3030
if __name__ == "__main__":
31-
config = utils.get_configuration(include_table_id=True)
31+
config = utils.get_configuration(include_table_id=True, start_session=False)
3232
current_path = pathlib.Path(__file__).absolute()
3333

3434
utils.get_execution_time(
@@ -38,5 +38,4 @@ def dry_run(*, project_id, dataset_id, table_id, session: bigframes.session.Sess
3838
project_id=config.project_id,
3939
dataset_id=config.dataset_id,
4040
table_id=config.table_id,
41-
session=config.session,
4241
)

tests/benchmark/read_gbq_colab/filter_output.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -15,19 +15,20 @@
1515

1616
import benchmark.utils as utils
1717

18-
import bigframes.session
18+
import bigframes.pandas as bpd
1919

2020
PAGE_SIZE = utils.READ_GBQ_COLAB_PAGE_SIZE
2121

2222

2323
def filter_output(
24-
*, project_id, dataset_id, table_id, session: bigframes.session.Session
24+
*,
25+
project_id,
26+
dataset_id,
27+
table_id,
2528
):
2629
# TODO(tswast): Support alternative query if table_id is a local DataFrame,
2730
# e.g. "{local_inline}" or "{local_large}"
28-
df = session._read_gbq_colab(
29-
f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}"
30-
)
31+
df = bpd._read_gbq_colab(f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}")
3132

3233
# Simulate getting the first page, since we'll always do that first in the UI.
3334
df.shape
@@ -54,5 +55,4 @@ def filter_output(
5455
project_id=config.project_id,
5556
dataset_id=config.dataset_id,
5657
table_id=config.table_id,
57-
session=config.session,
5858
)

tests/benchmark/read_gbq_colab/first_page.py

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -15,15 +15,15 @@
1515

1616
import benchmark.utils as utils
1717

18-
import bigframes.session
18+
import bigframes.pandas
1919

2020
PAGE_SIZE = utils.READ_GBQ_COLAB_PAGE_SIZE
2121

2222

23-
def first_page(*, project_id, dataset_id, table_id, session: bigframes.session.Session):
23+
def first_page(*, project_id, dataset_id, table_id):
2424
# TODO(tswast): Support alternative query if table_id is a local DataFrame,
2525
# e.g. "{local_inline}" or "{local_large}"
26-
df = session._read_gbq_colab(
26+
df = bigframes.pandas._read_gbq_colab(
2727
f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}"
2828
)
2929

@@ -33,7 +33,7 @@ def first_page(*, project_id, dataset_id, table_id, session: bigframes.session.S
3333

3434

3535
if __name__ == "__main__":
36-
config = utils.get_configuration(include_table_id=True)
36+
config = utils.get_configuration(include_table_id=True, start_session=False)
3737
current_path = pathlib.Path(__file__).absolute()
3838

3939
utils.get_execution_time(
@@ -43,5 +43,4 @@ def first_page(*, project_id, dataset_id, table_id, session: bigframes.session.S
4343
project_id=config.project_id,
4444
dataset_id=config.dataset_id,
4545
table_id=config.table_id,
46-
session=config.session,
4746
)

tests/benchmark/read_gbq_colab/last_page.py

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -15,15 +15,15 @@
1515

1616
import benchmark.utils as utils
1717

18-
import bigframes.session
18+
import bigframes.pandas
1919

2020
PAGE_SIZE = utils.READ_GBQ_COLAB_PAGE_SIZE
2121

2222

23-
def last_page(*, project_id, dataset_id, table_id, session: bigframes.session.Session):
23+
def last_page(*, project_id, dataset_id, table_id):
2424
# TODO(tswast): Support alternative query if table_id is a local DataFrame,
2525
# e.g. "{local_inline}" or "{local_large}"
26-
df = session._read_gbq_colab(
26+
df = bigframes.pandas._read_gbq_colab(
2727
f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}"
2828
)
2929

@@ -34,7 +34,7 @@ def last_page(*, project_id, dataset_id, table_id, session: bigframes.session.Se
3434

3535

3636
if __name__ == "__main__":
37-
config = utils.get_configuration(include_table_id=True)
37+
config = utils.get_configuration(include_table_id=True, start_session=False)
3838
current_path = pathlib.Path(__file__).absolute()
3939

4040
utils.get_execution_time(
@@ -44,5 +44,4 @@ def last_page(*, project_id, dataset_id, table_id, session: bigframes.session.Se
4444
project_id=config.project_id,
4545
dataset_id=config.dataset_id,
4646
table_id=config.table_id,
47-
session=config.session,
4847
)

tests/benchmark/read_gbq_colab/sort_output.py

Lines changed: 4 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -15,17 +15,15 @@
1515

1616
import benchmark.utils as utils
1717

18-
import bigframes.session
18+
import bigframes.pandas
1919

2020
PAGE_SIZE = utils.READ_GBQ_COLAB_PAGE_SIZE
2121

2222

23-
def sort_output(
24-
*, project_id, dataset_id, table_id, session: bigframes.session.Session
25-
):
23+
def sort_output(*, project_id, dataset_id, table_id):
2624
# TODO(tswast): Support alternative query if table_id is a local DataFrame,
2725
# e.g. "{local_inline}" or "{local_large}"
28-
df = session._read_gbq_colab(
26+
df = bigframes.pandas._read_gbq_colab(
2927
f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}"
3028
)
3129

@@ -44,7 +42,7 @@ def sort_output(
4442

4543

4644
if __name__ == "__main__":
47-
config = utils.get_configuration(include_table_id=True)
45+
config = utils.get_configuration(include_table_id=True, start_session=False)
4846
current_path = pathlib.Path(__file__).absolute()
4947

5048
utils.get_execution_time(
@@ -54,5 +52,4 @@ def sort_output(
5452
project_id=config.project_id,
5553
dataset_id=config.dataset_id,
5654
table_id=config.table_id,
57-
session=config.session,
5855
)

tests/benchmark/utils.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -25,12 +25,12 @@
2525
class BenchmarkConfig:
2626
project_id: str
2727
dataset_id: str
28-
session: bigframes.Session
28+
session: bigframes.Session | None
2929
benchmark_suffix: str | None
3030
table_id: str | None = None
3131

3232

33-
def get_configuration(include_table_id=False) -> BenchmarkConfig:
33+
def get_configuration(include_table_id=False, start_session=True) -> BenchmarkConfig:
3434
parser = argparse.ArgumentParser()
3535
parser.add_argument(
3636
"--project_id",
@@ -65,7 +65,7 @@ def get_configuration(include_table_id=False) -> BenchmarkConfig:
6565
)
6666

6767
args = parser.parse_args()
68-
session = _initialize_session(_str_to_bool(args.ordered))
68+
session = _initialize_session(_str_to_bool(args.ordered)) if start_session else None
6969

7070
return BenchmarkConfig(
7171
project_id=args.project_id,

0 commit comments

Comments
 (0)