Commit 50fa2fb

test: use jobless query api for faster tests (#1478)
* use fixture to reset mutated global sessions
* fix tests
* fix progress bar tests by allowing large results
* remove undesired location setting in tests
* use fixture for test_df_construct_inline_respects_location
* remove redundant fixture
1 parent 1c934c2 commit 50fa2fb

17 files changed: +214, -148 lines
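The change hinges on the `bigquery.allow_large_results` option in BigQuery DataFrames: when it is False, small result sets come back through the jobless query API instead of being written to a destination table first, which is what makes the tests faster. A minimal sketch of the two override styles the commit uses, a scoped option and a per-call argument; the query itself is illustrative and assumes a configured GCP project:

import bigframes.pandas as bpd

# Scoped override, as the new session fixtures do: everything inside the
# context uses the faster jobless query path.
with bpd.option_context("bigquery.allow_large_results", False):
    df = bpd.read_gbq("SELECT 1 AS x")
    print(df.to_pandas())

# Per-call override, as individual tests do where the jobless path is not
# yet supported (e.g. JSON results, b/401630655):
# df.to_pandas(allow_large_results=True)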

tests/system/conftest.py

Lines changed: 22 additions & 5 deletions
@@ -139,9 +139,7 @@ def resourcemanager_client(
 
 @pytest.fixture(scope="session")
 def session() -> Generator[bigframes.Session, None, None]:
-    context = bigframes.BigQueryOptions(
-        location="US",
-    )
+    context = bigframes.BigQueryOptions(location="US", allow_large_results=False)
     session = bigframes.Session(context=context)
     yield session
     session.close()  # close generated session at cleanup time
@@ -157,15 +155,19 @@ def session_load() -> Generator[bigframes.Session, None, None]:
 
 @pytest.fixture(scope="session", params=["strict", "partial"])
 def maybe_ordered_session(request) -> Generator[bigframes.Session, None, None]:
-    context = bigframes.BigQueryOptions(location="US", ordering_mode=request.param)
+    context = bigframes.BigQueryOptions(
+        location="US", ordering_mode=request.param, allow_large_results=False
+    )
     session = bigframes.Session(context=context)
     yield session
     session.close()  # close generated session at cleanup type
 
 
 @pytest.fixture(scope="session")
 def unordered_session() -> Generator[bigframes.Session, None, None]:
-    context = bigframes.BigQueryOptions(location="US", ordering_mode="partial")
+    context = bigframes.BigQueryOptions(
+        location="US", ordering_mode="partial", allow_large_results=False
+    )
     session = bigframes.Session(context=context)
     yield session
     session.close()  # close generated session at cleanup type
@@ -1378,6 +1380,12 @@ def floats_product_bf(session, floats_product_pd):
     return session.read_pandas(floats_product_pd)
 
 
+@pytest.fixture(scope="session", autouse=True)
+def use_fast_query_path():
+    with bpd.option_context("bigquery.allow_large_results", False):
+        yield
+
+
 @pytest.fixture(scope="session", autouse=True)
 def cleanup_cloud_functions(session, cloudfunctions_client, dataset_id_permanent):
     """Clean up stale cloud functions."""
@@ -1460,3 +1468,12 @@ def images_mm_df(
     return session.from_glob_path(
         images_gcs_path, name="blob_col", connection=bq_connection
     )
+
+
+@pytest.fixture()
+def reset_default_session_and_location():
+    bpd.close_session()
+    with bpd.option_context("bigquery.location", None):
+        yield
+    bpd.close_session()
+    bpd.options.bigquery.location = None
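The new `reset_default_session_and_location` fixture is for tests that mutate the global session or its location. A condensed sketch of how a test opts in, mirroring `test_df_construct_inline_respects_location` later in this commit; the test name and body are illustrative:

import bigframes.pandas as bpd

def test_uses_nondefault_location(reset_default_session_and_location):
    # The fixture closed any prior global session, so the location option
    # takes effect when a fresh thread-local session starts here.
    with bpd.option_context("bigquery.location", "europe-west1"):
        df = bpd.DataFrame([[1, 2, 3]])
        df.to_gbq()  # materialize, forcing a query job in that location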

tests/system/small/bigquery/test_json.py

Lines changed: 7 additions & 0 deletions
@@ -22,6 +22,13 @@
 import bigframes.pandas as bpd
 
 
+@pytest.fixture(scope="module", autouse=True)
+def use_large_query_path():
+    # b/401630655
+    with bpd.option_context("bigquery.allow_large_results", True):
+        yield
+
+
 @pytest.mark.parametrize(
     ("json_path", "expected_json"),
     [
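Because `bpd.option_context` restores the previous value on exit, this module-scoped autouse fixture nests cleanly inside the session-scoped `use_fast_query_path` fixture from conftest.py: tests in this module see True, everything else keeps the new False default. A sketch of the nesting, assuming `bpd.options.bigquery.allow_large_results` reflects the active value:

import bigframes.pandas as bpd

with bpd.option_context("bigquery.allow_large_results", False):      # conftest.py fixture
    with bpd.option_context("bigquery.allow_large_results", True):   # this module's fixture
        assert bpd.options.bigquery.allow_large_results is True
    assert bpd.options.bigquery.allow_large_results is False         # restored on exit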

tests/system/small/bigquery/test_struct.py

Lines changed: 3 additions & 2 deletions
@@ -53,9 +53,10 @@ def test_struct_from_dataframe(columns_arg):
     srs = series.Series(
         columns_arg,
     )
+    # Use allow_large_results=True, due to b/403028465
     pd.testing.assert_series_equal(
-        srs.to_pandas(),
-        bbq.struct(srs.struct.explode()).to_pandas(),
+        srs.to_pandas(allow_large_results=True),
+        bbq.struct(srs.struct.explode()).to_pandas(allow_large_results=True),
         check_index_type=False,
         check_dtype=False,
     )

tests/system/small/blob/test_properties.py

Lines changed: 27 additions & 25 deletions
@@ -55,31 +55,33 @@ def test_blob_version(images_mm_df: bpd.DataFrame):
 
 
 def test_blob_metadata(images_mm_df: bpd.DataFrame):
-    bigframes.options.experiments.blob = True
-
-    actual = images_mm_df["blob_col"].blob.metadata().to_pandas()
-    expected = pd.Series(
-        [
-            {
-                "content_type": "image/jpeg",
-                "md5_hash": "e130ad042261a1883cd2cc06831cf748",
-                "size": 338390,
-                "updated": 1739574332000000,
-            },
-            {
-                "content_type": "image/jpeg",
-                "md5_hash": "e2ae3191ff2b809fd0935f01a537c650",
-                "size": 43333,
-                "updated": 1739574332000000,
-            },
-        ],
-        name="metadata",
-        dtype=db_dtypes.JSONDtype(),
-    )
-
-    pd.testing.assert_series_equal(
-        actual, expected, check_dtype=False, check_index_type=False
-    )
+    # allow_large_result=False incompatible with json b/401630655
+    with bigframes.option_context(
+        "bigquery.allow_large_results", True, "experiments.blob", True
+    ):
+        actual = images_mm_df["blob_col"].blob.metadata().to_pandas()
+        expected = pd.Series(
+            [
+                {
+                    "content_type": "image/jpeg",
+                    "md5_hash": "e130ad042261a1883cd2cc06831cf748",
+                    "size": 338390,
+                    "updated": 1739574332000000,
+                },
+                {
+                    "content_type": "image/jpeg",
+                    "md5_hash": "e2ae3191ff2b809fd0935f01a537c650",
+                    "size": 43333,
+                    "updated": 1739574332000000,
+                },
+            ],
+            name="metadata",
+            dtype=db_dtypes.JSONDtype(),
+        )
+
+        pd.testing.assert_series_equal(
+            actual, expected, check_dtype=False, check_index_type=False
+        )
 
 
 def test_blob_content_type(images_mm_df: bpd.DataFrame):
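`bigframes.option_context` takes alternating option-path/value pairs, so a single context manager replaces both the unrestored global mutation (`bigframes.options.experiments.blob = True`) and the large-results override. A sketch of that pattern in isolation, with values mirroring the test above:

import bigframes

with bigframes.option_context(
    "bigquery.allow_large_results", True,  # JSON output needs a real job, b/401630655
    "experiments.blob", True,              # opt in to the experimental blob accessor
):
    pass  # code here sees both options; previous values are restored on exit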

tests/system/small/ml/test_core.py

Lines changed: 2 additions & 1 deletion
@@ -401,9 +401,10 @@ def test_model_generate_text(
         "top_p": 0.5,
         "flatten_json_output": True,
     }
+    # Until b/401630655 is resolved, json not compatible with allow_large_results=False
     df = bqml_palm2_text_generator_model.generate_text(
         llm_text_df, options=options
-    ).to_pandas()
+    ).to_pandas(allow_large_results=True)
 
     utils.check_pandas_df_schema_and_index(
         df, columns=utils.ML_GENERATE_TEXT_OUTPUT, index=3, col_exact=False

tests/system/small/ml/test_llm.py

Lines changed: 7 additions & 0 deletions
@@ -24,6 +24,13 @@
 from tests.system import utils
 
 
+# Until b/401630655 is resolved, ML apis return json, not compatible with allow_large_results=False
+@pytest.fixture(scope="module", autouse=True)
+def always_create_table():
+    with bigframes.option_context("bigquery.allow_large_results", True):
+        yield
+
+
 def test_create_load_text_generator_model(
     palm2_text_generator_model, dataset_id, bq_connection
 ):

tests/system/small/test_dataframe.py

Lines changed: 22 additions & 13 deletions
@@ -163,11 +163,11 @@ def test_df_construct_from_dict():
     )
 
 
-def test_df_construct_inline_respects_location():
+def test_df_construct_inline_respects_location(reset_default_session_and_location):
     # Note: This starts a thread-local session.
     with bpd.option_context("bigquery.location", "europe-west1"):
         df = bpd.DataFrame([[1, 2, 3], [4, 5, 6]])
-        repr(df)
+        df.to_gbq()
     assert df.query_job is not None
     table = bpd.get_global_session().bqclient.get_table(df.query_job.destination)
@@ -666,7 +666,8 @@ def test_df_peek(scalars_dfs_maybe_ordered):
 
     session = scalars_df._block.session
     slot_millis_sum = session.slot_millis_sum
-    peek_result = scalars_df.peek(n=3, force=False)
+    # allow_large_results=False needed to get slot_millis_sum statistics only
+    peek_result = scalars_df.peek(n=3, force=False, allow_large_results=True)
 
     assert session.slot_millis_sum - slot_millis_sum > 1000
     pd.testing.assert_index_equal(scalars_pandas_df.columns, peek_result.columns)
@@ -4584,12 +4585,13 @@ def test_df_drop_duplicates(scalars_df_index, scalars_pandas_df_index, keep, sub
     ],
 )
 def test_df_drop_duplicates_w_json(json_df, keep):
-    bf_df = json_df.drop_duplicates(keep=keep).to_pandas()
+    bf_df = json_df.drop_duplicates(keep=keep).to_pandas(allow_large_results=True)
 
     # drop_duplicates relies on pa.compute.dictionary_encode, which is incompatible
     # with Arrow string extension types. Temporary conversion to standard Pandas
     # strings is required.
-    json_pandas_df = json_df.to_pandas()
+    # allow_large_results=True for b/401630655
+    json_pandas_df = json_df.to_pandas(allow_large_results=True)
     json_pandas_df["json_col"] = json_pandas_df["json_col"].astype(
         pd.StringDtype(storage="pyarrow")
     )
@@ -4951,14 +4953,16 @@ def test_df_bool_interpretation_error(scalars_df_index):
 
 
 def test_query_job_setters(scalars_df_default_index: dataframe.DataFrame):
-    job_ids = set()
-    repr(scalars_df_default_index)
-    assert scalars_df_default_index.query_job is not None
-    job_ids.add(scalars_df_default_index.query_job.job_id)
-    scalars_df_default_index.to_pandas()
-    job_ids.add(scalars_df_default_index.query_job.job_id)
+    # if allow_large_results=False, might not create query job
+    with bigframes.option_context("bigquery.allow_large_results", True):
+        job_ids = set()
+        repr(scalars_df_default_index)
+        assert scalars_df_default_index.query_job is not None
+        job_ids.add(scalars_df_default_index.query_job.job_id)
+        scalars_df_default_index.to_pandas(allow_large_results=True)
+        job_ids.add(scalars_df_default_index.query_job.job_id)
 
-    assert len(job_ids) == 2
+        assert len(job_ids) == 2
 
 
 def test_df_cached(scalars_df_index):
@@ -5196,7 +5200,12 @@ def test_to_pandas_downsampling_option_override(session):
    df = session.read_gbq("bigframes-dev.bigframes_tests_sys.batting")
    download_size = 1
 
-    df = df.to_pandas(max_download_size=download_size, sampling_method="head")
+    # limits only apply for allow_large_result=True
+    df = df.to_pandas(
+        max_download_size=download_size,
+        sampling_method="head",
+        allow_large_results=True,
+    )
 
     total_memory_bytes = df.memory_usage(deep=True).sum()
     total_memory_mb = total_memory_bytes / (1024 * 1024)
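`max_download_size` and `sampling_method` are only enforced on the large-results download path, so the downsampling test pins `allow_large_results=True` to keep exercising them; under the new jobless default they would be a no-op. A short sketch, assuming the source table exceeds the cap:

# max_download_size is in MB; sampling_method="head" keeps leading rows.
small_df = df.to_pandas(
    max_download_size=1,
    sampling_method="head",
    allow_large_results=True,  # downsampling limits do not apply on the jobless path
)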

tests/system/small/test_dataframe_io.py

Lines changed: 19 additions & 15 deletions
@@ -254,27 +254,31 @@ def test_to_pandas_array_struct_correct_result(session):
 def test_to_pandas_override_global_option(scalars_df_index):
     # Direct call to_pandas uses global default setting (allow_large_results=True),
     # table has 'bqdf' prefix.
-    scalars_df_index.to_pandas()
-    table_id = scalars_df_index._query_job.destination.table_id
-    assert table_id.startswith("bqdf")
+    with bigframes.option_context("bigquery.allow_large_results", True):
 
-    # When allow_large_results=False, a query_job object should not be created.
-    # Therefore, the table_id should remain unchanged.
-    scalars_df_index.to_pandas(allow_large_results=False)
-    assert scalars_df_index._query_job.destination.table_id == table_id
+        scalars_df_index.to_pandas()
+        table_id = scalars_df_index._query_job.destination.table_id
+        assert table_id.startswith("bqdf")
+
+        # When allow_large_results=False, a query_job object should not be created.
+        # Therefore, the table_id should remain unchanged.
+        scalars_df_index.to_pandas(allow_large_results=False)
+        assert scalars_df_index._query_job.destination.table_id == table_id
 
 
 def test_to_arrow_override_global_option(scalars_df_index):
     # Direct call to_arrow uses global default setting (allow_large_results=True),
     # table has 'bqdf' prefix.
-    scalars_df_index.to_arrow()
-    table_id = scalars_df_index._query_job.destination.table_id
-    assert table_id.startswith("bqdf")
-
-    # When allow_large_results=False, a query_job object should not be created.
-    # Therefore, the table_id should remain unchanged.
-    scalars_df_index.to_arrow(allow_large_results=False)
-    assert scalars_df_index._query_job.destination.table_id == table_id
+    with bigframes.option_context("bigquery.allow_large_results", True):
+
+        scalars_df_index.to_arrow()
+        table_id = scalars_df_index._query_job.destination.table_id
+        assert table_id.startswith("bqdf")
+
+        # When allow_large_results=False, a query_job object should not be created.
+        # Therefore, the table_id should remain unchanged.
+        scalars_df_index.to_arrow(allow_large_results=False)
+        assert scalars_df_index._query_job.destination.table_id == table_id
 
 
 def test_load_json_w_json_string_items(session):

tests/system/small/test_encryption.py

Lines changed: 2 additions & 1 deletion
@@ -41,6 +41,7 @@ def bq_cmek() -> str:
 
 @pytest.fixture(scope="module")
 def session_with_bq_cmek(bq_cmek) -> bigframes.Session:
+    # allow_large_results = False might not create table, and therefore no encryption config
     session = bigframes.Session(bigframes.BigQueryOptions(kms_key_name=bq_cmek))
 
     return session
@@ -52,7 +53,7 @@ def _assert_bq_table_is_encrypted(
     session: bigframes.Session,
 ):
     # Materialize the data in BQ
-    repr(df)
+    df.to_gbq()
 
     # The df should be backed by a query job with intended encryption on the result table
     assert df.query_job is not None
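With jobless queries as the default, `repr(df)` is no longer guaranteed to run a query job that writes a destination table, so helpers that need a materialized table to inspect now call `to_gbq()` explicitly. A sketch of the distinction as these tests rely on it:

# May take the jobless path under the new default: no destination table.
repr(df)

# Always materializes to a BigQuery table and backs df.query_job, which the
# encryption assertions then inspect.
df.to_gbq()
assert df.query_job is not None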

tests/system/small/test_groupby.py

Lines changed: 1 addition & 2 deletions
@@ -546,8 +546,7 @@ def test_dataframe_groupby_nonnumeric_with_mean():
     )
     pd_result = df.groupby(["key1", "key2"]).mean()
 
-    with bpd.option_context("bigquery.location", "US"):
-        bf_result = bpd.DataFrame(df).groupby(["key1", "key2"]).mean().to_pandas()
+    bf_result = bpd.DataFrame(df).groupby(["key1", "key2"]).mean().to_pandas()
 
     pd.testing.assert_frame_equal(
         pd_result, bf_result, check_index_type=False, check_dtype=False
