Skip to content

Commit ffa597c

Browse files
authored
chore: move multimodal tests from PROD to TEST (#1578)
* add a new session and connect to test env
* I need to revert code change
* revert files for unnecessary change
* fix more tests
* all tests are clean
* fix more tests
* merge in the change
* revert the order changes, move single-modality tests back to PROD since they are in GA
* refactor multimodal tests out
* change the project name
* revert change
* remove unused bq client
1 parent ef63772 commit ffa597c

File tree

6 files changed

+133
-73
lines changed

6 files changed

+133
-73
lines changed

tests/system/conftest.py

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,20 @@ def session_tokyo(tokyo_location: str) -> Generator[bigframes.Session, None, Non
184184
session.close() # close generated session at cleanup type
185185

186186

187+
@pytest.fixture(scope="session")
188+
def test_session() -> Generator[bigframes.Session, None, None]:
189+
context = bigframes.BigQueryOptions(
190+
client_endpoints_override={
191+
"bqclient": "https://test-bigquery.sandbox.google.com",
192+
"bqconnectionclient": "test-bigqueryconnection.sandbox.googleapis.com",
193+
"bqstoragereadclient": "test-bigquerystorage-grpc.sandbox.googleapis.com",
194+
},
195+
)
196+
session = bigframes.Session(context=context)
197+
yield session
198+
session.close()
199+
200+
187201
@pytest.fixture(scope="session")
188202
def bq_connection_name() -> str:
189203
return "bigframes-rf-conn"
@@ -910,8 +924,8 @@ def llm_text_pandas_df():
910924

911925

912926
@pytest.fixture(scope="session")
913-
def llm_text_df(session, llm_text_pandas_df):
914-
return session.read_pandas(llm_text_pandas_df)
927+
def llm_text_df(test_session, llm_text_pandas_df):
928+
return test_session.read_pandas(llm_text_pandas_df)
915929

916930

917931
@pytest.fixture(scope="session")
@@ -1249,10 +1263,10 @@ def penguins_randomforest_classifier_model_name(
12491263

12501264
@pytest.fixture(scope="session")
12511265
def llm_fine_tune_df_default_index(
1252-
session: bigframes.Session,
1266+
test_session: bigframes.Session,
12531267
) -> bigframes.dataframe.DataFrame:
12541268
training_table_name = "llm_tuning.emotion_classification_train"
1255-
df = session.read_gbq(training_table_name).dropna().head(30)
1269+
df = test_session.read_gbq(training_table_name).dropna().head(30)
12561270
prefix = "Please do sentiment analysis on the following text and only output a number from 0 to 5 where 0 means sadness, 1 means joy, 2 means love, 3 means anger, 4 means fear, and 5 means surprise. Text: "
12571271
df["prompt"] = prefix + df["text"]
12581272
df["label"] = df["label"].astype("string")
@@ -1484,13 +1498,14 @@ def images_uris() -> list[str]:
14841498

14851499
@pytest.fixture(scope="session")
14861500
def images_mm_df(
1487-
images_gcs_path, session: bigframes.Session, bq_connection: str
1501+
images_uris, test_session: bigframes.Session, bq_connection: str
14881502
) -> bpd.DataFrame:
14891503
bigframes.options.experiments.blob = True
14901504

1491-
return session.from_glob_path(
1492-
images_gcs_path, name="blob_col", connection=bq_connection
1505+
blob_series = bpd.Series(images_uris, session=test_session).str.to_blob(
1506+
connection=bq_connection
14931507
)
1508+
return blob_series.rename("blob_col").to_frame()
14941509

14951510

14961511
@pytest.fixture()
@@ -1509,8 +1524,10 @@ def pdf_gcs_path() -> str:
15091524

15101525
@pytest.fixture(scope="session")
15111526
def pdf_mm_df(
1512-
pdf_gcs_path, session: bigframes.Session, bq_connection: str
1527+
pdf_gcs_path, test_session: bigframes.Session, bq_connection: str
15131528
) -> bpd.DataFrame:
15141529
bigframes.options.experiments.blob = True
15151530

1516-
return session.from_glob_path(pdf_gcs_path, name="pdf", connection=bq_connection)
1531+
return test_session.from_glob_path(
1532+
pdf_gcs_path, name="pdf", connection=bq_connection
1533+
)

tests/system/large/blob/test_function.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,11 +55,11 @@ def test_blob_image_blur_to_series(
5555
images_mm_df: bpd.DataFrame,
5656
bq_connection: str,
5757
images_output_uris: list[str],
58-
session: bigframes.Session,
58+
test_session: bigframes.Session,
5959
):
6060
bigframes.options.experiments.blob = True
6161

62-
series = bpd.Series(images_output_uris, session=session).str.to_blob(
62+
series = bpd.Series(images_output_uris, session=test_session).str.to_blob(
6363
connection=bq_connection
6464
)
6565

@@ -129,11 +129,11 @@ def test_blob_image_resize_to_series(
129129
images_mm_df: bpd.DataFrame,
130130
bq_connection: str,
131131
images_output_uris: list[str],
132-
session: bigframes.Session,
132+
test_session: bigframes.Session,
133133
):
134134
bigframes.options.experiments.blob = True
135135

136-
series = bpd.Series(images_output_uris, session=session).str.to_blob(
136+
series = bpd.Series(images_output_uris, session=test_session).str.to_blob(
137137
connection=bq_connection
138138
)
139139

@@ -205,11 +205,11 @@ def test_blob_image_normalize_to_series(
205205
images_mm_df: bpd.DataFrame,
206206
bq_connection: str,
207207
images_output_uris: list[str],
208-
session: bigframes.Session,
208+
test_session: bigframes.Session,
209209
):
210210
bigframes.options.experiments.blob = True
211211

212-
series = bpd.Series(images_output_uris, session=session).str.to_blob(
212+
series = bpd.Series(images_output_uris, session=test_session).str.to_blob(
213213
connection=bq_connection
214214
)
215215

tests/system/small/blob/test_io.py

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,11 @@
1919

2020

2121
def test_blob_create_from_uri_str(
22-
bq_connection: str, session: bigframes.Session, images_uris
22+
bq_connection: str, test_session: bigframes.Session, images_uris
2323
):
2424
bigframes.options.experiments.blob = True
2525

26-
uri_series = bpd.Series(images_uris, session=session)
26+
uri_series = bpd.Series(images_uris, session=test_session)
2727
blob_series = uri_series.str.to_blob(connection=bq_connection)
2828

2929
pd_blob_df = blob_series.struct.explode().to_pandas()
@@ -42,14 +42,21 @@ def test_blob_create_from_uri_str(
4242

4343

4444
def test_blob_create_from_glob_path(
45-
bq_connection: str, session: bigframes.Session, images_gcs_path, images_uris
45+
bq_connection: str, test_session: bigframes.Session, images_gcs_path, images_uris
4646
):
4747
bigframes.options.experiments.blob = True
4848

49-
blob_df = session.from_glob_path(
49+
blob_df = test_session.from_glob_path(
5050
images_gcs_path, connection=bq_connection, name="blob_col"
5151
)
52-
pd_blob_df = blob_df["blob_col"].struct.explode().to_pandas()
52+
pd_blob_df = (
53+
blob_df["blob_col"]
54+
.struct.explode()
55+
.to_pandas()
56+
.sort_values("uri")
57+
.reset_index(drop=True)
58+
)
59+
5360
expected_df = pd.DataFrame(
5461
{
5562
"uri": images_uris,
@@ -65,14 +72,20 @@ def test_blob_create_from_glob_path(
6572

6673

6774
def test_blob_create_read_gbq_object_table(
68-
bq_connection: str, session: bigframes.Session, images_gcs_path, images_uris
75+
bq_connection: str, test_session: bigframes.Session, images_gcs_path, images_uris
6976
):
7077
bigframes.options.experiments.blob = True
7178

72-
obj_table = session._create_object_table(images_gcs_path, bq_connection)
79+
obj_table = test_session._create_object_table(images_gcs_path, bq_connection)
7380

74-
blob_df = session.read_gbq_object_table(obj_table, name="blob_col")
75-
pd_blob_df = blob_df["blob_col"].struct.explode().to_pandas()
81+
blob_df = test_session.read_gbq_object_table(obj_table, name="blob_col")
82+
pd_blob_df = (
83+
blob_df["blob_col"]
84+
.struct.explode()
85+
.to_pandas()
86+
.sort_values("uri")
87+
.reset_index(drop=True)
88+
)
7689
expected_df = pd.DataFrame(
7790
{
7891
"uri": images_uris,

tests/system/small/ml/test_llm.py

Lines changed: 0 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -106,25 +106,6 @@ def test_create_load_multimodal_embedding_generator_model(
106106
assert reloaded_model.connection_name == bq_connection
107107

108108

109-
@pytest.mark.flaky(retries=2)
110-
def test_multimodal_embedding_generator_predict_default_params_success(
111-
images_mm_df, session, bq_connection
112-
):
113-
bigframes.options.experiments.blob = True
114-
115-
text_embedding_model = llm.MultimodalEmbeddingGenerator(
116-
connection_name=bq_connection, session=session
117-
)
118-
df = text_embedding_model.predict(images_mm_df).to_pandas()
119-
utils.check_pandas_df_schema_and_index(
120-
df,
121-
columns=utils.ML_MULTIMODAL_GENERATE_EMBEDDING_OUTPUT,
122-
index=2,
123-
col_exact=False,
124-
)
125-
assert len(df["ml_generate_embedding_result"][0]) == 1408
126-
127-
128109
@pytest.mark.parametrize(
129110
"model_name",
130111
(
@@ -241,36 +222,6 @@ def test_gemini_text_generator_multi_cols_predict_success(
241222
)
242223

243224

244-
@pytest.mark.parametrize(
245-
"model_name",
246-
(
247-
"gemini-1.5-pro-001",
248-
"gemini-1.5-pro-002",
249-
"gemini-1.5-flash-001",
250-
"gemini-1.5-flash-002",
251-
"gemini-2.0-flash-exp",
252-
),
253-
)
254-
@pytest.mark.flaky(retries=2)
255-
def test_gemini_text_generator_multimodal_input(
256-
images_mm_df: bpd.DataFrame, model_name, session, bq_connection
257-
):
258-
bigframes.options.experiments.blob = True
259-
260-
gemini_text_generator_model = llm.GeminiTextGenerator(
261-
model_name=model_name, connection_name=bq_connection, session=session
262-
)
263-
pd_df = gemini_text_generator_model.predict(
264-
images_mm_df, prompt=["Describe", images_mm_df["blob_col"]]
265-
).to_pandas()
266-
utils.check_pandas_df_schema_and_index(
267-
pd_df,
268-
columns=utils.ML_GENERATE_TEXT_OUTPUT + ["blob_col"],
269-
index=2,
270-
col_exact=False,
271-
)
272-
273-
274225
# Overrides __eq__ function for comparing as mock.call parameter
275226
class EqCmpAllDataFrame(bpd.DataFrame):
276227
def __eq__(self, other):
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import pytest
16+
17+
import bigframes
18+
from bigframes.ml import llm
19+
import bigframes.pandas as bpd
20+
from tests.system import utils
21+
22+
23+
@pytest.mark.flaky(retries=2)
24+
def test_multimodal_embedding_generator_predict_default_params_success(
25+
images_mm_df, test_session, bq_connection
26+
):
27+
bigframes.options.experiments.blob = True
28+
29+
text_embedding_model = llm.MultimodalEmbeddingGenerator(
30+
connection_name=bq_connection, session=test_session
31+
)
32+
df = text_embedding_model.predict(images_mm_df).to_pandas()
33+
utils.check_pandas_df_schema_and_index(
34+
df,
35+
columns=utils.ML_MULTIMODAL_GENERATE_EMBEDDING_OUTPUT,
36+
index=2,
37+
col_exact=False,
38+
)
39+
assert len(df["ml_generate_embedding_result"][0]) == 1408
40+
41+
42+
@pytest.mark.parametrize(
43+
"model_name",
44+
(
45+
"gemini-1.5-pro-001",
46+
"gemini-1.5-pro-002",
47+
"gemini-1.5-flash-001",
48+
"gemini-1.5-flash-002",
49+
"gemini-2.0-flash-exp",
50+
),
51+
)
52+
@pytest.mark.flaky(retries=2)
53+
def test_gemini_text_generator_multimodal_input(
54+
images_mm_df: bpd.DataFrame, model_name, test_session, bq_connection
55+
):
56+
bigframes.options.experiments.blob = True
57+
58+
gemini_text_generator_model = llm.GeminiTextGenerator(
59+
model_name=model_name, connection_name=bq_connection, session=test_session
60+
)
61+
pd_df = gemini_text_generator_model.predict(
62+
images_mm_df, prompt=["Describe", images_mm_df["blob_col"]]
63+
).to_pandas()
64+
utils.check_pandas_df_schema_and_index(
65+
pd_df,
66+
columns=utils.ML_GENERATE_TEXT_OUTPUT + ["blob_col"],
67+
index=2,
68+
col_exact=False,
69+
)

tests/system/small/test_session.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1617,3 +1617,13 @@ def test_read_json_gcs_default_engine(session, scalars_dfs, gcs_folder):
16171617

16181618
assert df.shape[0] == scalars_df.shape[0]
16191619
pd.testing.assert_series_equal(df.dtypes, scalars_df.dtypes)
1620+
1621+
1622+
def test_read_gbq_test(test_session: bigframes.Session):
1623+
test_project_id = "bigframes-dev"
1624+
test_dataset_id = "test_env_only"
1625+
test_table_id = "one_table"
1626+
table_id = f"{test_project_id}.{test_dataset_id}.{test_table_id}"
1627+
actual = test_session.read_gbq(table_id).to_pandas()
1628+
1629+
assert actual.shape == (1, 1)

0 commit comments

Comments
 (0)