Skip to content

Commit ccd3c03

Browse files
authored
test: move Claude3 tests to load test (#997)
* test: move Claude3 tests to load test * add conftest
1 parent d2fb49f commit ccd3c03

File tree

5 files changed

+129
-101
lines changed

5 files changed

+129
-101
lines changed

tests/system/conftest.py

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -146,16 +146,6 @@ def session() -> Generator[bigframes.Session, None, None]:
146146
session.close() # close generated session at cleanup time
147147

148148

149-
@pytest.fixture(scope="session")
150-
def session_us_east5() -> Generator[bigframes.Session, None, None]:
151-
context = bigframes.BigQueryOptions(
152-
location="us-east5",
153-
)
154-
session = bigframes.Session(context=context)
155-
yield session
156-
session.close() # close generated session at cleanup time
157-
158-
159149
@pytest.fixture(scope="session")
160150
def session_load() -> Generator[bigframes.Session, None, None]:
161151
context = bigframes.BigQueryOptions(location="US", project="bigframes-load-testing")
@@ -188,6 +178,11 @@ def session_tokyo(tokyo_location: str) -> Generator[bigframes.Session, None, Non
188178
session.close() # close generated session at cleanup time
189179

190180

181+
@pytest.fixture(scope="session")
182+
def bq_connection(bigquery_client: bigquery.Client) -> str:
183+
return f"{bigquery_client.project}.{bigquery_client.location}.bigframes-rf-conn"
184+
185+
191186
@pytest.fixture(scope="session", autouse=True)
192187
def cleanup_datasets(bigquery_client: bigquery.Client) -> None:
193188
"""Cleanup any datasets that were created but not cleaned up."""
@@ -728,6 +723,25 @@ def new_penguins_df(session, new_penguins_pandas_df):
728723
return session.read_pandas(new_penguins_pandas_df)
729724

730725

726+
@pytest.fixture(scope="session")
727+
def llm_text_pandas_df():
728+
"""Prompt questions used as input data for the LLM text generation tests."""
729+
return pd.DataFrame(
730+
{
731+
"prompt": [
732+
"What is BigQuery?",
733+
"What is BQML?",
734+
"What is BigQuery DataFrame?",
735+
],
736+
}
737+
)
738+
739+
740+
@pytest.fixture(scope="session")
741+
def llm_text_df(session, llm_text_pandas_df):
742+
return session.read_pandas(llm_text_pandas_df)
743+
744+
731745
@pytest.fixture(scope="session")
732746
def penguins_linear_model_name(
733747
session: bigframes.Session, dataset_id_permanent, penguins_table_id

tests/system/load/conftest.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
from typing import Generator
17+
18+
import pytest
19+
20+
import bigframes
21+
22+
23+
# Override the session fixture so that all load tests target the bigframes-load-testing project. This allows the load tests to be run locally with an authenticated environment.
24+
@pytest.fixture(scope="session")
25+
def session() -> Generator[bigframes.Session, None, None]:
26+
context = bigframes.BigQueryOptions(location="US", project="bigframes-load-testing")
27+
session = bigframes.Session(context=context)
28+
yield session
29+
session.close() # close generated session at cleanup time
30+
31+
32+
@pytest.fixture(scope="session")
33+
def session_us_east5() -> Generator[bigframes.Session, None, None]:
34+
context = bigframes.BigQueryOptions(
35+
location="us-east5", project="bigframes-load-testing"
36+
)
37+
session = bigframes.Session(context=context)
38+
yield session
39+
session.close() # close generated session at cleanup time

tests/system/load/test_llm.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,3 +90,69 @@ def test_llm_gemini_configure_fit(llm_fine_tune_df_default_index, llm_remote_tex
9090
index=3,
9191
)
9292
# TODO(ashleyxu b/335492787): After bqml rolled out version control: save, load, check parameters to ensure configuration was kept
93+
94+
95+
# (b/366290533): Claude models have extremely low capacity. These tests belong in the small tests; they are moved here only to protect BQML's shared capacity (load tests run just once per day) while still keeping minimum coverage.
96+
@pytest.mark.parametrize(
97+
"model_name",
98+
("claude-3-sonnet", "claude-3-haiku", "claude-3-5-sonnet", "claude-3-opus"),
99+
)
100+
@pytest.mark.flaky(retries=3, delay=120)
101+
def test_claude3_text_generator_create_load(
102+
dataset_id, model_name, session, session_us_east5, bq_connection
103+
):
104+
if model_name in ("claude-3-5-sonnet", "claude-3-opus"):
105+
session = session_us_east5
106+
claude3_text_generator_model = llm.Claude3TextGenerator(
107+
model_name=model_name, connection_name=bq_connection, session=session
108+
)
109+
assert claude3_text_generator_model is not None
110+
assert claude3_text_generator_model._bqml_model is not None
111+
112+
# save, load to ensure configuration was kept
113+
reloaded_model = claude3_text_generator_model.to_gbq(
114+
f"{dataset_id}.temp_text_model", replace=True
115+
)
116+
assert f"{dataset_id}.temp_text_model" == reloaded_model._bqml_model.model_name
117+
assert reloaded_model.connection_name == bq_connection
118+
assert reloaded_model.model_name == model_name
119+
120+
121+
@pytest.mark.parametrize(
122+
"model_name",
123+
("claude-3-sonnet", "claude-3-haiku", "claude-3-5-sonnet", "claude-3-opus"),
124+
)
125+
@pytest.mark.flaky(retries=3, delay=120)
126+
def test_claude3_text_generator_predict_default_params_success(
127+
llm_text_df, model_name, session, session_us_east5, bq_connection
128+
):
129+
if model_name in ("claude-3-5-sonnet", "claude-3-opus"):
130+
session = session_us_east5
131+
claude3_text_generator_model = llm.Claude3TextGenerator(
132+
model_name=model_name, connection_name=bq_connection, session=session
133+
)
134+
df = claude3_text_generator_model.predict(llm_text_df).to_pandas()
135+
utils.check_pandas_df_schema_and_index(
136+
df, columns=utils.ML_GENERATE_TEXT_OUTPUT, index=3, col_exact=False
137+
)
138+
139+
140+
@pytest.mark.parametrize(
141+
"model_name",
142+
("claude-3-sonnet", "claude-3-haiku", "claude-3-5-sonnet", "claude-3-opus"),
143+
)
144+
@pytest.mark.flaky(retries=3, delay=120)
145+
def test_claude3_text_generator_predict_with_params_success(
146+
llm_text_df, model_name, session, session_us_east5, bq_connection
147+
):
148+
if model_name in ("claude-3-5-sonnet", "claude-3-opus"):
149+
session = session_us_east5
150+
claude3_text_generator_model = llm.Claude3TextGenerator(
151+
model_name=model_name, connection_name=bq_connection, session=session
152+
)
153+
df = claude3_text_generator_model.predict(
154+
llm_text_df, max_output_tokens=100, top_k=20, top_p=0.5
155+
).to_pandas()
156+
utils.check_pandas_df_schema_and_index(
157+
df, columns=utils.ML_GENERATE_TEXT_OUTPUT, index=3, col_exact=False
158+
)

tests/system/small/ml/conftest.py

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,6 @@
3434
)
3535

3636

37-
@pytest.fixture(scope="session")
38-
def bq_connection(bigquery_client) -> str:
39-
return f"{bigquery_client.project}.us.bigframes-rf-conn"
40-
41-
4237
@pytest.fixture(scope="session")
4338
def penguins_bqml_linear_model(session, penguins_linear_model_name) -> core.BqmlModel:
4439
model = session.bqclient.get_model(penguins_linear_model_name)
@@ -157,20 +152,6 @@ def penguins_pca_model(
157152
)
158153

159154

160-
@pytest.fixture(scope="session")
161-
def llm_text_pandas_df():
162-
"""Additional data matching the penguins dataset, with a new index"""
163-
return pd.DataFrame(
164-
{
165-
"prompt": [
166-
"What is BigQuery?",
167-
"What is BQML?",
168-
"What is BigQuery DataFrame?",
169-
],
170-
}
171-
)
172-
173-
174155
@pytest.fixture(scope="session")
175156
def onnx_iris_pandas_df():
176157
"""Data matching the iris dataset."""
@@ -212,11 +193,6 @@ def xgboost_iris_df(session, xgboost_iris_pandas_df):
212193
return session.read_pandas(xgboost_iris_pandas_df)
213194

214195

215-
@pytest.fixture(scope="session")
216-
def llm_text_df(session, llm_text_pandas_df):
217-
return session.read_pandas(llm_text_pandas_df)
218-
219-
220196
@pytest.fixture(scope="session")
221197
def bqml_palm2_text_generator_model(session, bq_connection) -> core.BqmlModel:
222198
options = {

tests/system/small/ml/test_llm.py

Lines changed: 0 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -414,73 +414,6 @@ def test_gemini_text_generator_predict_with_params_success(
414414
)
415415

416416

417-
@pytest.mark.parametrize(
418-
"model_name",
419-
("claude-3-sonnet", "claude-3-haiku", "claude-3-5-sonnet", "claude-3-opus"),
420-
)
421-
@pytest.mark.flaky(retries=3, delay=120)
422-
def test_claude3_text_generator_create_load(
423-
dataset_id, model_name, session, session_us_east5, bq_connection
424-
):
425-
if model_name in ("claude-3-5-sonnet", "claude-3-opus"):
426-
session = session_us_east5
427-
claude3_text_generator_model = llm.Claude3TextGenerator(
428-
model_name=model_name, connection_name=bq_connection, session=session
429-
)
430-
assert claude3_text_generator_model is not None
431-
assert claude3_text_generator_model._bqml_model is not None
432-
433-
# save, load to ensure configuration was kept
434-
reloaded_model = claude3_text_generator_model.to_gbq(
435-
f"{dataset_id}.temp_text_model", replace=True
436-
)
437-
assert f"{dataset_id}.temp_text_model" == reloaded_model._bqml_model.model_name
438-
assert reloaded_model.connection_name == bq_connection
439-
assert reloaded_model.model_name == model_name
440-
441-
442-
@pytest.mark.skip("b/366290533 too many requests are exhausting bqml capacity")
443-
@pytest.mark.parametrize(
444-
"model_name",
445-
("claude-3-sonnet", "claude-3-haiku", "claude-3-5-sonnet", "claude-3-opus"),
446-
)
447-
@pytest.mark.flaky(retries=3, delay=120)
448-
def test_claude3_text_generator_predict_default_params_success(
449-
llm_text_df, model_name, session, session_us_east5, bq_connection
450-
):
451-
if model_name in ("claude-3-5-sonnet", "claude-3-opus"):
452-
session = session_us_east5
453-
claude3_text_generator_model = llm.Claude3TextGenerator(
454-
model_name=model_name, connection_name=bq_connection, session=session
455-
)
456-
df = claude3_text_generator_model.predict(llm_text_df).to_pandas()
457-
utils.check_pandas_df_schema_and_index(
458-
df, columns=utils.ML_GENERATE_TEXT_OUTPUT, index=3, col_exact=False
459-
)
460-
461-
462-
@pytest.mark.skip("b/366290533 too many requests are exhausting bqml capacity")
463-
@pytest.mark.parametrize(
464-
"model_name",
465-
("claude-3-sonnet", "claude-3-haiku", "claude-3-5-sonnet", "claude-3-opus"),
466-
)
467-
@pytest.mark.flaky(retries=3, delay=120)
468-
def test_claude3_text_generator_predict_with_params_success(
469-
llm_text_df, model_name, session, session_us_east5, bq_connection
470-
):
471-
if model_name in ("claude-3-5-sonnet", "claude-3-opus"):
472-
session = session_us_east5
473-
claude3_text_generator_model = llm.Claude3TextGenerator(
474-
model_name=model_name, connection_name=bq_connection, session=session
475-
)
476-
df = claude3_text_generator_model.predict(
477-
llm_text_df, max_output_tokens=100, top_k=20, top_p=0.5
478-
).to_pandas()
479-
utils.check_pandas_df_schema_and_index(
480-
df, columns=utils.ML_GENERATE_TEXT_OUTPUT, index=3, col_exact=False
481-
)
482-
483-
484417
@pytest.mark.flaky(retries=2)
485418
def test_llm_palm_score(llm_fine_tune_df_default_index):
486419
model = llm.PaLM2TextGenerator(model_name="text-bison")

0 commit comments

Comments
 (0)