Skip to content

Commit 36a741a

Browse files
vertex-sdk-bot authored and copybara-github committed
feat: GenAI Client (evals) - Add pd.DataFrame as input for dataset in create_evaluation_run in Vertex AI GenAI SDK evals
PiperOrigin-RevId: 825658094
1 parent 6737a70 commit 36a741a

File tree

3 files changed

+31
-39
lines changed

3 files changed

+31
-39
lines changed

tests/unit/vertexai/genai/test_evals.py

Lines changed: 8 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1070,9 +1070,7 @@ def test_run_inference_with_agent_engine_and_session_inputs_dict(
10701070
)
10711071

10721072
mock_agent_engine = mock.Mock()
1073-
mock_agent_engine.async_create_session = mock.AsyncMock(
1074-
return_value={"id": "session1"}
1075-
)
1073+
mock_agent_engine.create_session.return_value = {"id": "session1"}
10761074
stream_query_return_value = [
10771075
{
10781076
"id": "1",
@@ -1088,13 +1086,7 @@ def test_run_inference_with_agent_engine_and_session_inputs_dict(
10881086
},
10891087
]
10901088

1091-
async def _async_iterator(iterable):
1092-
for item in iterable:
1093-
yield item
1094-
1095-
mock_agent_engine.async_stream_query.return_value = _async_iterator(
1096-
stream_query_return_value
1097-
)
1089+
mock_agent_engine.stream_query.return_value = iter(stream_query_return_value)
10981090
mock_vertexai_client.return_value.agent_engines.get.return_value = (
10991091
mock_agent_engine
11001092
)
@@ -1108,10 +1100,10 @@ async def _async_iterator(iterable):
11081100
mock_vertexai_client.return_value.agent_engines.get.assert_called_once_with(
11091101
name="projects/test-project/locations/us-central1/reasoningEngines/123"
11101102
)
1111-
mock_agent_engine.async_create_session.assert_called_once_with(
1103+
mock_agent_engine.create_session.assert_called_once_with(
11121104
user_id="123", state={"a": "1"}
11131105
)
1114-
mock_agent_engine.async_stream_query.assert_called_once_with(
1106+
mock_agent_engine.stream_query.assert_called_once_with(
11151107
user_id="123", session_id="session1", message="agent prompt"
11161108
)
11171109

@@ -1162,9 +1154,7 @@ def test_run_inference_with_agent_engine_and_session_inputs_literal_string(
11621154
)
11631155

11641156
mock_agent_engine = mock.Mock()
1165-
mock_agent_engine.async_create_session = mock.AsyncMock(
1166-
return_value={"id": "session1"}
1167-
)
1157+
mock_agent_engine.create_session.return_value = {"id": "session1"}
11681158
stream_query_return_value = [
11691159
{
11701160
"id": "1",
@@ -1180,13 +1170,7 @@ def test_run_inference_with_agent_engine_and_session_inputs_literal_string(
11801170
},
11811171
]
11821172

1183-
async def _async_iterator(iterable):
1184-
for item in iterable:
1185-
yield item
1186-
1187-
mock_agent_engine.async_stream_query.return_value = _async_iterator(
1188-
stream_query_return_value
1189-
)
1173+
mock_agent_engine.stream_query.return_value = iter(stream_query_return_value)
11901174
mock_vertexai_client.return_value.agent_engines.get.return_value = (
11911175
mock_agent_engine
11921176
)
@@ -1200,10 +1184,10 @@ async def _async_iterator(iterable):
12001184
mock_vertexai_client.return_value.agent_engines.get.assert_called_once_with(
12011185
name="projects/test-project/locations/us-central1/reasoningEngines/123"
12021186
)
1203-
mock_agent_engine.async_create_session.assert_called_once_with(
1187+
mock_agent_engine.create_session.assert_called_once_with(
12041188
user_id="123", state={"a": "1"}
12051189
)
1206-
mock_agent_engine.async_stream_query.assert_called_once_with(
1190+
mock_agent_engine.stream_query.assert_called_once_with(
12071191
user_id="123", session_id="session1", message="agent prompt"
12081192
)
12091193

vertexai/_genai/_evals_common.py

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -278,12 +278,10 @@ def agent_run_wrapper(
278278
and type(agent_engine).__name__ == "AgentEngine"
279279
):
280280
agent_engine_instance = agent_engine
281-
return asyncio.run(
282-
inference_fn_arg(
283-
row=row_arg,
284-
contents=contents_arg,
285-
agent_engine=agent_engine_instance,
286-
)
281+
return inference_fn_arg(
282+
row=row_arg,
283+
contents=contents_arg,
284+
agent_engine=agent_engine_instance,
287285
)
288286

289287
future = executor.submit(
@@ -1265,7 +1263,7 @@ def _run_agent(
12651263
)
12661264

12671265

1268-
async def _execute_agent_run_with_retry(
1266+
def _execute_agent_run_with_retry(
12691267
row: pd.Series,
12701268
contents: Union[genai_types.ContentListUnion, genai_types.ContentListUnionDict],
12711269
agent_engine: types.AgentEngine,
@@ -1287,7 +1285,7 @@ async def _execute_agent_run_with_retry(
12871285
)
12881286
user_id = session_inputs.user_id
12891287
session_state = session_inputs.state
1290-
session = await agent_engine.async_create_session(
1288+
session = agent_engine.create_session(
12911289
user_id=user_id,
12921290
state=session_state,
12931291
)
@@ -1298,7 +1296,7 @@ async def _execute_agent_run_with_retry(
12981296
for attempt in range(max_retries):
12991297
try:
13001298
responses = []
1301-
async for event in agent_engine.async_stream_query(
1299+
for event in agent_engine.stream_query(
13021300
user_id=user_id,
13031301
session_id=session["id"],
13041302
message=contents,
@@ -1317,7 +1315,7 @@ async def _execute_agent_run_with_retry(
13171315
)
13181316
if attempt == max_retries - 1:
13191317
return {"error": f"Resource exhausted after retries: {e}"}
1320-
await asyncio.sleep(2**attempt)
1318+
time.sleep(2**attempt)
13211319
except Exception as e: # pylint: disable=broad-exception-caught
13221320
logger.error(
13231321
"Unexpected error during generate_content on attempt %d/%d: %s",
@@ -1328,7 +1326,7 @@ async def _execute_agent_run_with_retry(
13281326

13291327
if attempt == max_retries - 1:
13301328
return {"error": f"Failed after retries: {e}"}
1331-
await asyncio.sleep(1)
1329+
time.sleep(1)
13321330
return {"error": f"Failed to get agent run results after {max_retries} retries"}
13331331

13341332

vertexai/_genai/evals.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1332,7 +1332,9 @@ def get_evaluation_run(
13321332
def create_evaluation_run(
13331333
self,
13341334
*,
1335-
dataset: Union[types.EvaluationRunDataSource, types.EvaluationDataset],
1335+
dataset: Union[
1336+
types.EvaluationRunDataSource, types.EvaluationDataset, pd.DataFrame
1337+
],
13361338
dest: str,
13371339
name: Optional[str] = None,
13381340
display_name: Optional[str] = None,
@@ -1346,7 +1348,8 @@ def create_evaluation_run(
13461348
"""Creates an EvaluationRun.
13471349
13481350
Args:
1349-
dataset: The dataset to evaluate. Either an EvaluationRunDataSource or an EvaluationDataset.
1351+
dataset: The dataset to evaluate. Either an EvaluationRunDataSource, an
1352+
EvaluationDataset, or a pd.DataFrame.
13501353
dest: The GCS URI prefix to write the evaluation results to.
13511354
name: The name of the evaluation run.
13521355
display_name: The display name of the evaluation run.
@@ -1358,6 +1361,8 @@ def create_evaluation_run(
13581361
Returns:
13591362
The created evaluation run.
13601363
"""
1364+
if isinstance(dataset, pd.DataFrame):
1365+
dataset = types.EvaluationDataset(eval_dataset_df=dataset)
13611366
if type(dataset).__name__ == "EvaluationDataset":
13621367
logger.warning(
13631368
"EvaluationDataset input is experimental and may change in future versions."
@@ -2185,7 +2190,9 @@ async def get_evaluation_run(
21852190
async def create_evaluation_run(
21862191
self,
21872192
*,
2188-
dataset: Union[types.EvaluationRunDataSource, types.EvaluationDataset],
2193+
dataset: Union[
2194+
types.EvaluationRunDataSource, types.EvaluationDataset, pd.DataFrame
2195+
],
21892196
dest: str,
21902197
name: Optional[str] = None,
21912198
display_name: Optional[str] = None,
@@ -2199,7 +2206,8 @@ async def create_evaluation_run(
21992206
"""Creates an EvaluationRun.
22002207
22012208
Args:
2202-
dataset: The dataset to evaluate. Either an EvaluationRunDataSource or an EvaluationDataset.
2209+
dataset: The dataset to evaluate. Either an EvaluationRunDataSource, an
2210+
EvaluationDataset, or a pd.DataFrame.
22032211
dest: The GCS URI prefix to write the evaluation results to.
22042212
name: The name of the evaluation run.
22052213
display_name: The display name of the evaluation run.
@@ -2211,6 +2219,8 @@ async def create_evaluation_run(
22112219
Returns:
22122220
The created evaluation run.
22132221
"""
2222+
if isinstance(dataset, pd.DataFrame):
2223+
dataset = types.EvaluationDataset(eval_dataset_df=dataset)
22142224
if type(dataset).__name__ == "EvaluationDataset":
22152225
logger.warning(
22162226
"EvaluationDataset input is experimental and may change in future versions."

0 commit comments

Comments (0)