Commit 74cde53

jsondai authored and copybara-github committed

fix: GenAI Client(evals) - Support direct pandas DataFrame dataset in evaluate()

PiperOrigin-RevId: 823070911
1 parent 59e3004 commit 74cde53
File tree

2 files changed: +54 -4 lines changed

tests/unit/vertexai/genai/replays/test_evaluate.py

Lines changed: 41 additions & 0 deletions

@@ -54,6 +54,47 @@ def test_evaluation_result(client):
         assert case_result.response_candidate_results is not None


+def test_evaluation_byor(client):
+    """Tests that evaluate() with BYOR (Bring-Your-Own Response) produces a correctly structured EvaluationResult."""
+    byor_df = pd.DataFrame(
+        {
+            "prompt": [
+                "Write a simple story about a dinosaur",
+                "Generate a poem about Vertex AI",
+            ],
+            "response": [
+                "Once upon a time, there was a T-Rex named Rexy.",
+                "In clouds of code, a mind of silicon born...",
+            ],
+        }
+    )
+
+    metrics_to_run = [
+        types.RubricMetric.GENERAL_QUALITY,
+    ]
+
+    evaluation_result = client.evals.evaluate(
+        dataset=byor_df,
+        metrics=metrics_to_run,
+    )
+
+    assert isinstance(evaluation_result, types.EvaluationResult)
+
+    assert evaluation_result.summary_metrics is not None
+    assert len(evaluation_result.summary_metrics) > 0
+    for summary in evaluation_result.summary_metrics:
+        assert isinstance(summary, types.AggregatedMetricResult)
+        assert summary.metric_name is not None
+        assert summary.mean_score is not None
+
+    assert evaluation_result.eval_case_results is not None
+    assert len(evaluation_result.eval_case_results) > 0
+    for case_result in evaluation_result.eval_case_results:
+        assert isinstance(case_result, types.EvalCaseResult)
+        assert case_result.eval_case_index is not None
+        assert case_result.response_candidate_results is not None
+
+
 pytestmark = pytest_helper.setup(
     file=__file__,
     globals_for_file=globals(),

vertexai/_genai/evals.py

Lines changed: 13 additions & 4 deletions

@@ -970,7 +970,9 @@ def evaluate(
         self,
         *,
         dataset: Union[
-            types.EvaluationDatasetOrDict, list[types.EvaluationDatasetOrDict]
+            pd.DataFrame,
+            types.EvaluationDatasetOrDict,
+            list[types.EvaluationDatasetOrDict],
         ],
         metrics: list[types.MetricOrDict] = None,
         config: Optional[types.EvaluateMethodConfigOrDict] = None,
@@ -979,10 +981,13 @@ def evaluate(
         """Evaluates candidate responses in the provided dataset(s) using the specified metrics.

         Args:
-            dataset: The dataset(s) to evaluate. Can be a single `types.EvaluationDataset` or a list of `types.EvaluationDataset`.
+            dataset: The dataset(s) to evaluate. Can be a pandas DataFrame, a single
+                `types.EvaluationDataset` or a list of `types.EvaluationDataset`.
             metrics: The list of metrics to use for evaluation.
-            config: Optional configuration for the evaluation. Can be a dictionary or a `types.EvaluateMethodConfig` object.
-                - dataset_schema: Schema to use for the dataset. If not specified, the dataset schema will be inferred from the dataset automatically.
+            config: Optional configuration for the evaluation. Can be a dictionary or a
+                `types.EvaluateMethodConfig` object.
+                - dataset_schema: Schema to use for the dataset. If not specified, the
+                    dataset schema will be inferred from the dataset automatically.
                 - dest: Destination path for storing evaluation results.
             **kwargs: Extra arguments to pass to evaluation, such as `agent_info`.

@@ -993,6 +998,10 @@ def evaluate(
             config = types.EvaluateMethodConfig()
         if isinstance(config, dict):
            config = types.EvaluateMethodConfig.model_validate(config)
+
+        if isinstance(dataset, pd.DataFrame):
+            dataset = types.EvaluationDataset(eval_dataset_df=dataset)
+
        if isinstance(dataset, list):
            dataset = [
                (
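For orientation, here is a minimal usage sketch of what this change enables: passing a pandas DataFrame straight to `client.evals.evaluate()` instead of wrapping it in `types.EvaluationDataset` first. The client construction, project, and location values below are illustrative assumptions (the preview `vertexai.Client` / `vertexai.types` surface), not part of this commit; the example data is taken from the new test above.

import pandas as pd

import vertexai
from vertexai import types

# Placeholder project/location; assumes the preview GenAI client surface.
client = vertexai.Client(project="my-project", location="us-central1")

# A Bring-Your-Own-Response (BYOR) dataset: each row pairs a prompt with a
# pre-generated response to be judged.
df = pd.DataFrame(
    {
        "prompt": ["Write a simple story about a dinosaur"],
        "response": ["Once upon a time, there was a T-Rex named Rexy."],
    }
)

# With this commit, the DataFrame is accepted directly; evaluate() wraps it
# internally as types.EvaluationDataset(eval_dataset_df=df).
result = client.evals.evaluate(
    dataset=df,
    metrics=[types.RubricMetric.GENERAL_QUALITY],
)

for summary in result.summary_metrics:
    print(summary.metric_name, summary.mean_score)

Note that the new DataFrame branch runs before the existing list-handling branch, so single-DataFrame, single-dataset, and list-of-dataset call sites all share the same downstream code path.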
