@@ -969,7 +969,9 @@ def evaluate(
969969 self ,
970970 * ,
971971 dataset : Union [
972- types .EvaluationDatasetOrDict , list [types .EvaluationDatasetOrDict ]
972+ pd .DataFrame ,
973+ types .EvaluationDatasetOrDict ,
974+ list [types .EvaluationDatasetOrDict ],
973975 ],
974976 metrics : list [types .MetricOrDict ] = None ,
975977 config : Optional [types .EvaluateMethodConfigOrDict ] = None ,
@@ -978,10 +980,13 @@ def evaluate(
978980 """Evaluates candidate responses in the provided dataset(s) using the specified metrics.
979981
980982 Args:
981- dataset: The dataset(s) to evaluate. Can be a single `types.EvaluationDataset` or a list of `types.EvaluationDataset`.
983+ dataset: The dataset(s) to evaluate. Can be a pandas DataFrame, a single
984+ `types.EvaluationDataset` or a list of `types.EvaluationDataset`.
982985 metrics: The list of metrics to use for evaluation.
983- config: Optional configuration for the evaluation. Can be a dictionary or a `types.EvaluateMethodConfig` object.
984- - dataset_schema: Schema to use for the dataset. If not specified, the dataset schema will be inferred from the dataset automatically.
986+ config: Optional configuration for the evaluation. Can be a dictionary or a
987+ `types.EvaluateMethodConfig` object.
988+ - dataset_schema: Schema to use for the dataset. If not specified, the
989+ dataset schema will be inferred from the dataset automatically.
985990 - dest: Destination path for storing evaluation results.
986991 **kwargs: Extra arguments to pass to evaluation, such as `agent_info`.
987992
@@ -992,6 +997,10 @@ def evaluate(
992997 config = types .EvaluateMethodConfig ()
993998 if isinstance (config , dict ):
994999 config = types .EvaluateMethodConfig .model_validate (config )
1000+
1001+ if isinstance (dataset , pd .DataFrame ):
1002+ dataset = types .EvaluationDataset (eval_dataset_df = dataset )
1003+
9951004 if isinstance (dataset , list ):
9961005 dataset = [
9971006 (
0 commit comments