
Commit 7e08808

google-genai-bot authored and copybara-github committed
feat: Expose print_detailed_results param to AgentEvaluator.evaluate
Currently `print_detailed_results` is available only in `AgentEvaluator.evaluate_eval_set`, while the adk-samples data science agent uses `AgentEvaluator.evaluate` (https://github.com/google/adk-samples/blob/c230c3ddc16a3f9fc7edff409b567bdd65ebd9da/python/agents/data-science/eval/test_eval.py#L32). This change exposes the same parameter on `AgentEvaluator.evaluate`.

PiperOrigin-RevId: 794262781
1 parent 1fc8d20 · commit 7e08808

File tree

1 file changed: +12 −5 lines

src/google/adk/evaluation/agent_evaluator.py

Lines changed: 12 additions & 5 deletions
@@ -174,11 +174,14 @@ async def evaluate_eval_set(
 
       failures.extend(failures_per_eval_case)
 
-    assert not failures, (
-        "Following are all the test failures. If you looking to get more"
-        " details on the failures, then please re-run this test with"
-        " `print_details` set to `True`.\n{}".format("\n".join(failures))
-    )
+    failure_message = "Following are all the test failures."
+    if not print_detailed_results:
+      failure_message += (
+          " If you looking to get more details on the failures, then please"
+          " re-run this test with `print_detailed_results` set to `True`."
+      )
+    failure_message += "\n" + "\n".join(failures)
+    assert not failures, failure_message
 
   @staticmethod
   async def evaluate(
@@ -187,6 +190,7 @@ async def evaluate(
       num_runs: int = NUM_RUNS,
       agent_name: Optional[str] = None,
       initial_session_file: Optional[str] = None,
+      print_detailed_results: bool = True,
   ):
     """Evaluates an Agent given eval data.
 
@@ -203,6 +207,8 @@ async def evaluate(
       agent_name: The name of the agent.
       initial_session_file: File that contains initial session state that is
        needed by all the evals in the eval dataset.
+      print_detailed_results: Whether to print detailed results for each metric
+        evaluation.
     """
     test_files = []
     if isinstance(eval_dataset_file_path_or_dir, str) and os.path.isdir(
@@ -229,6 +235,7 @@ async def evaluate(
         criteria=criteria,
         num_runs=num_runs,
         agent_name=agent_name,
+        print_detailed_results=print_detailed_results,
     )
 
   @staticmethod
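
For context, the sketch below shows how a pytest-based eval (in the spirit of the adk-samples test linked in the commit message) might opt into the newly exposed parameter. This is a minimal illustration, not part of the commit: the `agent_module` value, the eval-set path, and the pytest-asyncio setup are assumptions; only `print_detailed_results` comes from this change.

# Hypothetical usage sketch (requires pytest and pytest-asyncio).
import pytest

from google.adk.evaluation.agent_evaluator import AgentEvaluator


@pytest.mark.asyncio
async def test_eval_with_detailed_results():
  """Runs the eval set and asks for per-metric details in the output."""
  await AgentEvaluator.evaluate(
      agent_module="data_science",  # placeholder: your agent's module name
      eval_dataset_file_path_or_dir="eval/eval_data/simple.test.json",  # placeholder path
      num_runs=1,
      print_detailed_results=True,  # parameter exposed on evaluate() by this commit
  )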
