@@ -174,11 +174,14 @@ async def evaluate_eval_set(
174
174
175
175
failures .extend (failures_per_eval_case )
176
176
177
- assert not failures , (
178
- "Following are all the test failures. If you looking to get more"
179
- " details on the failures, then please re-run this test with"
180
- " `print_details` set to `True`.\n {}" .format ("\n " .join (failures ))
181
- )
177
+ failure_message = "Following are all the test failures."
178
+ if not print_detailed_results :
179
+ failure_message += (
180
+ " If you are looking to get more details on the failures, then please"
181
+ " re-run this test with `print_detailed_results` set to `True`."
182
+ )
183
+ failure_message += "\n " + "\n " .join (failures )
184
+ assert not failures , failure_message
182
185
183
186
@staticmethod
184
187
async def evaluate (
@@ -187,6 +190,7 @@ async def evaluate(
187
190
num_runs : int = NUM_RUNS ,
188
191
agent_name : Optional [str ] = None ,
189
192
initial_session_file : Optional [str ] = None ,
193
+ print_detailed_results : bool = True ,
190
194
):
191
195
"""Evaluates an Agent given eval data.
192
196
@@ -203,6 +207,8 @@ async def evaluate(
203
207
agent_name: The name of the agent.
204
208
initial_session_file: File that contains initial session state that is
205
209
needed by all the evals in the eval dataset.
210
+ print_detailed_results: Whether to print detailed results for each metric
211
+ evaluation.
206
212
"""
207
213
test_files = []
208
214
if isinstance (eval_dataset_file_path_or_dir , str ) and os .path .isdir (
@@ -229,6 +235,7 @@ async def evaluate(
229
235
criteria = criteria ,
230
236
num_runs = num_runs ,
231
237
agent_name = agent_name ,
238
+ print_detailed_results = print_detailed_results ,
232
239
)
233
240
234
241
@staticmethod
0 commit comments