33
33
from . import cli_deploy
34
34
from .. import version
35
35
from ..evaluation .constants import MISSING_EVAL_DEPENDENCIES_MESSAGE
36
- from ..evaluation .local_eval_set_results_manager import LocalEvalSetResultsManager
37
- from ..sessions .in_memory_session_service import InMemorySessionService
38
36
from .cli import run_cli
39
37
from .fast_api import get_fast_api_app
40
38
from .utils import envs
@@ -289,7 +287,7 @@ def cli_run(
289
287
exists = True , dir_okay = True , file_okay = False , resolve_path = True
290
288
),
291
289
)
292
- @click .argument ("eval_set_file_path " , nargs = - 1 )
290
+ @click .argument ("eval_set_file_path_or_id " , nargs = - 1 )
293
291
@click .option ("--config_file_path" , help = "Optional. The path to config file." )
294
292
@click .option (
295
293
"--print_detailed_results" ,
@@ -309,7 +307,7 @@ def cli_run(
309
307
)
310
308
def cli_eval (
311
309
agent_module_file_path : str ,
312
- eval_set_file_path : list [str ],
310
+ eval_set_file_path_or_id : list [str ],
313
311
config_file_path : str ,
314
312
print_detailed_results : bool ,
315
313
eval_storage_uri : Optional [str ] = None ,
@@ -319,123 +317,188 @@ def cli_eval(
319
317
AGENT_MODULE_FILE_PATH: The path to the __init__.py file that contains a
320
318
module by the name "agent". "agent" module contains a root_agent.
321
319
322
- EVAL_SET_FILE_PATH: You can specify one or more eval set file paths.
320
+ EVAL_SET_FILE_PATH_OR_ID: You can specify one or more eval set file paths or
321
+ eval set ids.
323
322
323
+ Mixing of eval set file paths with eval set ids is not allowed.
324
+
325
+ *Eval Set File Path*
324
326
For each file, all evals will be run by default.
325
327
326
328
If you want to run only specific evals from an eval set, first create a comma
327
329
separated list of eval names and then add that as a suffix to the eval set
328
330
file name, demarcated by a `:`.
329
331
330
- For example,
332
+ For example, we have `sample_eval_set_file.json` file that has the following
333
+ eval cases:
334
+ sample_eval_set_file.json:
335
+ |....... eval_1
336
+ |....... eval_2
337
+ |....... eval_3
338
+ |....... eval_4
339
+ |....... eval_5
331
340
332
341
sample_eval_set_file.json:eval_1,eval_2,eval_3
333
342
334
343
This will only run eval_1, eval_2 and eval_3 from sample_eval_set_file.json.
335
344
345
+ *Eval Set Id*
346
+ For each eval set, all evals will be run by default.
347
+
348
+ If you want to run only specific evals from an eval set, first create a comma
349
+ separated list of eval names and then add that as a suffix to the eval set
350
+ id, demarcated by a `:`.
351
+
352
+ For example, we have `sample_eval_set_id` that has the following eval cases:
353
+ sample_eval_set_id:
354
+ |....... eval_1
355
+ |....... eval_2
356
+ |....... eval_3
357
+ |....... eval_4
358
+ |....... eval_5
359
+
360
+ If we did:
361
+ sample_eval_set_id:eval_1,eval_2,eval_3
362
+
363
+ This will only run eval_1, eval_2 and eval_3 from sample_eval_set_id.
364
+
336
365
CONFIG_FILE_PATH: The path to config file.
337
366
338
367
PRINT_DETAILED_RESULTS: Prints detailed results on the console.
339
368
"""
340
369
envs .load_dotenv_for_agent (agent_module_file_path , "." )
341
370
342
371
try :
372
+ from ..evaluation .base_eval_service import InferenceConfig
373
+ from ..evaluation .base_eval_service import InferenceRequest
374
+ from ..evaluation .eval_metrics import EvalMetric
375
+ from ..evaluation .eval_metrics import JudgeModelOptions
376
+ from ..evaluation .eval_result import EvalCaseResult
377
+ from ..evaluation .evaluator import EvalStatus
378
+ from ..evaluation .in_memory_eval_sets_manager import InMemoryEvalSetsManager
379
+ from ..evaluation .local_eval_service import LocalEvalService
380
+ from ..evaluation .local_eval_set_results_manager import LocalEvalSetResultsManager
343
381
from ..evaluation .local_eval_sets_manager import load_eval_set_from_file
344
- from .cli_eval import EvalCaseResult
345
- from .cli_eval import EvalMetric
346
- from .cli_eval import EvalStatus
382
+ from .. evaluation . local_eval_sets_manager import LocalEvalSetsManager
383
+ from .cli_eval import _collect_eval_results
384
+ from .cli_eval import _collect_inferences
347
385
from .cli_eval import get_evaluation_criteria_or_default
348
386
from .cli_eval import get_root_agent
349
387
from .cli_eval import parse_and_get_evals_to_run
350
- from .cli_eval import run_evals
351
- from .cli_eval import try_get_reset_func
352
- except ModuleNotFoundError :
353
- raise click .ClickException (MISSING_EVAL_DEPENDENCIES_MESSAGE )
388
+ except ModuleNotFoundError as mnf :
389
+ raise click .ClickException (MISSING_EVAL_DEPENDENCIES_MESSAGE ) from mnf
354
390
355
391
evaluation_criteria = get_evaluation_criteria_or_default (config_file_path )
356
392
eval_metrics = []
357
393
for metric_name , threshold in evaluation_criteria .items ():
358
394
eval_metrics .append (
359
- EvalMetric (metric_name = metric_name , threshold = threshold )
395
+ EvalMetric (
396
+ metric_name = metric_name ,
397
+ threshold = threshold ,
398
+ judge_model_options = JudgeModelOptions (),
399
+ )
360
400
)
361
401
362
402
print (f"Using evaluation criteria: { evaluation_criteria } " )
363
403
364
404
root_agent = get_root_agent (agent_module_file_path )
365
- reset_func = try_get_reset_func (agent_module_file_path )
366
-
367
- gcs_eval_sets_manager = None
405
+ app_name = os . path . basename (agent_module_file_path )
406
+ agents_dir = os . path . dirname ( agent_module_file_path )
407
+ eval_sets_manager = None
368
408
eval_set_results_manager = None
409
+
369
410
if eval_storage_uri :
370
411
gcs_eval_managers = evals .create_gcs_eval_managers_from_uri (
371
412
eval_storage_uri
372
413
)
373
- gcs_eval_sets_manager = gcs_eval_managers .eval_sets_manager
414
+ eval_sets_manager = gcs_eval_managers .eval_sets_manager
374
415
eval_set_results_manager = gcs_eval_managers .eval_set_results_manager
375
416
else :
376
- eval_set_results_manager = LocalEvalSetResultsManager (
377
- agents_dir = os .path .dirname (agent_module_file_path )
378
- )
379
- eval_set_file_path_to_evals = parse_and_get_evals_to_run (eval_set_file_path )
380
- eval_set_id_to_eval_cases = {}
381
-
382
- # Read the eval_set files and get the cases.
383
- for eval_set_file_path , eval_case_ids in eval_set_file_path_to_evals .items ():
384
- if gcs_eval_sets_manager :
385
- eval_set = gcs_eval_sets_manager ._load_eval_set_from_blob (
386
- eval_set_file_path
387
- )
388
- if not eval_set :
417
+ eval_set_results_manager = LocalEvalSetResultsManager (agents_dir = agents_dir )
418
+
419
+ inference_requests = []
420
+ eval_set_file_or_id_to_evals = parse_and_get_evals_to_run (
421
+ eval_set_file_path_or_id
422
+ )
423
+
424
+ # Check if the first entry is a file that exists, if it does then we assume
425
+ # rest of the entries are also files. We enforce this assumption in the if
426
+ # block.
427
+ if eval_set_file_or_id_to_evals and os .path .exists (
428
+ list (eval_set_file_or_id_to_evals .keys ())[0 ]
429
+ ):
430
+ eval_sets_manager = InMemoryEvalSetsManager ()
431
+
432
+ # Read the eval_set files and get the cases.
433
+ for (
434
+ eval_set_file_path ,
435
+ eval_case_ids ,
436
+ ) in eval_set_file_or_id_to_evals .items ():
437
+ try :
438
+ eval_set = load_eval_set_from_file (
439
+ eval_set_file_path , eval_set_file_path
440
+ )
441
+ except FileNotFoundError as fne :
389
442
raise click .ClickException (
390
- f"Eval set { eval_set_file_path } not found in GCS."
443
+ f"`{ eval_set_file_path } ` should be a valid eval set file."
444
+ ) from fne
445
+
446
+ eval_sets_manager .create_eval_set (
447
+ app_name = app_name , eval_set_id = eval_set .eval_set_id
448
+ )
449
+ for eval_case in eval_set .eval_cases :
450
+ eval_sets_manager .add_eval_case (
451
+ app_name = app_name ,
452
+ eval_set_id = eval_set .eval_set_id ,
453
+ eval_case = eval_case ,
391
454
)
392
- else :
393
- eval_set = load_eval_set_from_file (eval_set_file_path , eval_set_file_path )
394
- eval_cases = eval_set .eval_cases
395
-
396
- if eval_case_ids :
397
- # There are eval_ids that we should select.
398
- eval_cases = [
399
- e for e in eval_set .eval_cases if e .eval_id in eval_case_ids
400
- ]
401
-
402
- eval_set_id_to_eval_cases [eval_set .eval_set_id ] = eval_cases
403
-
404
- async def _collect_eval_results () -> list [EvalCaseResult ]:
405
- session_service = InMemorySessionService ()
406
- eval_case_results = []
407
- async for eval_case_result in run_evals (
408
- eval_set_id_to_eval_cases ,
409
- root_agent ,
410
- reset_func ,
411
- eval_metrics ,
412
- session_service = session_service ,
413
- ):
414
- eval_case_result .session_details = await session_service .get_session (
415
- app_name = os .path .basename (agent_module_file_path ),
416
- user_id = eval_case_result .user_id ,
417
- session_id = eval_case_result .session_id ,
455
+ inference_requests .append (
456
+ InferenceRequest (
457
+ app_name = app_name ,
458
+ eval_set_id = eval_set .eval_set_id ,
459
+ eval_case_ids = eval_case_ids ,
460
+ inference_config = InferenceConfig (),
461
+ )
462
+ )
463
+ else :
464
+ # We assume that what we have are eval set ids instead.
465
+ eval_sets_manager = (
466
+ eval_sets_manager
467
+ if eval_storage_uri
468
+ else LocalEvalSetsManager (agents_dir = agents_dir )
469
+ )
470
+
471
+ for eval_set_id_key , eval_case_ids in eval_set_file_or_id_to_evals .items ():
472
+ inference_requests .append (
473
+ InferenceRequest (
474
+ app_name = app_name ,
475
+ eval_set_id = eval_set_id_key ,
476
+ eval_case_ids = eval_case_ids ,
477
+ inference_config = InferenceConfig (),
478
+ )
418
479
)
419
- eval_case_results .append (eval_case_result )
420
- return eval_case_results
421
480
422
481
try :
423
- eval_results = asyncio .run (_collect_eval_results ())
424
- except ModuleNotFoundError :
425
- raise click .ClickException (MISSING_EVAL_DEPENDENCIES_MESSAGE )
426
-
427
- # Write eval set results.
428
- eval_set_id_to_eval_results = collections .defaultdict (list )
429
- for eval_case_result in eval_results :
430
- eval_set_id = eval_case_result .eval_set_id
431
- eval_set_id_to_eval_results [eval_set_id ].append (eval_case_result )
432
-
433
- for eval_set_id , eval_case_results in eval_set_id_to_eval_results .items ():
434
- eval_set_results_manager .save_eval_set_result (
435
- app_name = os .path .basename (agent_module_file_path ),
436
- eval_set_id = eval_set_id ,
437
- eval_case_results = eval_case_results ,
482
+ eval_service = LocalEvalService (
483
+ root_agent = root_agent ,
484
+ eval_sets_manager = eval_sets_manager ,
485
+ eval_set_results_manager = eval_set_results_manager ,
486
+ )
487
+
488
+ inference_results = asyncio .run (
489
+ _collect_inferences (
490
+ inference_requests = inference_requests , eval_service = eval_service
491
+ )
438
492
)
493
+ eval_results = asyncio .run (
494
+ _collect_eval_results (
495
+ inference_results = inference_results ,
496
+ eval_service = eval_service ,
497
+ eval_metrics = eval_metrics ,
498
+ )
499
+ )
500
+ except ModuleNotFoundError as mnf :
501
+ raise click .ClickException (MISSING_EVAL_DEPENDENCIES_MESSAGE ) from mnf
439
502
440
503
print ("*********************************************************************" )
441
504
eval_run_summary = {}
@@ -890,8 +953,10 @@ def cli_deploy_cloud_run(
890
953
port : int ,
891
954
trace_to_cloud : bool ,
892
955
with_ui : bool ,
893
- verbosity : str ,
894
956
adk_version : str ,
957
+ verbosity : str = "WARNING" ,
958
+ reload : bool = True ,
959
+ allow_origins : Optional [list [str ]] = None ,
895
960
log_level : Optional [str ] = None ,
896
961
session_service_uri : Optional [str ] = None ,
897
962
artifact_service_uri : Optional [str ] = None ,
@@ -923,6 +988,7 @@ def cli_deploy_cloud_run(
923
988
temp_folder = temp_folder ,
924
989
port = port ,
925
990
trace_to_cloud = trace_to_cloud ,
991
+ allow_origins = allow_origins ,
926
992
with_ui = with_ui ,
927
993
log_level = log_level ,
928
994
verbosity = verbosity ,
0 commit comments