3333from . import cli_deploy
3434from .. import version
3535from ..evaluation .constants import MISSING_EVAL_DEPENDENCIES_MESSAGE
36- from ..evaluation .local_eval_set_results_manager import LocalEvalSetResultsManager
37- from ..sessions .in_memory_session_service import InMemorySessionService
3836from .cli import run_cli
3937from .fast_api import get_fast_api_app
4038from .utils import envs
@@ -289,7 +287,7 @@ def cli_run(
289287 exists = True , dir_okay = True , file_okay = False , resolve_path = True
290288 ),
291289)
292- @click .argument ("eval_set_file_path " , nargs = - 1 )
290+ @click .argument ("eval_set_file_path_or_id " , nargs = - 1 )
293291@click .option ("--config_file_path" , help = "Optional. The path to config file." )
294292@click .option (
295293 "--print_detailed_results" ,
@@ -309,7 +307,7 @@ def cli_run(
309307)
310308def cli_eval (
311309 agent_module_file_path : str ,
312- eval_set_file_path : list [str ],
310+ eval_set_file_path_or_id : list [str ],
313311 config_file_path : str ,
314312 print_detailed_results : bool ,
315313 eval_storage_uri : Optional [str ] = None ,
@@ -319,123 +317,188 @@ def cli_eval(
319317 AGENT_MODULE_FILE_PATH: The path to the __init__.py file that contains a
320318 module by the name "agent". "agent" module contains a root_agent.
321319
322- EVAL_SET_FILE_PATH: You can specify one or more eval set file paths.
320+ EVAL_SET_FILE_PATH_OR_ID: You can specify one or more eval set file paths or
321+ eval set ids.
323322
323+ Mixing of eval set file paths with eval set ids is not allowed.
324+
325+ *Eval Set File Path*
324326 For each file, all evals will be run by default.
325327
326328 If you want to run only specific evals from an eval set, first create a comma
327329 separated list of eval names and then add that as a suffix to the eval set
328330 file name, demarcated by a `:`.
329331
330- For example,
332+ For example, we have `sample_eval_set_file.json` file that has the following
333+ eval cases:
334+ sample_eval_set_file.json:
335+ |....... eval_1
336+ |....... eval_2
337+ |....... eval_3
338+ |....... eval_4
339+ |....... eval_5
331340
332341 sample_eval_set_file.json:eval_1,eval_2,eval_3
333342
334343 This will only run eval_1, eval_2 and eval_3 from sample_eval_set_file.json.
335344
345+ *Eval Set Id*
346+ For each eval set, all evals will be run by default.
347+
348+ If you want to run only specific evals from an eval set, first create a comma
349+ separated list of eval names and then add that as a suffix to the eval set
350+ id, demarcated by a `:`.
351+
352+ For example, we have `sample_eval_set_id` that has the following eval cases:
353+ sample_eval_set_id:
354+ |....... eval_1
355+ |....... eval_2
356+ |....... eval_3
357+ |....... eval_4
358+ |....... eval_5
359+
360+ If we did:
361+ sample_eval_set_id:eval_1,eval_2,eval_3
362+
363+ This will only run eval_1, eval_2 and eval_3 from sample_eval_set_id.
364+
336365 CONFIG_FILE_PATH: The path to config file.
337366
338367 PRINT_DETAILED_RESULTS: Prints detailed results on the console.
339368 """
340369 envs .load_dotenv_for_agent (agent_module_file_path , "." )
341370
342371 try :
372+ from ..evaluation .base_eval_service import InferenceConfig
373+ from ..evaluation .base_eval_service import InferenceRequest
374+ from ..evaluation .eval_metrics import EvalMetric
375+ from ..evaluation .eval_metrics import JudgeModelOptions
376+ from ..evaluation .eval_result import EvalCaseResult
377+ from ..evaluation .evaluator import EvalStatus
378+ from ..evaluation .in_memory_eval_sets_manager import InMemoryEvalSetsManager
379+ from ..evaluation .local_eval_service import LocalEvalService
380+ from ..evaluation .local_eval_set_results_manager import LocalEvalSetResultsManager
343381 from ..evaluation .local_eval_sets_manager import load_eval_set_from_file
344- from .cli_eval import EvalCaseResult
345- from .cli_eval import EvalMetric
346- from .cli_eval import EvalStatus
382+ from .. evaluation . local_eval_sets_manager import LocalEvalSetsManager
383+ from .cli_eval import _collect_eval_results
384+ from .cli_eval import _collect_inferences
347385 from .cli_eval import get_evaluation_criteria_or_default
348386 from .cli_eval import get_root_agent
349387 from .cli_eval import parse_and_get_evals_to_run
350- from .cli_eval import run_evals
351- from .cli_eval import try_get_reset_func
352- except ModuleNotFoundError :
353- raise click .ClickException (MISSING_EVAL_DEPENDENCIES_MESSAGE )
388+ except ModuleNotFoundError as mnf :
389+ raise click .ClickException (MISSING_EVAL_DEPENDENCIES_MESSAGE ) from mnf
354390
355391 evaluation_criteria = get_evaluation_criteria_or_default (config_file_path )
356392 eval_metrics = []
357393 for metric_name , threshold in evaluation_criteria .items ():
358394 eval_metrics .append (
359- EvalMetric (metric_name = metric_name , threshold = threshold )
395+ EvalMetric (
396+ metric_name = metric_name ,
397+ threshold = threshold ,
398+ judge_model_options = JudgeModelOptions (),
399+ )
360400 )
361401
362402 print (f"Using evaluation criteria: { evaluation_criteria } " )
363403
364404 root_agent = get_root_agent (agent_module_file_path )
365- reset_func = try_get_reset_func (agent_module_file_path )
366-
367- gcs_eval_sets_manager = None
405+ app_name = os . path . basename (agent_module_file_path )
406+ agents_dir = os . path . dirname ( agent_module_file_path )
407+ eval_sets_manager = None
368408 eval_set_results_manager = None
409+
369410 if eval_storage_uri :
370411 gcs_eval_managers = evals .create_gcs_eval_managers_from_uri (
371412 eval_storage_uri
372413 )
373- gcs_eval_sets_manager = gcs_eval_managers .eval_sets_manager
414+ eval_sets_manager = gcs_eval_managers .eval_sets_manager
374415 eval_set_results_manager = gcs_eval_managers .eval_set_results_manager
375416 else :
376- eval_set_results_manager = LocalEvalSetResultsManager (
377- agents_dir = os .path .dirname (agent_module_file_path )
378- )
379- eval_set_file_path_to_evals = parse_and_get_evals_to_run (eval_set_file_path )
380- eval_set_id_to_eval_cases = {}
381-
382- # Read the eval_set files and get the cases.
383- for eval_set_file_path , eval_case_ids in eval_set_file_path_to_evals .items ():
384- if gcs_eval_sets_manager :
385- eval_set = gcs_eval_sets_manager ._load_eval_set_from_blob (
386- eval_set_file_path
387- )
388- if not eval_set :
417+ eval_set_results_manager = LocalEvalSetResultsManager (agents_dir = agents_dir )
418+
419+ inference_requests = []
420+ eval_set_file_or_id_to_evals = parse_and_get_evals_to_run (
421+ eval_set_file_path_or_id
422+ )
423+
424+ # Check if the first entry is a file that exists, if it does then we assume
425+ # rest of the entries are also files. We enforce this assumption in the if
426+ # block.
427+ if eval_set_file_or_id_to_evals and os .path .exists (
428+ list (eval_set_file_or_id_to_evals .keys ())[0 ]
429+ ):
430+ eval_sets_manager = InMemoryEvalSetsManager ()
431+
432+ # Read the eval_set files and get the cases.
433+ for (
434+ eval_set_file_path ,
435+ eval_case_ids ,
436+ ) in eval_set_file_or_id_to_evals .items ():
437+ try :
438+ eval_set = load_eval_set_from_file (
439+ eval_set_file_path , eval_set_file_path
440+ )
441+ except FileNotFoundError as fne :
389442 raise click .ClickException (
390- f"Eval set { eval_set_file_path } not found in GCS."
443+ f"`{ eval_set_file_path } ` should be a valid eval set file."
444+ ) from fne
445+
446+ eval_sets_manager .create_eval_set (
447+ app_name = app_name , eval_set_id = eval_set .eval_set_id
448+ )
449+ for eval_case in eval_set .eval_cases :
450+ eval_sets_manager .add_eval_case (
451+ app_name = app_name ,
452+ eval_set_id = eval_set .eval_set_id ,
453+ eval_case = eval_case ,
391454 )
392- else :
393- eval_set = load_eval_set_from_file (eval_set_file_path , eval_set_file_path )
394- eval_cases = eval_set .eval_cases
395-
396- if eval_case_ids :
397- # There are eval_ids that we should select.
398- eval_cases = [
399- e for e in eval_set .eval_cases if e .eval_id in eval_case_ids
400- ]
401-
402- eval_set_id_to_eval_cases [eval_set .eval_set_id ] = eval_cases
403-
404- async def _collect_eval_results () -> list [EvalCaseResult ]:
405- session_service = InMemorySessionService ()
406- eval_case_results = []
407- async for eval_case_result in run_evals (
408- eval_set_id_to_eval_cases ,
409- root_agent ,
410- reset_func ,
411- eval_metrics ,
412- session_service = session_service ,
413- ):
414- eval_case_result .session_details = await session_service .get_session (
415- app_name = os .path .basename (agent_module_file_path ),
416- user_id = eval_case_result .user_id ,
417- session_id = eval_case_result .session_id ,
455+ inference_requests .append (
456+ InferenceRequest (
457+ app_name = app_name ,
458+ eval_set_id = eval_set .eval_set_id ,
459+ eval_case_ids = eval_case_ids ,
460+ inference_config = InferenceConfig (),
461+ )
462+ )
463+ else :
464+ # We assume that what we have are eval set ids instead.
465+ eval_sets_manager = (
466+ eval_sets_manager
467+ if eval_storage_uri
468+ else LocalEvalSetsManager (agents_dir = agents_dir )
469+ )
470+
471+ for eval_set_id_key , eval_case_ids in eval_set_file_or_id_to_evals .items ():
472+ inference_requests .append (
473+ InferenceRequest (
474+ app_name = app_name ,
475+ eval_set_id = eval_set_id_key ,
476+ eval_case_ids = eval_case_ids ,
477+ inference_config = InferenceConfig (),
478+ )
418479 )
419- eval_case_results .append (eval_case_result )
420- return eval_case_results
421480
422481 try :
423- eval_results = asyncio .run (_collect_eval_results ())
424- except ModuleNotFoundError :
425- raise click .ClickException (MISSING_EVAL_DEPENDENCIES_MESSAGE )
426-
427- # Write eval set results.
428- eval_set_id_to_eval_results = collections .defaultdict (list )
429- for eval_case_result in eval_results :
430- eval_set_id = eval_case_result .eval_set_id
431- eval_set_id_to_eval_results [eval_set_id ].append (eval_case_result )
432-
433- for eval_set_id , eval_case_results in eval_set_id_to_eval_results .items ():
434- eval_set_results_manager .save_eval_set_result (
435- app_name = os .path .basename (agent_module_file_path ),
436- eval_set_id = eval_set_id ,
437- eval_case_results = eval_case_results ,
482+ eval_service = LocalEvalService (
483+ root_agent = root_agent ,
484+ eval_sets_manager = eval_sets_manager ,
485+ eval_set_results_manager = eval_set_results_manager ,
486+ )
487+
488+ inference_results = asyncio .run (
489+ _collect_inferences (
490+ inference_requests = inference_requests , eval_service = eval_service
491+ )
438492 )
493+ eval_results = asyncio .run (
494+ _collect_eval_results (
495+ inference_results = inference_results ,
496+ eval_service = eval_service ,
497+ eval_metrics = eval_metrics ,
498+ )
499+ )
500+ except ModuleNotFoundError as mnf :
501+ raise click .ClickException (MISSING_EVAL_DEPENDENCIES_MESSAGE ) from mnf
439502
440503 print ("*********************************************************************" )
441504 eval_run_summary = {}
@@ -890,8 +953,10 @@ def cli_deploy_cloud_run(
890953 port : int ,
891954 trace_to_cloud : bool ,
892955 with_ui : bool ,
893- verbosity : str ,
894956 adk_version : str ,
957+ verbosity : str = "WARNING" ,
958+ reload : bool = True ,
959+ allow_origins : Optional [list [str ]] = None ,
895960 log_level : Optional [str ] = None ,
896961 session_service_uri : Optional [str ] = None ,
897962 artifact_service_uri : Optional [str ] = None ,
@@ -923,6 +988,7 @@ def cli_deploy_cloud_run(
923988 temp_folder = temp_folder ,
924989 port = port ,
925990 trace_to_cloud = trace_to_cloud ,
991+ allow_origins = allow_origins ,
926992 with_ui = with_ui ,
927993 log_level = log_level ,
928994 verbosity = verbosity ,
0 commit comments