Skip to content

Commit a54c702

Browse files
ankursharmas and copybara-github
authored and committed
fix: Re-adding eval related changes
Due to reasons that are being investigated, some of the recent changes got unintentionally reverted. We are adding those back in this PR.

PiperOrigin-RevId: 789384063
1 parent 1cfe6e9 commit a54c702

File tree

2 files changed

+245
-79
lines changed

2 files changed

+245
-79
lines changed

src/google/adk/cli/cli_tools_click.py

Lines changed: 142 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,6 @@
3333
from . import cli_deploy
3434
from .. import version
3535
from ..evaluation.constants import MISSING_EVAL_DEPENDENCIES_MESSAGE
36-
from ..evaluation.local_eval_set_results_manager import LocalEvalSetResultsManager
37-
from ..sessions.in_memory_session_service import InMemorySessionService
3836
from .cli import run_cli
3937
from .fast_api import get_fast_api_app
4038
from .utils import envs
@@ -289,7 +287,7 @@ def cli_run(
289287
exists=True, dir_okay=True, file_okay=False, resolve_path=True
290288
),
291289
)
292-
@click.argument("eval_set_file_path", nargs=-1)
290+
@click.argument("eval_set_file_path_or_id", nargs=-1)
293291
@click.option("--config_file_path", help="Optional. The path to config file.")
294292
@click.option(
295293
"--print_detailed_results",
@@ -309,7 +307,7 @@ def cli_run(
309307
)
310308
def cli_eval(
311309
agent_module_file_path: str,
312-
eval_set_file_path: list[str],
310+
eval_set_file_path_or_id: list[str],
313311
config_file_path: str,
314312
print_detailed_results: bool,
315313
eval_storage_uri: Optional[str] = None,
@@ -319,123 +317,188 @@ def cli_eval(
319317
AGENT_MODULE_FILE_PATH: The path to the __init__.py file that contains a
320318
module by the name "agent". "agent" module contains a root_agent.
321319
322-
EVAL_SET_FILE_PATH: You can specify one or more eval set file paths.
320+
EVAL_SET_FILE_PATH_OR_ID: You can specify one or more eval set file paths or
321+
eval set id.
323322
323+
Mixing of eval set file paths with eval set ids is not allowed.
324+
325+
*Eval Set File Path*
324326
For each file, all evals will be run by default.
325327
326328
If you want to run only specific evals from an eval set, first create a comma
327329
separated list of eval names and then add that as a suffix to the eval set
328330
file name, demarcated by a `:`.
329331
330-
For example,
332+
For example, we have a `sample_eval_set_file.json` file that has the following
333+
eval cases:
334+
sample_eval_set_file.json:
335+
|....... eval_1
336+
|....... eval_2
337+
|....... eval_3
338+
|....... eval_4
339+
|....... eval_5
331340
332341
sample_eval_set_file.json:eval_1,eval_2,eval_3
333342
334343
This will only run eval_1, eval_2 and eval_3 from sample_eval_set_file.json.
335344
345+
*Eval Set Id*
346+
For each eval set, all evals will be run by default.
347+
348+
If you want to run only specific evals from an eval set, first create a comma
349+
separated list of eval names and then add that as a suffix to the eval set
350+
id, demarcated by a `:`.
351+
352+
For example, we have `sample_eval_set_id` that has the following eval cases:
353+
sample_eval_set_id:
354+
|....... eval_1
355+
|....... eval_2
356+
|....... eval_3
357+
|....... eval_4
358+
|....... eval_5
359+
360+
If we did:
361+
sample_eval_set_id:eval_1,eval_2,eval_3
362+
363+
This will only run eval_1, eval_2 and eval_3 from sample_eval_set_id.
364+
336365
CONFIG_FILE_PATH: The path to config file.
337366
338367
PRINT_DETAILED_RESULTS: Prints detailed results on the console.
339368
"""
340369
envs.load_dotenv_for_agent(agent_module_file_path, ".")
341370

342371
try:
372+
from ..evaluation.base_eval_service import InferenceConfig
373+
from ..evaluation.base_eval_service import InferenceRequest
374+
from ..evaluation.eval_metrics import EvalMetric
375+
from ..evaluation.eval_metrics import JudgeModelOptions
376+
from ..evaluation.eval_result import EvalCaseResult
377+
from ..evaluation.evaluator import EvalStatus
378+
from ..evaluation.in_memory_eval_sets_manager import InMemoryEvalSetsManager
379+
from ..evaluation.local_eval_service import LocalEvalService
380+
from ..evaluation.local_eval_set_results_manager import LocalEvalSetResultsManager
343381
from ..evaluation.local_eval_sets_manager import load_eval_set_from_file
344-
from .cli_eval import EvalCaseResult
345-
from .cli_eval import EvalMetric
346-
from .cli_eval import EvalStatus
382+
from ..evaluation.local_eval_sets_manager import LocalEvalSetsManager
383+
from .cli_eval import _collect_eval_results
384+
from .cli_eval import _collect_inferences
347385
from .cli_eval import get_evaluation_criteria_or_default
348386
from .cli_eval import get_root_agent
349387
from .cli_eval import parse_and_get_evals_to_run
350-
from .cli_eval import run_evals
351-
from .cli_eval import try_get_reset_func
352-
except ModuleNotFoundError:
353-
raise click.ClickException(MISSING_EVAL_DEPENDENCIES_MESSAGE)
388+
except ModuleNotFoundError as mnf:
389+
raise click.ClickException(MISSING_EVAL_DEPENDENCIES_MESSAGE) from mnf
354390

355391
evaluation_criteria = get_evaluation_criteria_or_default(config_file_path)
356392
eval_metrics = []
357393
for metric_name, threshold in evaluation_criteria.items():
358394
eval_metrics.append(
359-
EvalMetric(metric_name=metric_name, threshold=threshold)
395+
EvalMetric(
396+
metric_name=metric_name,
397+
threshold=threshold,
398+
judge_model_options=JudgeModelOptions(),
399+
)
360400
)
361401

362402
print(f"Using evaluation criteria: {evaluation_criteria}")
363403

364404
root_agent = get_root_agent(agent_module_file_path)
365-
reset_func = try_get_reset_func(agent_module_file_path)
366-
367-
gcs_eval_sets_manager = None
405+
app_name = os.path.basename(agent_module_file_path)
406+
agents_dir = os.path.dirname(agent_module_file_path)
407+
eval_sets_manager = None
368408
eval_set_results_manager = None
409+
369410
if eval_storage_uri:
370411
gcs_eval_managers = evals.create_gcs_eval_managers_from_uri(
371412
eval_storage_uri
372413
)
373-
gcs_eval_sets_manager = gcs_eval_managers.eval_sets_manager
414+
eval_sets_manager = gcs_eval_managers.eval_sets_manager
374415
eval_set_results_manager = gcs_eval_managers.eval_set_results_manager
375416
else:
376-
eval_set_results_manager = LocalEvalSetResultsManager(
377-
agents_dir=os.path.dirname(agent_module_file_path)
378-
)
379-
eval_set_file_path_to_evals = parse_and_get_evals_to_run(eval_set_file_path)
380-
eval_set_id_to_eval_cases = {}
381-
382-
# Read the eval_set files and get the cases.
383-
for eval_set_file_path, eval_case_ids in eval_set_file_path_to_evals.items():
384-
if gcs_eval_sets_manager:
385-
eval_set = gcs_eval_sets_manager._load_eval_set_from_blob(
386-
eval_set_file_path
387-
)
388-
if not eval_set:
417+
eval_set_results_manager = LocalEvalSetResultsManager(agents_dir=agents_dir)
418+
419+
inference_requests = []
420+
eval_set_file_or_id_to_evals = parse_and_get_evals_to_run(
421+
eval_set_file_path_or_id
422+
)
423+
424+
# Check if the first entry is a file that exists, if it does then we assume
425+
# rest of the entries are also files. We enforce this assumption in the if
426+
# block.
427+
if eval_set_file_or_id_to_evals and os.path.exists(
428+
list(eval_set_file_or_id_to_evals.keys())[0]
429+
):
430+
eval_sets_manager = InMemoryEvalSetsManager()
431+
432+
# Read the eval_set files and get the cases.
433+
for (
434+
eval_set_file_path,
435+
eval_case_ids,
436+
) in eval_set_file_or_id_to_evals.items():
437+
try:
438+
eval_set = load_eval_set_from_file(
439+
eval_set_file_path, eval_set_file_path
440+
)
441+
except FileNotFoundError as fne:
389442
raise click.ClickException(
390-
f"Eval set {eval_set_file_path} not found in GCS."
443+
f"`{eval_set_file_path}` should be a valid eval set file."
444+
) from fne
445+
446+
eval_sets_manager.create_eval_set(
447+
app_name=app_name, eval_set_id=eval_set.eval_set_id
448+
)
449+
for eval_case in eval_set.eval_cases:
450+
eval_sets_manager.add_eval_case(
451+
app_name=app_name,
452+
eval_set_id=eval_set.eval_set_id,
453+
eval_case=eval_case,
391454
)
392-
else:
393-
eval_set = load_eval_set_from_file(eval_set_file_path, eval_set_file_path)
394-
eval_cases = eval_set.eval_cases
395-
396-
if eval_case_ids:
397-
# There are eval_ids that we should select.
398-
eval_cases = [
399-
e for e in eval_set.eval_cases if e.eval_id in eval_case_ids
400-
]
401-
402-
eval_set_id_to_eval_cases[eval_set.eval_set_id] = eval_cases
403-
404-
async def _collect_eval_results() -> list[EvalCaseResult]:
405-
session_service = InMemorySessionService()
406-
eval_case_results = []
407-
async for eval_case_result in run_evals(
408-
eval_set_id_to_eval_cases,
409-
root_agent,
410-
reset_func,
411-
eval_metrics,
412-
session_service=session_service,
413-
):
414-
eval_case_result.session_details = await session_service.get_session(
415-
app_name=os.path.basename(agent_module_file_path),
416-
user_id=eval_case_result.user_id,
417-
session_id=eval_case_result.session_id,
455+
inference_requests.append(
456+
InferenceRequest(
457+
app_name=app_name,
458+
eval_set_id=eval_set.eval_set_id,
459+
eval_case_ids=eval_case_ids,
460+
inference_config=InferenceConfig(),
461+
)
462+
)
463+
else:
464+
# We assume that what we have are eval set ids instead.
465+
eval_sets_manager = (
466+
eval_sets_manager
467+
if eval_storage_uri
468+
else LocalEvalSetsManager(agents_dir=agents_dir)
469+
)
470+
471+
for eval_set_id_key, eval_case_ids in eval_set_file_or_id_to_evals.items():
472+
inference_requests.append(
473+
InferenceRequest(
474+
app_name=app_name,
475+
eval_set_id=eval_set_id_key,
476+
eval_case_ids=eval_case_ids,
477+
inference_config=InferenceConfig(),
478+
)
418479
)
419-
eval_case_results.append(eval_case_result)
420-
return eval_case_results
421480

422481
try:
423-
eval_results = asyncio.run(_collect_eval_results())
424-
except ModuleNotFoundError:
425-
raise click.ClickException(MISSING_EVAL_DEPENDENCIES_MESSAGE)
426-
427-
# Write eval set results.
428-
eval_set_id_to_eval_results = collections.defaultdict(list)
429-
for eval_case_result in eval_results:
430-
eval_set_id = eval_case_result.eval_set_id
431-
eval_set_id_to_eval_results[eval_set_id].append(eval_case_result)
432-
433-
for eval_set_id, eval_case_results in eval_set_id_to_eval_results.items():
434-
eval_set_results_manager.save_eval_set_result(
435-
app_name=os.path.basename(agent_module_file_path),
436-
eval_set_id=eval_set_id,
437-
eval_case_results=eval_case_results,
482+
eval_service = LocalEvalService(
483+
root_agent=root_agent,
484+
eval_sets_manager=eval_sets_manager,
485+
eval_set_results_manager=eval_set_results_manager,
486+
)
487+
488+
inference_results = asyncio.run(
489+
_collect_inferences(
490+
inference_requests=inference_requests, eval_service=eval_service
491+
)
438492
)
493+
eval_results = asyncio.run(
494+
_collect_eval_results(
495+
inference_results=inference_results,
496+
eval_service=eval_service,
497+
eval_metrics=eval_metrics,
498+
)
499+
)
500+
except ModuleNotFoundError as mnf:
501+
raise click.ClickException(MISSING_EVAL_DEPENDENCIES_MESSAGE) from mnf
439502

440503
print("*********************************************************************")
441504
eval_run_summary = {}
@@ -890,8 +953,10 @@ def cli_deploy_cloud_run(
890953
port: int,
891954
trace_to_cloud: bool,
892955
with_ui: bool,
893-
verbosity: str,
894956
adk_version: str,
957+
verbosity: str = "WARNING",
958+
reload: bool = True,
959+
allow_origins: Optional[list[str]] = None,
895960
log_level: Optional[str] = None,
896961
session_service_uri: Optional[str] = None,
897962
artifact_service_uri: Optional[str] = None,
@@ -923,6 +988,7 @@ def cli_deploy_cloud_run(
923988
temp_folder=temp_folder,
924989
port=port,
925990
trace_to_cloud=trace_to_cloud,
991+
allow_origins=allow_origins,
926992
with_ui=with_ui,
927993
log_level=log_level,
928994
verbosity=verbosity,

0 commit comments

Comments
 (0)