Skip to content

Commit c984b9e

Browse files
ankursharmas and copybara-github
authored and committed
feat: Add Rubric based tool use metric
The PR does two main things: 1) It introduces a new rubric-based tool use metric. 2) Given that we now have two rubric-based metrics, we refactor and create a new RubricBasedEvaluator interface. PiperOrigin-RevId: 811983514
1 parent a959653 commit c984b9e

10 files changed

+1312
-681
lines changed

src/google/adk/evaluation/common.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,6 @@ class EvalBaseModel(pydantic.BaseModel):
2222
model_config = pydantic.ConfigDict(
2323
alias_generator=alias_generators.to_camel,
2424
populate_by_name=True,
25-
extra='forbid',
25+
extra="forbid",
2626
arbitrary_types_allowed=True,
2727
)

src/google/adk/evaluation/eval_metrics.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@ class PrebuiltMetrics(Enum):
5252
"rubric_based_final_response_quality_v1"
5353
)
5454

55+
RUBRIC_BASED_TOOL_USE_QUALITY_V1 = "rubric_based_tool_use_quality_v1"
56+
5557

5658
MetricName: TypeAlias = Union[str, PrebuiltMetrics]
5759
Threshold: TypeAlias = float

src/google/adk/evaluation/llm_as_judge.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
from ..models.llm_response import LlmResponse
2727
from ..models.registry import LLMRegistry
2828
from ..utils.context_utils import Aclosing
29+
from ..utils.feature_decorator import experimental
2930
from .common import EvalBaseModel
3031
from .eval_case import Invocation
3132
from .eval_metrics import BaseCriterion
@@ -42,6 +43,7 @@ class AutoRaterScore(EvalBaseModel):
4243
rubric_scores: Optional[list[RubricScore]] = None
4344

4445

46+
@experimental
4547
class LlmAsJudge(Evaluator):
4648
"""Evaluator based on a LLM.
4749

src/google/adk/evaluation/metric_evaluator_registry.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from .final_response_match_v2 import FinalResponseMatchV2Evaluator
2626
from .response_evaluator import ResponseEvaluator
2727
from .rubric_based_final_response_quality_v1 import RubricBasedFinalResponseQualityV1Evaluator
28+
from .rubric_based_tool_use_quality_v1 import RubricBasedToolUseV1Evaluator
2829
from .safety_evaluator import SafetyEvaluatorV1
2930
from .trajectory_evaluator import TrajectoryEvaluator
3031

@@ -116,6 +117,10 @@ def _get_default_metric_evaluator_registry() -> MetricEvaluatorRegistry:
116117
metric_info=RubricBasedFinalResponseQualityV1Evaluator.get_metric_info(),
117118
evaluator=RubricBasedFinalResponseQualityV1Evaluator,
118119
)
120+
metric_evaluator_registry.register_evaluator(
121+
metric_info=RubricBasedToolUseV1Evaluator.get_metric_info(),
122+
evaluator=RubricBasedToolUseV1Evaluator,
123+
)
119124

120125
return metric_evaluator_registry
121126

0 commit comments

Comments
 (0)