Skip to content

Commit 8c73d29

Browse files
jcpagadora737 authored and copybara-github committed
feat: Add HallucinationsV1 evaluation metric
PiperOrigin-RevId: 813456369
1 parent a239716 commit 8c73d29

File tree

5 files changed

+2294
-1
lines changed

5 files changed

+2294
-1
lines changed

src/google/adk/evaluation/app_details.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ class AgentDetails(EvalBaseModel):
3939
class AppDetails(EvalBaseModel):
4040
"""Contains details about the App (the agentic system).
4141
42-
This structure is only a projection of the acutal app. Only details
42+
This structure is only a projection of the actual app. Only details
4343
that are relevant to the Eval System are captured here.
4444
"""
4545

src/google/adk/evaluation/eval_metrics.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@ class PrebuiltMetrics(Enum):
5252
"rubric_based_final_response_quality_v1"
5353
)
5454

55+
HALLUCINATIONS_V1 = "hallucinations_v1"
56+
5557
RUBRIC_BASED_TOOL_USE_QUALITY_V1 = "rubric_based_tool_use_quality_v1"
5658

5759

@@ -130,6 +132,24 @@ class RubricsBasedCriterion(BaseCriterion):
130132
)
131133

132134

135+
class HallucinationsCriterion(BaseCriterion):
    """Criterion to use when evaluating an agent's response for hallucinations."""

    # Settings handed to the judge model. When the caller supplies none,
    # the default_factory provides a JudgeModelOptions() built with no
    # arguments.
    judge_model_options: JudgeModelOptions = Field(
        description="Options for the judge model.",
        default_factory=JudgeModelOptions,
    )

    # Opt-in flag, off by default: per the description below, only the
    # final response is checked unless this is set to True.
    evaluate_intermediate_nl_responses: bool = Field(
        description=(
            "Whether any intermediate NL responses should be evaluated for"
            " hallucinations or not. By default, the metric only evaluates final"
            " response from the Agent for hallucinations."
        ),
        default=False,
    )
151+
152+
133153
class EvalMetric(EvalBaseModel):
134154
"""A metric used to evaluate a particular aspect of an eval case."""
135155

0 commit comments

Comments
 (0)