27 changes: 26 additions & 1 deletion tests/unit/vertexai/genai/replays/test_create_evaluation_run.py
@@ -46,6 +46,23 @@
)
),
)
EXACT_MATCH_COMPUTATION_BASED_METRIC = types.EvaluationRunMetric(
metric="exact_match",
metric_config=types.UnifiedMetric(
computation_based_metric_spec=types.ComputationBasedMetricSpec(
type=types.ComputationBasedMetricType.EXACT_MATCH,
)
),
)
BLEU_COMPUTATION_BASED_METRIC = types.EvaluationRunMetric(
metric="exact_match_2",
metric_config=types.UnifiedMetric(
computation_based_metric_spec=types.ComputationBasedMetricSpec(
type=types.ComputationBasedMetricType.BLEU,
parameters={"use_effective_order": True},
)
),
)


def test_create_eval_run_data_source_evaluation_set(client):
@@ -74,6 +91,8 @@ def test_create_eval_run_data_source_evaluation_set(client):
UNIVERSAL_AR_METRIC,
types.RubricMetric.FINAL_RESPONSE_QUALITY,
LLM_METRIC,
EXACT_MATCH_COMPUTATION_BASED_METRIC,
BLEU_COMPUTATION_BASED_METRIC,
],
agent_info=types.evals.AgentInfo(
agent_resource_name="project/123/locations/us-central1/reasoningEngines/456",
@@ -94,7 +113,13 @@
output_config=genai_types.OutputConfig(
gcs_destination=genai_types.GcsDestination(output_uri_prefix=GCS_DEST)
),
metrics=[UNIVERSAL_AR_METRIC, FINAL_RESPONSE_QUALITY_METRIC, LLM_METRIC],
metrics=[
UNIVERSAL_AR_METRIC,
FINAL_RESPONSE_QUALITY_METRIC,
LLM_METRIC,
EXACT_MATCH_COMPUTATION_BASED_METRIC,
BLEU_COMPUTATION_BASED_METRIC,
],
)
assert evaluation_run.inference_configs[
"agent-1"
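The two new test constants cover EXACT_MATCH and BLEU; the enum added in common.py also defines ROUGE, which no replay exercises yet. A ROUGE metric would presumably be built the same way. A minimal sketch, not part of this change, with the parameters value borrowed from the ComputationBasedMetricSpec docstring:

ROUGE_COMPUTATION_BASED_METRIC = types.EvaluationRunMetric(
    metric="rouge",  # hypothetical metric name, not recorded in any replay
    metric_config=types.UnifiedMetric(
        computation_based_metric_spec=types.ComputationBasedMetricSpec(
            type=types.ComputationBasedMetricType.ROUGE,
            # Mirrors the docstring example in common.py.
            parameters={"rouge_type": "rougeL"},
        )
    ),
)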
14 changes: 14 additions & 0 deletions vertexai/_genai/evals.py
@@ -577,6 +577,13 @@ def _UnifiedMetric_from_vertex(
getv(from_object, ["predefinedMetricSpec"]),
)

if getv(from_object, ["computationBasedMetricSpec"]) is not None:
setv(
to_object,
["computation_based_metric_spec"],
getv(from_object, ["computationBasedMetricSpec"]),
)

return to_object


@@ -621,6 +628,13 @@ def _UnifiedMetric_to_vertex(
getv(from_object, ["predefined_metric_spec"]),
)

if getv(from_object, ["computation_based_metric_spec"]) is not None:
setv(
to_object,
["computationBasedMetricSpec"],
getv(from_object, ["computation_based_metric_spec"]),
)

return to_object


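Both helpers only rename the field between the SDK's snake_case and the Vertex wire format's camelCase; the spec payload itself passes through unchanged. As a rough illustration (these are private helpers, so the dict shapes below are assumptions based on the getv/setv paths above):

# Shape consumed by _UnifiedMetric_to_vertex for the BLEU metric in the test:
sdk_shape = {
    "computation_based_metric_spec": {
        "type": "BLEU",
        "parameters": {"use_effective_order": True},
    }
}

# Shape it would emit; _UnifiedMetric_from_vertex performs the inverse mapping:
vertex_shape = {
    "computationBasedMetricSpec": {
        "type": "BLEU",
        "parameters": {"use_effective_order": True},
    }
}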
8 changes: 8 additions & 0 deletions vertexai/_genai/types/__init__.py
@@ -174,6 +174,10 @@
from .common import CometResult
from .common import CometResultDict
from .common import CometResultOrDict
from .common import ComputationBasedMetricSpec
from .common import ComputationBasedMetricSpecDict
from .common import ComputationBasedMetricSpecOrDict
from .common import ComputationBasedMetricType
from .common import ContainerSpec
from .common import ContainerSpecDict
from .common import ContainerSpecOrDict
@@ -1067,6 +1071,9 @@
"CustomCodeExecutionSpec",
"CustomCodeExecutionSpecDict",
"CustomCodeExecutionSpecOrDict",
"ComputationBasedMetricSpec",
"ComputationBasedMetricSpecDict",
"ComputationBasedMetricSpecOrDict",
"UnifiedMetric",
"UnifiedMetricDict",
"UnifiedMetricOrDict",
@@ -1915,6 +1922,7 @@
"EvaluationItemType",
"SamplingMethod",
"RubricContentType",
"ComputationBasedMetricType",
"EvaluationRunState",
"OptimizeTarget",
"GenerateMemoriesResponseGeneratedMemoryAction",
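With the re-exports and __all__ entries in place, the new symbols resolve from the public types module. A quick sketch, assuming the same import path the replay tests use:

from vertexai._genai import types

spec = types.ComputationBasedMetricSpec(
    type=types.ComputationBasedMetricType.EXACT_MATCH,
)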
48 changes: 48 additions & 0 deletions vertexai/_genai/types/common.py
@@ -311,6 +311,21 @@ class RubricContentType(_common.CaseInSensitiveEnum):
"""Generate rubrics in a unit test format."""


class ComputationBasedMetricType(_common.CaseInSensitiveEnum):
"""Represents the type of the computation based metric."""

COMPUTATION_BASED_METRIC_TYPE_UNSPECIFIED = (
"COMPUTATION_BASED_METRIC_TYPE_UNSPECIFIED"
)
"""Computation based metric type is unspecified."""
EXACT_MATCH = "EXACT_MATCH"
"""Exact match metric."""
BLEU = "BLEU"
"""BLEU metric."""
ROUGE = "ROUGE"
"""ROUGE metric."""


class EvaluationRunState(_common.CaseInSensitiveEnum):
"""Represents the state of an evaluation run."""

@@ -943,6 +958,33 @@ def evaluate(instance: dict[str, Any]) -> float:
]


class ComputationBasedMetricSpec(_common.BaseModel):
"""Specification for a computation based metric."""

type: Optional[ComputationBasedMetricType] = Field(
default=None, description="""The type of the computation based metric."""
)
parameters: Optional[dict[str, Any]] = Field(
default=None,
description="""A map of parameters for the metric, e.g. {"rouge_type": "rougeL"}.""",
)


class ComputationBasedMetricSpecDict(TypedDict, total=False):
"""Specification for a computation based metric."""

type: Optional[ComputationBasedMetricType]
"""The type of the computation based metric."""

parameters: Optional[dict[str, Any]]
"""A map of parameters for the metric, e.g. {"rouge_type": "rougeL"}."""


ComputationBasedMetricSpecOrDict = Union[
ComputationBasedMetricSpec, ComputationBasedMetricSpecDict
]


class UnifiedMetric(_common.BaseModel):
"""The unified metric used for evaluation."""

@@ -964,6 +1006,9 @@ class UnifiedMetric(_common.BaseModel):
predefined_metric_spec: Optional[PredefinedMetricSpec] = Field(
default=None, description="""The spec for a pre-defined metric."""
)
computation_based_metric_spec: Optional[ComputationBasedMetricSpec] = Field(
default=None, description="""The spec for a computation based metric."""
)


class UnifiedMetricDict(TypedDict, total=False):
@@ -987,6 +1032,9 @@ class UnifiedMetricDict(TypedDict, total=False):
predefined_metric_spec: Optional[PredefinedMetricSpecDict]
"""The spec for a pre-defined metric."""

computation_based_metric_spec: Optional[ComputationBasedMetricSpecDict]
"""The spec for a computation based metric."""


UnifiedMetricOrDict = Union[UnifiedMetric, UnifiedMetricDict]

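Since the new field is typed with the Pydantic model and a parallel ComputationBasedMetricSpecOrDict union is exported, a plain dict should validate wherever the model is accepted. A sketch of the dict form of the test file's BLEU config, assuming Pydantic's usual dict-to-model coercion:

metric_config = types.UnifiedMetric(
    # Coerced into ComputationBasedMetricSpec; "BLEU" resolves to
    # ComputationBasedMetricType.BLEU.
    computation_based_metric_spec={
        "type": "BLEU",
        "parameters": {"use_effective_order": True},
    }
)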