Skip to content

Commit 66b9a1d

Browse files
authored
Merge pull request #3331 from Agenta-AI/feat/json-multi-field-match-evaluator
feat(evaluators): add JSON Multi-Field Match evaluator for entity extraction validation
2 parents 87fa2b5 + cc255ad commit 66b9a1d

File tree

14 files changed

+773
-93
lines changed

14 files changed

+773
-93
lines changed

api/oss/src/core/evaluators/service.py

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,9 @@
11
from typing import Optional, List
22
from uuid import UUID, uuid4
3-
from json import loads
43

54
from oss.src.utils.helpers import get_slug_from_name_and_id
65
from oss.src.services.db_manager import fetch_evaluator_config
76
from oss.src.core.workflows.dtos import (
8-
WorkflowFlags,
9-
WorkflowQueryFlags,
10-
#
117
WorkflowCreate,
128
WorkflowEdit,
139
WorkflowQuery,
@@ -17,8 +13,6 @@
1713
WorkflowVariantEdit,
1814
WorkflowVariantQuery,
1915
#
20-
WorkflowRevisionData,
21-
#
2216
WorkflowRevisionCreate,
2317
WorkflowRevisionEdit,
2418
WorkflowRevisionCommit,
@@ -35,11 +29,7 @@
3529
SimpleEvaluatorEdit,
3630
SimpleEvaluatorQuery,
3731
SimpleEvaluatorFlags,
38-
SimpleEvaluatorQueryFlags,
39-
#
4032
EvaluatorFlags,
41-
EvaluatorQueryFlags,
42-
#
4333
Evaluator,
4434
EvaluatorQuery,
4535
EvaluatorRevisionsLog,
@@ -1435,11 +1425,33 @@ def _transfer_evaluator_revision_data(
14351425
else None
14361426
)
14371427
headers = None
1428+
# TODO: This function reconstructs output schemas from old evaluator settings.
1429+
# When fully migrating to the new workflow-based evaluator system, the output
1430+
# schema should be stored directly in the evaluator revision (workflow revision)
1431+
# at configuration time, rather than being inferred from settings here.
1432+
# For evaluators with dynamic outputs (auto_ai_critique, json_multi_field_match),
1433+
# the frontend/API should build and save the complete output schema when the
1434+
# user configures the evaluator.
14381435
outputs_schema = None
14391436
if str(old_evaluator.evaluator_key) == "auto_ai_critique":
14401437
json_schema = old_evaluator.settings_values.get("json_schema", None)
14411438
if json_schema and isinstance(json_schema, dict):
14421439
outputs_schema = json_schema.get("schema", None)
1440+
# Handle json_multi_field_match with dynamic field-based properties
1441+
if str(old_evaluator.evaluator_key) == "json_multi_field_match":
1442+
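# Build dynamic properties from the configured fields. NOTE(review): the [] default only covers a missing "fields" key; a stored None would break the loop below — confirm settings are validated upstream.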
1443+
fields = old_evaluator.settings_values.get("fields", [])
1444+
properties = {"aggregate_score": {"type": "number"}}
1445+
for field in fields:
1446+
# Each field becomes a numeric score (0 or 1)
1447+
properties[field] = {"type": "number"}
1448+
outputs_schema = {
1449+
"$schema": "https://json-schema.org/draft/2020-12/schema",
1450+
"type": "object",
1451+
"properties": properties,
1452+
"required": ["aggregate_score"],
1453+
"additionalProperties": False,
1454+
}
14431455
if not outputs_schema:
14441456
properties = (
14451457
{"score": {"type": "number"}, "success": {"type": "boolean"}}

api/oss/src/models/api/evaluation_model.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ class LegacyEvaluator(BaseModel):
2020
oss: Optional[bool] = False
2121
requires_llm_api_keys: Optional[bool] = False
2222
tags: List[str]
23+
archived: Optional[bool] = False
2324

2425

2526
class EvaluatorConfig(BaseModel):

api/oss/src/resources/evaluators/evaluators.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,7 @@
332332
"name": "JSON Field Match",
333333
"key": "field_match_test",
334334
"direct_use": False,
335+
"archived": True, # Deprecated - use json_multi_field_match instead
335336
"settings_template": {
336337
"json_field": {
337338
"label": "JSON Field",
@@ -355,6 +356,33 @@
355356
"oss": True,
356357
"tags": ["classifiers"],
357358
},
359+
{
360+
"name": "JSON Multi-Field Match",
361+
"key": "json_multi_field_match",
362+
"direct_use": False,
363+
"settings_template": {
364+
"fields": {
365+
"label": "Fields to Compare",
366+
"type": "fields_tags_editor", # Custom type - tag-based add/remove editor
367+
"required": True,
368+
"description": "Add fields to compare using dot notation for nested paths (e.g., user.name)",
369+
},
370+
"correct_answer_key": {
371+
"label": "Expected Answer Column",
372+
"default": "correct_answer",
373+
"type": "string",
374+
"required": True,
375+
"description": "Column name containing the expected JSON object",
376+
"ground_truth_key": True,
377+
"advanced": True, # Hidden in advanced section
378+
},
379+
},
380+
"description": "Compares configured fields in expected JSON against LLM output. Each field becomes a separate metric (0 or 1), with an aggregate_score showing the percentage of matching fields. Useful for entity extraction validation.",
381+
"requires_testcase": "always",
382+
"requires_trace": "always",
383+
"oss": True,
384+
"tags": ["classifiers"],
385+
},
358386
{
359387
"name": "JSON Diff Match",
360388
"key": "auto_json_diff",

0 commit comments

Comments
 (0)