Agenta-AI
diff --git a/api/oss/src/core/evaluators/service.py b/api/oss/src/core/evaluators/service.py
Lines changed: 22 additions & 10 deletions
diff --git a/api/oss/src/models/api/evaluation_model.py b/api/oss/src/models/api/evaluation_model.py
Lines changed: 1 addition & 0 deletions
diff --git a/api/oss/src/resources/evaluators/evaluators.py b/api/oss/src/resources/evaluators/evaluators.py
Lines changed: 28 additions & 0 deletions
@@ -1,13 +1,9 @@
 from typing import Optional, List
 from uuid import UUID, uuid4
-from json import loads
 
 from oss.src.utils.helpers import get_slug_from_name_and_id
 from oss.src.services.db_manager import fetch_evaluator_config
 from oss.src.core.workflows.dtos import (
-    WorkflowFlags,
-    WorkflowQueryFlags,
-    #
     WorkflowCreate,
     WorkflowEdit,
     WorkflowQuery,
@@ -17,8 +13,6 @@
     WorkflowVariantEdit,
     WorkflowVariantQuery,
     #
-    WorkflowRevisionData,
-    #
     WorkflowRevisionCreate,
     WorkflowRevisionEdit,
     WorkflowRevisionCommit,
@@ -35,11 +29,7 @@
     SimpleEvaluatorEdit,
     SimpleEvaluatorQuery,
     SimpleEvaluatorFlags,
-    SimpleEvaluatorQueryFlags,
-    #
     EvaluatorFlags,
-    EvaluatorQueryFlags,
-    #
     Evaluator,
     EvaluatorQuery,
     EvaluatorRevisionsLog,
@@ -1435,11 +1425,33 @@ def _transfer_evaluator_revision_data(
             else None
         )
         headers = None
+        # TODO: This function reconstructs output schemas from old evaluator settings.
+        # When fully migrating to the new workflow-based evaluator system, the output
+        # schema should be stored directly in the evaluator revision (workflow revision)
+        # at configuration time, rather than being inferred from settings here.
+        # For evaluators with dynamic outputs (auto_ai_critique, json_multi_field_match),
+        # the frontend/API should build and save the complete output schema when the
+        # user configures the evaluator.
         outputs_schema = None
         if str(old_evaluator.evaluator_key) == "auto_ai_critique":
             json_schema = old_evaluator.settings_values.get("json_schema", None)
             if json_schema and isinstance(json_schema, dict):
                 outputs_schema = json_schema.get("schema", None)
+        # Handle json_multi_field_match with dynamic field-based properties
+        if str(old_evaluator.evaluator_key) == "json_multi_field_match":
+            # Build dynamic properties based on configured fields
+            fields = old_evaluator.settings_values.get("fields", [])
+            properties = {"aggregate_score": {"type": "number"}}
+            for field in fields:
+                # Each field becomes a numeric score (0 or 1)
+                properties[field] = {"type": "number"}
+            outputs_schema = {
+                "$schema": "https://json-schema.org/draft/2020-12/schema",
+                "type": "object",
+                "properties": properties,
+                "required": ["aggregate_score"],
+                "additionalProperties": False,
+            }
         if not outputs_schema:
             properties = (
                 {"score": {"type": "number"}, "success": {"type": "boolean"}}
 
@@ -20,6 +20,7 @@ class LegacyEvaluator(BaseModel):
     oss: Optional[bool] = False
     requires_llm_api_keys: Optional[bool] = False
     tags: List[str]
+    archived: Optional[bool] = False
 
 
 class EvaluatorConfig(BaseModel):
 
@@ -332,6 +332,7 @@
         "name": "JSON Field Match",
         "key": "field_match_test",
         "direct_use": False,
+        "archived": True,  # Deprecated - use json_multi_field_match instead
         "settings_template": {
             "json_field": {
                 "label": "JSON Field",
@@ -355,6 +356,33 @@
         "oss": True,
         "tags": ["classifiers"],
     },
+    {
+        "name": "JSON Multi-Field Match",
+        "key": "json_multi_field_match",
+        "direct_use": False,
+        "settings_template": {
+            "fields": {
+                "label": "Fields to Compare",
+                "type": "fields_tags_editor",  # Custom type - tag-based add/remove editor
+                "required": True,
+                "description": "Add fields to compare using dot notation for nested paths (e.g., user.name)",
+            },
+            "correct_answer_key": {
+                "label": "Expected Answer Column",
+                "default": "correct_answer",
+                "type": "string",
+                "required": True,
+                "description": "Column name containing the expected JSON object",
+                "ground_truth_key": True,
+                "advanced": True,  # Hidden in advanced section
+            },
+        },
+        "description": "Compares configured fields in expected JSON against LLM output. Each field becomes a separate metric (0 or 1), with an aggregate_score showing the percentage of matching fields. Useful for entity extraction validation.",
+        "requires_testcase": "always",
+        "requires_trace": "always",
+        "oss": True,
+        "tags": ["classifiers"],
+    },
     {
         "name": "JSON Diff Match",
         "key": "auto_json_diff",