Skip to content

Commit aaaca36

Browse files
committed
Merge branch 'main' into AGE-3539-/-frontend-unify-evaluation-result-overview-page
2 parents 18a430e + 30f9fb4 commit aaaca36

File tree

46 files changed

+1517
-384
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

46 files changed

+1517
-384
lines changed

.all-contributorsrc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -512,6 +512,15 @@
512512
"doc",
513513
"example"
514514
]
515+
},
516+
{
517+
"login": "adityadewan22-hub",
518+
"name": "adityadewan22-hub",
519+
"avatar_url": "https://avatars.githubusercontent.com/u/225586510?v=4",
520+
"profile": "https://github.com/adityadewan22-hub",
521+
"contributions": [
522+
"code"
523+
]
515524
}
516525
],
517526
"contributorsPerLine": 7,

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ We welcome contributions of all kinds — from filing issues and sharing ideas t
191191
## Contributors ✨
192192

193193
<!-- ALL-CONTRIBUTORS-BADGE:START - Do not remove or modify this section -->
194-
[![All Contributors](https://img.shields.io/badge/all_contributors-54-orange.svg?style=flat-square)](#contributors-)
194+
[![All Contributors](https://img.shields.io/badge/all_contributors-55-orange.svg?style=flat-square)](#contributors-)
195195
<!-- ALL-CONTRIBUTORS-BADGE:END -->
196196

197197
Thanks go to these wonderful people ([emoji key](https://allcontributors.org/docs/en/emoji-key)):
@@ -270,6 +270,7 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d
270270
<td align="center" valign="top" width="14.28%"><a href="https://github.com/VahantSharma"><img src="https://avatars.githubusercontent.com/u/172914890?v=4?s=100" width="100px;" alt="Vahant Sharma"/><br /><sub><b>Vahant Sharma</b></sub></a><br /><a href="https://github.com/Agenta-AI/agenta/commits?author=VahantSharma" title="Documentation">📖</a></td>
271271
<td align="center" valign="top" width="14.28%"><a href="https://github.com/muzman123"><img src="https://avatars.githubusercontent.com/u/66068301?v=4?s=100" width="100px;" alt="Muhammad Muzammil"/><br /><sub><b>Muhammad Muzammil</b></sub></a><br /><a href="https://github.com/Agenta-AI/agenta/commits?author=muzman123" title="Code">💻</a></td>
272272
<td align="center" valign="top" width="14.28%"><a href="https://github.com/CyrusNamjoo"><img src="https://avatars.githubusercontent.com/u/209579763?v=4?s=100" width="100px;" alt="Sirous Namjoo"/><br /><sub><b>Sirous Namjoo</b></sub></a><br /><a href="https://github.com/Agenta-AI/agenta/commits?author=CyrusNamjoo" title="Documentation">📖</a> <a href="#example-CyrusNamjoo" title="Examples">💡</a></td>
273+
<td align="center" valign="top" width="14.28%"><a href="https://github.com/adityadewan22-hub"><img src="https://avatars.githubusercontent.com/u/225586510?v=4?s=100" width="100px;" alt="adityadewan22-hub"/><br /><sub><b>adityadewan22-hub</b></sub></a><br /><a href="https://github.com/Agenta-AI/agenta/commits?author=adityadewan22-hub" title="Code">💻</a></td>
273274
</tr>
274275
</tbody>
275276
</table>

api/oss/src/core/evaluators/service.py

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,9 @@
11
from typing import Optional, List
22
from uuid import UUID, uuid4
3-
from json import loads
43

54
from oss.src.utils.helpers import get_slug_from_name_and_id
65
from oss.src.services.db_manager import fetch_evaluator_config
76
from oss.src.core.workflows.dtos import (
8-
WorkflowFlags,
9-
WorkflowQueryFlags,
10-
#
117
WorkflowCreate,
128
WorkflowEdit,
139
WorkflowQuery,
@@ -17,8 +13,6 @@
1713
WorkflowVariantEdit,
1814
WorkflowVariantQuery,
1915
#
20-
WorkflowRevisionData,
21-
#
2216
WorkflowRevisionCreate,
2317
WorkflowRevisionEdit,
2418
WorkflowRevisionCommit,
@@ -35,11 +29,7 @@
3529
SimpleEvaluatorEdit,
3630
SimpleEvaluatorQuery,
3731
SimpleEvaluatorFlags,
38-
SimpleEvaluatorQueryFlags,
39-
#
4032
EvaluatorFlags,
41-
EvaluatorQueryFlags,
42-
#
4333
Evaluator,
4434
EvaluatorQuery,
4535
EvaluatorRevisionsLog,
@@ -1435,11 +1425,33 @@ def _transfer_evaluator_revision_data(
14351425
else None
14361426
)
14371427
headers = None
1428+
# TODO: This function reconstructs output schemas from old evaluator settings.
1429+
# When fully migrating to the new workflow-based evaluator system, the output
1430+
# schema should be stored directly in the evaluator revision (workflow revision)
1431+
# at configuration time, rather than being inferred from settings here.
1432+
# For evaluators with dynamic outputs (auto_ai_critique, json_multi_field_match),
1433+
# the frontend/API should build and save the complete output schema when the
1434+
# user configures the evaluator.
14381435
outputs_schema = None
14391436
if str(old_evaluator.evaluator_key) == "auto_ai_critique":
14401437
json_schema = old_evaluator.settings_values.get("json_schema", None)
14411438
if json_schema and isinstance(json_schema, dict):
14421439
outputs_schema = json_schema.get("schema", None)
1440+
# Handle json_multi_field_match with dynamic field-based properties
1441+
if str(old_evaluator.evaluator_key) == "json_multi_field_match":
1442+
# Build dynamic properties based on configured fields
1443+
fields = old_evaluator.settings_values.get("fields", [])
1444+
properties = {"aggregate_score": {"type": "number"}}
1445+
for field in fields:
1446+
# Each field becomes a numeric score (0 or 1)
1447+
properties[field] = {"type": "number"}
1448+
outputs_schema = {
1449+
"$schema": "https://json-schema.org/draft/2020-12/schema",
1450+
"type": "object",
1451+
"properties": properties,
1452+
"required": ["aggregate_score"],
1453+
"additionalProperties": False,
1454+
}
14431455
if not outputs_schema:
14441456
properties = (
14451457
{"score": {"type": "number"}, "success": {"type": "boolean"}}

api/oss/src/models/api/evaluation_model.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ class LegacyEvaluator(BaseModel):
2020
oss: Optional[bool] = False
2121
requires_llm_api_keys: Optional[bool] = False
2222
tags: List[str]
23+
archived: Optional[bool] = False
2324

2425

2526
class EvaluatorConfig(BaseModel):

api/oss/src/resources/evaluators/evaluators.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,7 @@
332332
"name": "JSON Field Match",
333333
"key": "field_match_test",
334334
"direct_use": False,
335+
"archived": True, # Deprecated - use json_multi_field_match instead
335336
"settings_template": {
336337
"json_field": {
337338
"label": "JSON Field",
@@ -355,6 +356,33 @@
355356
"oss": True,
356357
"tags": ["classifiers"],
357358
},
359+
{
360+
"name": "JSON Multi-Field Match",
361+
"key": "json_multi_field_match",
362+
"direct_use": False,
363+
"settings_template": {
364+
"fields": {
365+
"label": "Fields to Compare",
366+
"type": "fields_tags_editor", # Custom type - tag-based add/remove editor
367+
"required": True,
368+
"description": "Add fields to compare using dot notation for nested paths (e.g., user.name)",
369+
},
370+
"correct_answer_key": {
371+
"label": "Expected Answer Column",
372+
"default": "correct_answer",
373+
"type": "string",
374+
"required": True,
375+
"description": "Column name containing the expected JSON object",
376+
"ground_truth_key": True,
377+
"advanced": True, # Hidden in advanced section
378+
},
379+
},
380+
"description": "Compares configured fields in expected JSON against LLM output. Each field becomes a separate metric (0 or 1), with an aggregate_score showing the percentage of matching fields. Useful for entity extraction validation.",
381+
"requires_testcase": "always",
382+
"requires_trace": "always",
383+
"oss": True,
384+
"tags": ["classifiers"],
385+
},
358386
{
359387
"name": "JSON Diff Match",
360388
"key": "auto_json_diff",

0 commit comments

Comments
 (0)