Agenta-AI · ashrafchowdury · Jan 9, 2026 · Dec 31, 2025 · Jan 2, 2026 · Jan 2, 2026
diff --git a/api/pyproject.toml b/api/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "api"
-version = "0.76.0"
+version = "0.77.0"
 description = "Agenta API"
 authors = [
     { name = "Mahmoud Mabrouk", email = "[email protected]" },

diff --git a/docs/blog/entries/json-multi-field-match.mdx b/docs/blog/entries/json-multi-field-match.mdx
@@ -0,0 +1,124 @@
+---
+title: "JSON Multi-Field Match Evaluator"
+slug: json-multi-field-match
+date: 2025-12-31
+tags: [v0.73.0]
+description: "Compare multiple fields between JSON objects with the new JSON Multi-Field Match evaluator. Ideal for entity extraction validation with per-field scoring and support for nested paths."
+---
+
+```mdx-code-block
+import Image from "@theme/IdealImage";
+```
+
+The JSON Multi-Field Match evaluator lets you validate multiple fields in JSON outputs simultaneously. This makes it ideal for entity extraction tasks where you need to check if your model correctly extracted name, email, address, and other structured fields.
+
+## What is JSON Multi-Field Match?
+
+This evaluator compares specific fields between your model's JSON output and the expected JSON values from your test set. Unlike the old JSON Field Match evaluator (which only checked one field), this evaluator handles any number of fields at once.
+
+For each field you configure, the evaluator produces a separate score (either 1 for a match or 0 for no match). It also calculates an aggregate score showing the percentage of fields that matched correctly.
+
+## Key Features
+
+### Multiple Field Comparison
+
+Configure as many fields as you need to validate. The evaluator checks each field independently and reports results for all of them.
+
+If you're extracting user information, you might configure fields like `name`, `email`, `phone`, and `address.city`. Each field gets its own score, so you can see exactly which extractions succeeded and which failed.
+
+### Three Path Format Options
+
+The evaluator supports three different ways to specify field paths:
+
+**Dot notation** (recommended for most cases):
+- Simple fields: `name`, `email`
+- Nested fields: `user.address.city`
+- Array indices: `items.0.name`
+
+**JSON Path** (standard JSON Path syntax):
+- Simple fields: `$.name`, `$.email`
+- Nested fields: `$.user.address.city`
+- Array indices: `$.items[0].name`
+
+**JSON Pointer** (RFC 6901):
+- Simple fields: `/name`, `/email`
+- Nested fields: `/user/address/city`
+- Array indices: `/items/0/name`
+
+All three formats work the same way. Use whichever matches your existing tooling or personal preference.
+
+### Nested Field and Array Support
+
+Access deeply nested fields and array elements without restrictions. The evaluator handles any level of nesting.
+
+### Per-Field Scoring
+
+See individual scores for each configured field in the evaluation results. This granular view helps you identify which specific extractions are working well and which need improvement.
+
+### Aggregate Score
+
+The aggregate score shows the percentage of matching fields. If you configure five fields and three match, the aggregate score is 0.6 (or 60%).
+
+## Example
+
+Suppose you're building an entity extraction model that pulls contact information from text. Your ground truth looks like this:
+
+```json
+{
+  "name": "John Doe",
+  "email": "john@example.com",
+  "phone": "555-1234",
+  "address": {
+    "city": "New York",
+    "zip": "10001"
+  }
+}
+```
+
+Your model produces this output:
+
+```json
+{
+  "name": "John Doe",
+  "email": "john@example.org",
+  "phone": "555-1234",
+  "address": {
+    "city": "New York",
+    "zip": "10002"
+  }
+}
+```
+
+You configure these fields: `["name", "email", "phone", "address.city", "address.zip"]`
+
+The evaluator returns:
+
+| Field | Score |
+| ----- | ----- |
+| `name` | 1.0 |
+| `email` | 0.0 |
+| `phone` | 1.0 |
+| `address.city` | 1.0 |
+| `address.zip` | 0.0 |
+| `aggregate_score` | 0.6 |
+
+You can see immediately that the model got the email and zip code wrong but correctly extracted the name, phone, and city.
+
+## Auto-Detection in the UI
+
+When you configure the evaluator in the web interface, Agenta automatically detects available fields from your test set data. Click to add or remove fields using a tag-based interface. This makes setup fast and reduces configuration errors.
+
+## Migration from JSON Field Match
+
+The old JSON Field Match evaluator only supported checking a single field. If you're using it, consider migrating to JSON Multi-Field Match to gain:
+
+- Support for multiple fields in one evaluator
+- Per-field scoring for detailed analysis
+- Aggregate scoring for overall performance tracking
+- Nested field and array support
+
+Existing JSON Field Match configurations continue to work, but we recommend using JSON Multi-Field Match for new evaluations.
+
+## Next Steps
+
+Learn more about configuring and using the JSON Multi-Field Match evaluator in the [Classification and Entity Extraction Evaluators](/evaluation/configure-evaluators/classification-entity-extraction#json-multi-field-match) documentation.
diff --git a/docs/blog/main.mdx b/docs/blog/main.mdx
@@ -11,6 +11,7 @@ import Image from "@theme/IdealImage";
 <section class="changelog">
 
 
+
 ### [Chat Sessions in Observability](/changelog/chat-sessions-observability)
 
 _9 January 2026_
@@ -28,6 +29,16 @@ The new session browser shows key metrics like total cost, latency, and token us
 
 ---
 
+### [JSON Multi-Field Match Evaluator](/changelog/json-multi-field-match)
+
+_31 December 2025_
+
+**v0.73.0**
+
+The new JSON Multi-Field Match evaluator validates multiple fields between JSON objects. Configure any number of field paths using dot notation, JSON Path, or JSON Pointer formats. Each field gets its own score (0 or 1), and an aggregate score shows the percentage of matching fields. This evaluator is ideal for entity extraction tasks like validating extracted names, emails, and addresses. The UI automatically detects fields from your test data for quick setup. This replaces the old JSON Field Match evaluator, which supported only a single field.
+
+---
+
 ### [PDF Support in the Playground](/changelog/pdf-support-in-playground)
 
 _17 December 2025_

diff --git a/docs/docs/evaluation/configure-evaluators/01-overview.mdx b/docs/docs/evaluation/configure-evaluators/01-overview.mdx
@@ -25,7 +25,7 @@ Agenta offers a growing list of pre-built evaluators suitable for most use cases
 | [Exact Match](/evaluation/configure-evaluators/classification-entity-extraction#exact-match)                | Classification/Entity Extraction | Pattern Matching   | Checks if the output exactly matches the expected result.                        |
 | [Contains JSON](/evaluation/configure-evaluators/classification-entity-extraction#contains-json)            | Classification/Entity Extraction | Pattern Matching   | Ensures the output contains valid JSON.                                          |
 | [Regex Test](/evaluation/configure-evaluators/regex-evaluator)                          | Classification/Entity Extraction | Pattern Matching   | Checks if the output matches a given regex pattern.                              |
-| [JSON Field Match](/evaluation/configure-evaluators/classification-entity-extraction#json-field-match)      | Classification/Entity Extraction | Pattern Matching   | Compares specific fields within JSON data.                                       |
+| [JSON Multi-Field Match](/evaluation/configure-evaluators/classification-entity-extraction#json-multi-field-match) | Classification/Entity Extraction | Pattern Matching   | Compares multiple fields within JSON objects and reports per-field scores.       |
 | [JSON Diff Match](/evaluation/configure-evaluators/classification-entity-extraction#json-diff-match)        | Classification/Entity Extraction | Similarity Metrics | Compares generated JSON with a ground truth JSON based on schema or values.      |
 | [Similarity Match](/evaluation/configure-evaluators/semantic-similarity#similarity-match)                   | Text Generation / Chatbot        | Similarity Metrics | Compares generated output with expected using Jaccard similarity.                |
 | [Semantic Similarity Match](/evaluation/configure-evaluators/semantic-similarity#semantic-similarity-match) | Text Generation / Chatbot        | Semantic Analysis  | Compares the meaning of the generated output with the expected result.           |

diff --git a/docs/docs/evaluation/configure-evaluators/02-classification-entity-extraction.mdx b/docs/docs/evaluation/configure-evaluators/02-classification-entity-extraction.mdx
@@ -26,20 +26,84 @@ The Contains JSON evaluator checks if the model's output contains a valid JSON s
 
 This evaluator attempts to parse the output as JSON. It returns `true` if a valid JSON structure is found within the output, and `false` otherwise.
 
-## JSON Field Match
+## JSON Field Match (Deprecated) {#json-field-match}
 
-The JSON Field Match evaluator compares specific fields within JSON data.
+:::warning Deprecated
+The JSON Field Match evaluator has been replaced by [JSON Multi-Field Match](#json-multi-field-match). The new evaluator supports multiple fields and nested paths, and it provides per-field scoring. Existing configurations will continue to work, but we recommend migrating to the new evaluator.
+:::
+
+## JSON Multi-Field Match
+
+The JSON Multi-Field Match evaluator compares multiple fields between two JSON objects and reports a score for each field. This evaluator is ideal for entity extraction tasks where you need to validate that specific fields (like name, email, or address) match the expected values.
 
 ### How It Works
 
-This evaluator attempts to parse the output as JSON and extract a specified field. It then compares this field value to the correct answer. The evaluator returns `true` if the field value matches the correct answer, and `false` otherwise. Note that the value in the `correct_answer` column should be a string, not JSON.
+The evaluator parses both the model output and the ground truth as JSON. It then compares each configured field path and produces:
+
+1. A score for each field (1 if matched, 0 if not matched)
+2. An aggregate score: the fraction of configured fields that matched, from 0 to 1
+
+For example, if you configure fields `["name", "email", "phone"]` and the model gets name and email correct but phone wrong, you will see:
+- `name`: 1.0
+- `email`: 1.0
+- `phone`: 0.0
+- `aggregate_score`: 0.67
+
+### Path Formats
+
+You can specify field paths in three formats:
+
+| Format | Example | Description |
+| ------ | ------- | ----------- |
+| Dot notation | `user.address.city` | Simple nested access. Use numeric indices for arrays: `items.0.name` |
+| JSON Path | `$.user.address.city` | Standard JSON Path syntax. Supports array indexing: `$.items[0].name` |
+| JSON Pointer | `/user/address/city` | RFC 6901 standard. Use numeric segments for arrays: `/items/0/name` |
+
+Dot notation is recommended for most cases. JSON Path and JSON Pointer are useful when you need compatibility with other tools.
 
 ### Configuration
 
-| Parameter            | Type   | Description                                                   |
-| -------------------- | ------ | ------------------------------------------------------------- |
-| `json_field`         | String | The name of the field in the JSON output to evaluate          |
-| `correct_answer_key` | String | The column name in the test set containing the correct answer |
+| Parameter            | Type     | Description                                                        |
+| -------------------- | -------- | ------------------------------------------------------------------ |
+| `fields`             | String[] | List of field paths to compare (e.g., `["name", "user.email"]`)    |
+| `correct_answer_key` | String   | The column name in the test set containing the expected JSON       |
+
+### Example
+
+**Ground truth** (in the `correct_answer` column):
+```json
+{
+  "name": "John Doe",
+  "email": "john@example.com",
+  "address": {
+    "city": "New York",
+    "zip": "10001"
+  }
+}
+```
+
+**Model output**:
+```json
+{
+  "name": "John Doe",
+  "email": "john@example.org",
+  "address": {
+    "city": "New York",
+    "zip": "10002"
+  }
+}
+```
+
+**Configured fields**: `["name", "email", "address.city", "address.zip"]`
+
+**Results**:
+| Field | Score |
+| ----- | ----- |
+| `name` | 1.0 |
+| `email` | 0.0 |
+| `address.city` | 1.0 |
+| `address.zip` | 0.0 |
+| `aggregate_score` | 0.5 |
 
 ## JSON Diff Match
 

diff --git a/sdk/pyproject.toml b/sdk/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "agenta"
-version = "0.76.0"
+version = "0.77.0"
 description = "The SDK for agenta is an open-source LLMOps platform."
 readme = "README.md"
 authors = [

diff --git a/web/ee/package.json b/web/ee/package.json
@@ -1,6 +1,6 @@
 {
     "name": "@agenta/ee",
-    "version": "0.76.0",
+    "version": "0.77.0",
     "private": true,
     "engines": {
         "node": ">=18"

diff --git a/web/oss/package.json b/web/oss/package.json
@@ -1,6 +1,6 @@
 {
     "name": "@agenta/oss",
-    "version": "0.76.0",
+    "version": "0.77.0",
     "private": true,
     "engines": {
         "node": ">=18"

diff --git a/web/oss/src/components/DrillInView/DrillInBreadcrumb.tsx b/web/oss/src/components/DrillInView/DrillInBreadcrumb.tsx
@@ -104,7 +104,7 @@ const DrillInBreadcrumb = memo(
         )
 
         return (
-            <div className="flex items-center gap-1 min-h-[32px] sticky top-0 bg-white z-10 py-2">
+            <div className="drill-in-breadcrumb flex items-center gap-1 min-h-[32px] sticky top-0 bg-white z-10 py-2">
                 {/* Fixed prefix (span navigation) - doesn't scroll */}
                 {prefix && <div className="flex-shrink-0 flex items-center">{prefix}</div>}
 

diff --git a/web/oss/src/components/DrillInView/DrillInContent.tsx b/web/oss/src/components/DrillInView/DrillInContent.tsx
@@ -704,7 +704,7 @@ export function DrillInContent({
 
                                 {/* Field content - collapsible */}
                                 {!isCollapsed && (
-                                    <div>
+                                    <div className="drill-in-field-content">
                                         {renderFieldContent({
                                             item,
                                             stringValue,

diff --git a/web/oss/src/components/Editor/DiffView.tsx b/web/oss/src/components/Editor/DiffView.tsx
@@ -322,7 +322,6 @@ const DiffView: React.FC<DiffViewProps> = ({
                 key={diffKey}
                 initialValue=""
                 language={processedContent.language}
-                validationSchema={{}}
                 additionalCodePlugins={[
                     <DiffHighlightPlugin
                         key="diff-highlight"

diff --git a/web/oss/src/components/EvalRunDetails/utils/chatMessages.ts b/web/oss/src/components/EvalRunDetails/utils/chatMessages.ts
@@ -13,7 +13,7 @@ const CHAT_ARRAY_KEYS = [
     "output_messages",
 ]
 
-const tryParseJson = (value: unknown): unknown => {
+export const tryParseJson = (value: unknown): unknown => {
     if (typeof value !== "string") return value
     try {
         return JSON.parse(value)
@@ -42,7 +42,7 @@ const isChatEntry = (entry: any): boolean => {
     return false
 }
 
-const extractMessageArray = (value: any): any[] | null => {
+export const extractMessageArray = (value: any): any[] | null => {
     if (!value) return null
     if (Array.isArray(value)) return value
     if (typeof value !== "object") return null
@@ -67,7 +67,9 @@ const extractMessageArray = (value: any): any[] | null => {
     return null
 }
 
-const normalizeMessages = (messages: any[]): {role: string; content: any; tool_calls?: any[]}[] => {
+export const normalizeMessages = (
+    messages: any[],
+): {role: string; content: any; tool_calls?: any[]}[] => {
     return messages
         .map((entry) => {
             if (!entry) return null

diff --git a/web/oss/src/components/GenericDrawer/index.tsx b/web/oss/src/components/GenericDrawer/index.tsx
@@ -27,6 +27,7 @@ const GenericDrawer = ({
                         onClick={() => props.onClose?.({} as any)}
                         type="text"
                         icon={<CloseOutlined />}
+                        {...props.closeButtonProps}
                     />
 
                     {props.expandable && (
@@ -46,6 +47,7 @@ const GenericDrawer = ({
                                     <FullscreenExitOutlined />
                                 )
                             }
+                            {...props.expandButtonProps}
                         />
                     )}
 

diff --git a/web/oss/src/components/GenericDrawer/types.d.ts b/web/oss/src/components/GenericDrawer/types.d.ts
@@ -1,9 +1,10 @@
 import {ReactNode} from "react"
 
-import {DrawerProps} from "antd"
+import {ButtonProps, DrawerProps} from "antd"
 
 export interface GenericDrawerProps extends DrawerProps {
     expandable?: boolean
+    expandButtonProps?: ButtonProps
     headerExtra?: ReactNode
     mainContent: ReactNode
     extraContent?: ReactNode
@@ -14,4 +15,5 @@ export interface GenericDrawerProps extends DrawerProps {
     mainContentDefaultSize?: number
     extraContentDefaultSize?: number
     closeOnLayoutClick?: boolean
+    closeButtonProps?: ButtonProps
 }