From 0cabd261e5108dfecc9fe281da59735875f21184 Mon Sep 17 00:00:00 2001
From: Kaosiso Ezealigo <ezealigokosiso@gmail.com>
Date: Wed, 14 Jan 2026 15:25:55 +0100
Subject: [PATCH 01/20] fix: prevent unnecessary form sync during JSON schema
 updates

---
 .../components/ConfigureEvaluator/index.tsx           |  9 +++++++--
 .../JSONSchema/JSONSchemaEditor.tsx                   |  7 +++++++
 .../EvaluatorsModal/ConfigureEvaluator/index.tsx      | 11 +++++++++++
 3 files changed, 25 insertions(+), 2 deletions(-)
diff --git a/web/oss/src/components/Evaluators/components/ConfigureEvaluator/index.tsx b/web/oss/src/components/Evaluators/components/ConfigureEvaluator/index.tsx
index f21209048..ca07709a5 100644
--- a/web/oss/src/components/Evaluators/components/ConfigureEvaluator/index.tsx
+++ b/web/oss/src/components/Evaluators/components/ConfigureEvaluator/index.tsx
@@ -21,6 +21,7 @@ import {useRouter} from "next/router"
 import {message} from "@/oss/components/AppMessageContext"
 import {
     initPlaygroundAtom,
+    playgroundEditValuesAtom,
     resetPlaygroundAtom,
 } from "@/oss/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/state/atoms"
 import useURL from "@/oss/hooks/useURL"
@@ -52,11 +53,15 @@ const ConfigureEvaluatorPage = ({evaluatorId}: {evaluatorId?: string | null}) =>
     // Atom actions
     const initPlayground = useSetAtom(initPlaygroundAtom)
     const resetPlayground = useSetAtom(resetPlaygroundAtom)
+    const stagedConfig = useAtomValue(playgroundEditValuesAtom)
 
     const existingConfig = useMemo(() => {
         if (!evaluatorId) return null
-        return evaluatorConfigs.find((config) => config.id === evaluatorId) ?? null
-    }, [evaluatorConfigs, evaluatorId])
+        return (
+            evaluatorConfigs.find((config) => config.id === evaluatorId) ??
+            (stagedConfig?.id === evaluatorId ? stagedConfig : null)
+        )
+    }, [evaluatorConfigs, evaluatorId, stagedConfig])
 
     const evaluatorKey = existingConfig?.evaluator_key ?? evaluatorId ?? null
 
diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx
index 6aff01d8c..3f7984b1e 100644
--- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx
+++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx
@@ -78,6 +78,7 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
     })
 
     const lastSyncedValueRef = useRef<string | undefined>(undefined)
+    const skipSyncRef = useRef(false)
 
     const namePath = useMemo(() => (Array.isArray(name) ? name : [name]), [name])
 
@@ -133,6 +134,7 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
         if (!defaultValue) {
             setSupportsBasicMode(true)
             setRawSchema("")
+            skipSyncRef.current = true
             return
         }
 
@@ -145,6 +147,7 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
 
         setSupportsBasicMode(isSchemaCompatibleWithBasicMode(defaultValue))
         setRawSchema(defaultValue)
+        skipSyncRef.current = true
     }, [defaultValue, applyParsedConfig])
 
     useEffect(() => {
@@ -155,6 +158,10 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
 
     // Update form when basic mode changes
     useEffect(() => {
+        if (skipSyncRef.current) {
+            skipSyncRef.current = false
+            return
+        }
         if (mode === "basic" && supportsBasicMode) {
             const config: SchemaConfig = {
                 responseFormat,
diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx
index 0cae1a371..331afe085 100644
--- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx
+++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx
@@ -5,10 +5,12 @@ import {ArrowLeft, Info, SidebarSimple} from "@phosphor-icons/react"
 import {Button, Form, Input, Space, Tag, Tooltip, Typography} from "antd"
 import {useAtomValue, useSetAtom} from "jotai"
 import dynamic from "next/dynamic"
+import {useRouter} from "next/router"
 import {createUseStyles} from "react-jss"
 
 import {message} from "@/oss/components/AppMessageContext"
 import {useAppId} from "@/oss/hooks/useAppId"
+import useURL from "@/oss/hooks/useURL"
 import {EvaluationSettingsTemplate, JSSTheme, SettingsPreset} from "@/oss/lib/Types"
 import {
     CreateEvaluationConfigData,
@@ -127,6 +129,8 @@ const ConfigureEvaluator = ({
     const routeAppId = useAppId()
     const apps = useAppList()
     const appId = routeAppId ?? apps?.[0]?.app_id
+    const router = useRouter()
+    const {projectURL} = useURL()
     const classes = useStyles()
 
     // ================================================================
@@ -352,6 +356,13 @@ const ConfigureEvaluator = ({
                 if (createdConfig) {
                     // Use commitPlayground to update state and switch to edit mode
                     commitPlayground(createdConfig)
+                    if (uiVariant === "page" && createdConfig.id) {
+                        await router.replace(
+                            `${projectURL}/evaluators/configure/${encodeURIComponent(
+                                createdConfig.id,
+                            )}`,
+                        )
+                    }
                 }
             }
 

From 3fda588bf9c16167ec5ed6ba4530f0ecdca5c637 Mon Sep 17 00:00:00 2001
From: Kaosiso Ezealigo <ezealigokosiso@gmail.com>
Date: Thu, 15 Jan 2026 10:12:28 +0100
Subject: [PATCH 02/20] fix: improve JSON schema parsing and synchronization in
 JSONSchemaEditor

---
 .../JSONSchema/JSONSchemaEditor.tsx           | 44 ++++++++++---------
 1 file changed, 23 insertions(+), 21 deletions(-)

diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx
index 3f7984b1e..9e50da98f 100644
--- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx
+++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx
@@ -78,10 +78,21 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
     })
 
     const lastSyncedValueRef = useRef<string | undefined>(undefined)
-    const skipSyncRef = useRef(false)
 
     const namePath = useMemo(() => (Array.isArray(name) ? name : [name]), [name])
 
+    const parseSchemaObject = useCallback((value: string) => {
+        try {
+            const parsed = JSON.parse(value)
+            if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
+                return null
+            }
+            return parsed as Record<string, unknown>
+        } catch {
+            return null
+        }
+    }, [])
+
     const applyParsedConfig = useCallback((parsed: SchemaConfig) => {
         setResponseFormat(parsed.responseFormat)
         setIncludeReasoning(parsed.includeReasoning)
@@ -99,14 +110,14 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
     }, [])
 
     const syncFormValue = useCallback(
-        (value: string) => {
-            const current = form.getFieldValue(namePath)
-            if (current === value && lastSyncedValueRef.current === value) return
+        (value: string, parsedValue?: Record<string, unknown> | null) => {
+            if (lastSyncedValueRef.current === value) return
 
-            form.setFieldValue(namePath, value)
+            const nextValue = parsedValue ?? parseSchemaObject(value) ?? value
+            form.setFieldValue(namePath, nextValue)
             lastSyncedValueRef.current = value
         },
-        [form, namePath],
+        [form, namePath, parseSchemaObject],
     )
 
     const getDefaultConfig = useCallback((): SchemaConfig => {
@@ -121,9 +132,10 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
     const applyConfigAndSync = useCallback(
         (config: SchemaConfig) => {
             applyParsedConfig(config)
-            const schemaString = JSON.stringify(generateJSONSchema(config), null, 2)
+            const schema = generateJSONSchema(config)
+            const schemaString = JSON.stringify(schema, null, 2)
             setRawSchema(schemaString)
-            syncFormValue(schemaString)
+            syncFormValue(schemaString, schema)
             setSupportsBasicMode(true)
         },
         [applyParsedConfig, syncFormValue],
@@ -134,7 +146,6 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
         if (!defaultValue) {
             setSupportsBasicMode(true)
             setRawSchema("")
-            skipSyncRef.current = true
             return
         }
 
@@ -147,7 +158,6 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
 
         setSupportsBasicMode(isSchemaCompatibleWithBasicMode(defaultValue))
         setRawSchema(defaultValue)
-        skipSyncRef.current = true
     }, [defaultValue, applyParsedConfig])
 
     useEffect(() => {
@@ -158,10 +168,6 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
 
     // Update form when basic mode changes
     useEffect(() => {
-        if (skipSyncRef.current) {
-            skipSyncRef.current = false
-            return
-        }
         if (mode === "basic" && supportsBasicMode) {
             const config: SchemaConfig = {
                 responseFormat,
@@ -172,7 +178,7 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
             const schema = generateJSONSchema(config)
             const schemaString = JSON.stringify(schema, null, 2)
 
-            syncFormValue(schemaString)
+            syncFormValue(schemaString, schema)
         }
     }, [
         mode,
@@ -200,7 +206,7 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
             const schema = generateJSONSchema(config)
             const schemaString = JSON.stringify(schema, null, 2)
             setRawSchema(schemaString)
-            syncFormValue(schemaString)
+            syncFormValue(schemaString, schema)
             setSupportsBasicMode(true)
             setMode("advanced")
             return
@@ -273,11 +279,7 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
                                     value ? isSchemaCompatibleWithBasicMode(value) : false,
                                 )
 
-                                if (Array.isArray(name)) {
-                                    form.setFieldValue(name, value)
-                                } else {
-                                    form.setFieldValue([name], value)
-                                }
+                                syncFormValue(value)
                             }
                         }}
                         editorProps={{

From 66f839ad802ada5f7b2a0a6272bf8ace6c9e849b Mon Sep 17 00:00:00 2001
From: Kaosiso Ezealigo <ezealigokosiso@gmail.com>
Date: Thu, 15 Jan 2026 11:47:12 +0100
Subject: [PATCH 03/20] added console log

---
 .../autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx  | 1 +
 1 file changed, 1 insertion(+)

diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx
index 331afe085..beef41885 100644
--- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx
+++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx
@@ -154,6 +154,7 @@ const ConfigureEvaluator = ({
         null,
     )
     const [form] = Form.useForm()
+    console.log("ConfigureEvaluator: ", {form: form.getFieldsValue()})
     const [submitLoading, setSubmitLoading] = useState(false)
 
     // Store form ref in atom so DebugSection can access it

From ee481b7f69b15f325f71d1166aa8084ce47800c5 Mon Sep 17 00:00:00 2001
From: Kaosiso Ezealigo <ezealigokosiso@gmail.com>
Date: Thu, 15 Jan 2026 12:18:59 +0100
Subject: [PATCH 04/20] fix: parse json_schema settings values into objects

---
 web/oss/src/services/evaluators/index.ts | 31 ++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/web/oss/src/services/evaluators/index.ts b/web/oss/src/services/evaluators/index.ts
index 2a9bb15de..30e0f5a00 100644
--- a/web/oss/src/services/evaluators/index.ts
+++ b/web/oss/src/services/evaluators/index.ts
@@ -103,6 +103,22 @@ export const fetchAllEvaluators = async (includeArchived = false) => {
 }
 
 // Evaluator Configs
+const normalizeSettingsValues = (settingsValues?: Record<string, any> | null) => {
+    if (!settingsValues) return settingsValues
+    const jsonSchema = settingsValues.json_schema
+    if (typeof jsonSchema !== "string") return settingsValues
+
+    try {
+        const parsed = JSON.parse(jsonSchema)
+        if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
+            return settingsValues
+        }
+        return {...settingsValues, json_schema: parsed}
+    } catch {
+        return settingsValues
+    }
+}
+
 export const fetchAllEvaluatorConfigs = async (
     appId?: string | null,
     projectIdOverride?: string | null,
@@ -123,6 +139,7 @@ export const fetchAllEvaluatorConfigs = async (
     })
     const evaluatorConfigs = (response.data || []).map((item: EvaluatorConfig) => ({
         ...item,
+        settings_values: normalizeSettingsValues(item.settings_values),
         icon_url: evaluatorIconsMap[item.evaluator_key as keyof typeof evaluatorIconsMap],
         color: tagColors[stringToNumberInRange(item.evaluator_key, 0, tagColors.length - 1)],
     })) as EvaluatorConfig[]
@@ -137,8 +154,13 @@ export const createEvaluatorConfig = async (
     const {projectId} = getProjectValues()
     void _appId
 
-    return axios.post(`/evaluators/configs?project_id=${projectId}`, {
+    const normalizedConfig = {
         ...config,
+        settings_values: normalizeSettingsValues(config.settings_values),
+    }
+
+    return axios.post(`/evaluators/configs?project_id=${projectId}`, {
+        ...normalizedConfig,
     })
 }
 
@@ -148,7 +170,12 @@ export const updateEvaluatorConfig = async (
 ) => {
     const {projectId} = getProjectValues()
 
-    return axios.put(`/evaluators/configs/${configId}?project_id=${projectId}`, config)
+    const normalizedConfig = {
+        ...config,
+        settings_values: normalizeSettingsValues(config.settings_values),
+    }
+
+    return axios.put(`/evaluators/configs/${configId}?project_id=${projectId}`, normalizedConfig)
 }
 
 export const deleteEvaluatorConfig = async (configId: string) => {

From e916b9e2ba7b4ece9a2cc0a9a9edfaff0f76b6ba Mon Sep 17 00:00:00 2001
From: Kaosiso Ezealigo <ezealigokosiso@gmail.com>
Date: Thu, 15 Jan 2026 12:40:07 +0100
Subject: [PATCH 05/20] revert: JSON schema parsing and normalization changes

---
 .../JSONSchema/JSONSchemaEditor.tsx           | 37 +++++++------------
 web/oss/src/services/evaluators/index.ts      | 31 +---------------
 2 files changed, 16 insertions(+), 52 deletions(-)

diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx
index 9e50da98f..6aff01d8c 100644
--- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx
+++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx
@@ -81,18 +81,6 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
 
     const namePath = useMemo(() => (Array.isArray(name) ? name : [name]), [name])
 
-    const parseSchemaObject = useCallback((value: string) => {
-        try {
-            const parsed = JSON.parse(value)
-            if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
-                return null
-            }
-            return parsed as Record<string, unknown>
-        } catch {
-            return null
-        }
-    }, [])
-
     const applyParsedConfig = useCallback((parsed: SchemaConfig) => {
         setResponseFormat(parsed.responseFormat)
         setIncludeReasoning(parsed.includeReasoning)
@@ -110,14 +98,14 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
     }, [])
 
     const syncFormValue = useCallback(
-        (value: string, parsedValue?: Record<string, unknown> | null) => {
-            if (lastSyncedValueRef.current === value) return
+        (value: string) => {
+            const current = form.getFieldValue(namePath)
+            if (current === value && lastSyncedValueRef.current === value) return
 
-            const nextValue = parsedValue ?? parseSchemaObject(value) ?? value
-            form.setFieldValue(namePath, nextValue)
+            form.setFieldValue(namePath, value)
             lastSyncedValueRef.current = value
         },
-        [form, namePath, parseSchemaObject],
+        [form, namePath],
     )
 
     const getDefaultConfig = useCallback((): SchemaConfig => {
@@ -132,10 +120,9 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
     const applyConfigAndSync = useCallback(
         (config: SchemaConfig) => {
             applyParsedConfig(config)
-            const schema = generateJSONSchema(config)
-            const schemaString = JSON.stringify(schema, null, 2)
+            const schemaString = JSON.stringify(generateJSONSchema(config), null, 2)
             setRawSchema(schemaString)
-            syncFormValue(schemaString, schema)
+            syncFormValue(schemaString)
             setSupportsBasicMode(true)
         },
         [applyParsedConfig, syncFormValue],
@@ -178,7 +165,7 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
             const schema = generateJSONSchema(config)
             const schemaString = JSON.stringify(schema, null, 2)
 
-            syncFormValue(schemaString, schema)
+            syncFormValue(schemaString)
         }
     }, [
         mode,
@@ -206,7 +193,7 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
             const schema = generateJSONSchema(config)
             const schemaString = JSON.stringify(schema, null, 2)
             setRawSchema(schemaString)
-            syncFormValue(schemaString, schema)
+            syncFormValue(schemaString)
             setSupportsBasicMode(true)
             setMode("advanced")
             return
@@ -279,7 +266,11 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
                                     value ? isSchemaCompatibleWithBasicMode(value) : false,
                                 )
 
-                                syncFormValue(value)
+                                if (Array.isArray(name)) {
+                                    form.setFieldValue(name, value)
+                                } else {
+                                    form.setFieldValue([name], value)
+                                }
                             }
                         }}
                         editorProps={{
diff --git a/web/oss/src/services/evaluators/index.ts b/web/oss/src/services/evaluators/index.ts
index 30e0f5a00..2a9bb15de 100644
--- a/web/oss/src/services/evaluators/index.ts
+++ b/web/oss/src/services/evaluators/index.ts
@@ -103,22 +103,6 @@ export const fetchAllEvaluators = async (includeArchived = false) => {
 }
 
 // Evaluator Configs
-const normalizeSettingsValues = (settingsValues?: Record<string, any> | null) => {
-    if (!settingsValues) return settingsValues
-    const jsonSchema = settingsValues.json_schema
-    if (typeof jsonSchema !== "string") return settingsValues
-
-    try {
-        const parsed = JSON.parse(jsonSchema)
-        if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
-            return settingsValues
-        }
-        return {...settingsValues, json_schema: parsed}
-    } catch {
-        return settingsValues
-    }
-}
-
 export const fetchAllEvaluatorConfigs = async (
     appId?: string | null,
     projectIdOverride?: string | null,
@@ -139,7 +123,6 @@ export const fetchAllEvaluatorConfigs = async (
     })
     const evaluatorConfigs = (response.data || []).map((item: EvaluatorConfig) => ({
         ...item,
-        settings_values: normalizeSettingsValues(item.settings_values),
         icon_url: evaluatorIconsMap[item.evaluator_key as keyof typeof evaluatorIconsMap],
         color: tagColors[stringToNumberInRange(item.evaluator_key, 0, tagColors.length - 1)],
     })) as EvaluatorConfig[]
@@ -154,13 +137,8 @@ export const createEvaluatorConfig = async (
     const {projectId} = getProjectValues()
     void _appId
 
-    const normalizedConfig = {
-        ...config,
-        settings_values: normalizeSettingsValues(config.settings_values),
-    }
-
     return axios.post(`/evaluators/configs?project_id=${projectId}`, {
-        ...normalizedConfig,
+        ...config,
     })
 }
 
@@ -170,12 +148,7 @@ export const updateEvaluatorConfig = async (
 ) => {
     const {projectId} = getProjectValues()
 
-    const normalizedConfig = {
-        ...config,
-        settings_values: normalizeSettingsValues(config.settings_values),
-    }
-
-    return axios.put(`/evaluators/configs/${configId}?project_id=${projectId}`, normalizedConfig)
+    return axios.put(`/evaluators/configs/${configId}?project_id=${projectId}`, config)
 }
 
 export const deleteEvaluatorConfig = async (configId: string) => {

From 8dc83882765203faecd2c087dced532e2813d922 Mon Sep 17 00:00:00 2001
From: Kaosiso Ezealigo <ezealigokosiso@gmail.com>
Date: Thu, 15 Jan 2026 13:26:56 +0100
Subject: [PATCH 06/20] removed console log

---
 .../autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx  | 1 -
 1 file changed, 1 deletion(-)

diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx
index beef41885..331afe085 100644
--- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx
+++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx
@@ -154,7 +154,6 @@ const ConfigureEvaluator = ({
         null,
     )
     const [form] = Form.useForm()
-    console.log("ConfigureEvaluator: ", {form: form.getFieldsValue()})
     const [submitLoading, setSubmitLoading] = useState(false)
 
     // Store form ref in atom so DebugSection can access it

From 91a88ba0c7d1343f23af0b0a83fde056a7531122 Mon Sep 17 00:00:00 2001
From: bekossy <99529776+bekossy@users.noreply.github.com>
Date: Thu, 15 Jan 2026 15:02:00 +0000
Subject: [PATCH 07/20] v0.77.4

---
 api/pyproject.toml   | 2 +-
 sdk/pyproject.toml   | 2 +-
 web/ee/package.json  | 2 +-
 web/oss/package.json | 4 ++--
 web/package.json     | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/api/pyproject.toml b/api/pyproject.toml
index dbf772b78..8882202a4 100644
--- a/api/pyproject.toml
+++ b/api/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "api"
-version = "0.77.3"
+version = "0.77.4"
 description = "Agenta API"
 authors = [
     { name = "Mahmoud Mabrouk", email = "mahmoud@agenta.ai" },
diff --git a/sdk/pyproject.toml b/sdk/pyproject.toml
index a26b90916..079717a0b 100644
--- a/sdk/pyproject.toml
+++ b/sdk/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "agenta"
-version = "0.77.3"
+version = "0.77.4"
 description = "The SDK for agenta is an open-source LLMOps platform."
 readme = "README.md"
 authors = [
diff --git a/web/ee/package.json b/web/ee/package.json
index 78c5f6e45..d21522b18 100644
--- a/web/ee/package.json
+++ b/web/ee/package.json
@@ -1,6 +1,6 @@
 {
     "name": "@agenta/ee",
-    "version": "0.77.3",
+    "version": "0.77.4",
     "private": true,
     "engines": {
         "node": ">=18"
diff --git a/web/oss/package.json b/web/oss/package.json
index d6dc78e83..30153a341 100644
--- a/web/oss/package.json
+++ b/web/oss/package.json
@@ -1,6 +1,6 @@
 {
     "name": "@agenta/oss",
-    "version": "0.77.3",
+    "version": "0.77.4",
     "private": true,
     "engines": {
         "node": ">=18"
@@ -20,6 +20,7 @@
     },
     "dependencies": {
         "@agenta/web-tests": "workspace:../tests",
+        "@agentaai/nextstepjs": "^2.1.3-agenta.1",
         "@ant-design/colors": "^7.2.1",
         "@ant-design/cssinjs": "^2.0.1",
         "@ant-design/icons": "^6.1.0",
@@ -89,7 +90,6 @@
         "lodash": "^4.17.21",
         "lucide-react": "^0.475.0",
         "motion": "^12.0.0",
-        "@agentaai/nextstepjs": "^2.1.3-agenta.1",
         "next": "15.5.9",
         "papaparse": "^5.5.3",
         "postcss": "^8.5.6",
diff --git a/web/package.json b/web/package.json
index 5560faf56..117e5dcb5 100644
--- a/web/package.json
+++ b/web/package.json
@@ -1,6 +1,6 @@
 {
     "name": "agenta-web",
-    "version": "0.77.3",
+    "version": "0.77.4",
     "workspaces": [
         "ee",
         "oss",

From 17b70f284d3b7dc1f07c8e7d132be0e1c008a0e3 Mon Sep 17 00:00:00 2001
From: Mahmoud Mabrouk <mahmoud@agenta.ai>
Date: Thu, 15 Jan 2026 20:30:05 +0100
Subject: [PATCH 08/20] docs: update testset docs to new SDK

Switch examples from legacy /testsets APIs to ag.testsets + aevaluate, and clarify revision vs testset IDs.
---
 .../evaluations/sdk/testset-management.ipynb  |  10 +-
 .../_evaluation-from-sdk/01-quick-start.mdx   |  66 ++++----
 .../02-setup-configuration.mdx                |   6 +-
 .../03-managing-test-sets.mdx                 |  62 +++++--
 .../04-configuring-evaluators.mdx             |  53 +++---
 .../05-running-evaluations.mdx                |  64 +++----
 .../06-viewing-results.mdx                    |  32 ++--
 .../02-managing-testsets.mdx                  |  42 +++--
 .../02-create-programatically.mdx             |  79 +++++----
 .../docs/tutorials/sdk/_evaluate-with-SDK.mdx | 158 +++++++++---------
 examples/jupyter/evaluation/quick-start.ipynb |   4 +-
 .../evaluation/testset-management.ipynb       |   4 +-
 12 files changed, 303 insertions(+), 277 deletions(-)

diff --git a/api/ee/tests/manual/evaluations/sdk/testset-management.ipynb b/api/ee/tests/manual/evaluations/sdk/testset-management.ipynb
index 0eed3141b..49124966e 100644
--- a/api/ee/tests/manual/evaluations/sdk/testset-management.ipynb
+++ b/api/ee/tests/manual/evaluations/sdk/testset-management.ipynb
@@ -120,8 +120,8 @@
     "print(f\"   Slug: {testset.slug}\")\n",
     "print(f\"   Description: {testset.description}\")\n",
     "\n",
-    "# Save the ID for later use\n",
-    "testset_id = testset.id"
+    "# Save the parent testset ID for later use\n",
+    "testset_id = testset.testset_id or testset.id"
    ]
   },
   {
@@ -137,8 +137,10 @@
     "   Description: A testset of countries and their capitals for geography evaluation\n",
     "```\n",
     "\n",
-    "The `create_testset` function returns a `SimpleTestset` object with the following fields:\n",
-    "- `id`: Unique UUID for the testset\n",
+    "The `acreate` function returns a `TestsetRevision` object with the following fields:\n",
+    "- `id`: The revision UUID\n",
+    "- `testset_id`: The parent testset UUID\n",
+    "- `version`: The revision version\n",
     "- `name`: The name you provided\n",
     "- `slug`: A shortened identifier\n",
     "- `description`: Your description\n",
diff --git a/docs/docs/evaluation/_evaluation-from-sdk/01-quick-start.mdx b/docs/docs/evaluation/_evaluation-from-sdk/01-quick-start.mdx
index c7d594aef..ccc62414b 100644
--- a/docs/docs/evaluation/_evaluation-from-sdk/01-quick-start.mdx
+++ b/docs/docs/evaluation/_evaluation-from-sdk/01-quick-start.mdx
@@ -29,41 +29,51 @@ pip install -U agenta
 ## Quick example
 
 ```python
+import asyncio
 import agenta as ag
-from agenta.client.api import AgentaApi
+from agenta.sdk.evaluations import aevaluate
 
-# Initialize the SDK
-client = AgentaApi(
-    base_url="https://cloud.agenta.ai/api",
-    api_key="your-api-key"
-)
+ag.init(host="https://cloud.agenta.ai", api_key="your-api-key")
 
-# Create a test set
-test_set = client.testsets.create_testset(
-    request={
-        "name": "my_test_set",
-        "csvdata": [
-            {"input": "Hello", "expected": "Hi there!"},
-            {"input": "How are you?", "expected": "I'm doing well!"}
-        ]
-    }
+@ag.application(
+    slug="capital_finder",
+    name="Capital Finder",
 )
+async def capital_finder(country: str):
+    capitals = {
+        "Germany": "Berlin",
+        "France": "Paris",
+    }
+    return capitals.get(country, "Unknown")
 
-# Run evaluation
-evaluation = client.evaluations.create_evaluation(
-    app_id="your-app-id",
-    variant_ids=["variant-id"],
-    testset_id=test_set.id,
-    evaluators_configs=["evaluator-config-id"]
+@ag.evaluator(
+    slug="exact_match",
+    name="Exact Match",
 )
+async def exact_match(expected: str, outputs: str):
+    return {
+        "score": 1.0 if outputs == expected else 0.0,
+        "success": outputs == expected,
+    }
 
-# Check status
-status = client.evaluations.fetch_evaluation_status(evaluation.id)
-print(f"Evaluation status: {status}")
-
-# Get results when complete
-results = client.evaluations.fetch_evaluation_results(evaluation.id)
-print(results)
+async def run():
+    testset = await ag.testsets.acreate(
+        name="my_test_set",
+        data=[
+            {"country": "Germany", "expected": "Berlin"},
+            {"country": "France", "expected": "Paris"},
+        ],
+    )
+
+    result = await aevaluate(
+        testsets=[testset.id],
+        applications=[capital_finder],
+        evaluators=[exact_match],
+    )
+    return result
+
+result = asyncio.run(run())
+print(result)
 ```
 
 ## Next steps
diff --git a/docs/docs/evaluation/_evaluation-from-sdk/02-setup-configuration.mdx b/docs/docs/evaluation/_evaluation-from-sdk/02-setup-configuration.mdx
index 28b858b81..1bf90a330 100644
--- a/docs/docs/evaluation/_evaluation-from-sdk/02-setup-configuration.mdx
+++ b/docs/docs/evaluation/_evaluation-from-sdk/02-setup-configuration.mdx
@@ -18,7 +18,7 @@ pip install -U agenta
 ## Initialize the SDK client
 
 ```python
-from agenta.client.api import AgentaApi
+import agenta as ag
 
 app_id = "667d8cfad1812781f7e375d9"
 
@@ -29,8 +29,8 @@ api_key = "EUqJGOUu.xxxx"
 # Host
 host = "https://cloud.agenta.ai"
 
-# Initialize the client
-client = AgentaApi(base_url=host + "/api", api_key=api_key)
+# Initialize the SDK
+ag.init(host=host, api_key=api_key)
 ```
 
 ## Configuration options
diff --git a/docs/docs/evaluation/_evaluation-from-sdk/03-managing-test-sets.mdx b/docs/docs/evaluation/_evaluation-from-sdk/03-managing-test-sets.mdx
index d86671094..f6c6aa515 100644
--- a/docs/docs/evaluation/_evaluation-from-sdk/03-managing-test-sets.mdx
+++ b/docs/docs/evaluation/_evaluation-from-sdk/03-managing-test-sets.mdx
@@ -5,35 +5,61 @@ description: "Learn how to create, load, and manage test sets using the SDK"
 sidebar_position: 3
 ---
 
-<!-- TODO: Replace with new SDK evaluation content -->
-
-## Creating test sets
+## Creating, retrieving, and updating test sets
 
 ```python
-from agenta.client.types.new_testset import NewTestset
+import asyncio
+import agenta as ag
 
-csvdata = [
-    {"country": "france", "capital": "Paris"},
-    {"country": "Germany", "capital": "Berlin"}
-]
+# Initialize from environment variables if set (AGENTA_HOST, AGENTA_API_KEY)
+ag.init()
 
-response = client.testsets.create_testset(
-    request=NewTestset(name="test set", csvdata=csvdata)
-)
-test_set_id = response.id
-```
+async def main():
+    # Create a testset (returns a TestsetRevision)
+    created = await ag.testsets.acreate(
+        name="test set",
+        data=[
+            {"country": "France", "capital": "Paris"},
+            {"country": "Germany", "capital": "Berlin"},
+        ],
+    )
 
-## Loading existing test sets
+    testset_id = created.testset_id or created.id
+    print(f"Testset ID: {testset_id}")
+    print(f"Revision ID: {created.id}")
 
-<!-- TODO: Add content for loading test sets -->
+    # Retrieve the latest revision for a testset
+    retrieved = await ag.testsets.aretrieve(testset_id=testset_id)
+    if retrieved:
+        print(f"Retrieved testset revision: {retrieved.id}")
+        print(f"Version: {retrieved.version}")
 
-## Updating test sets
+    # Update the testset data
+    await ag.testsets.aedit(
+        testset_id=testset_id,
+        name="test set v2",
+        data=[
+            {"country": "France", "capital": "Paris"},
+            {"country": "Germany", "capital": "Berlin"},
+            {"country": "Spain", "capital": "Madrid"},
+        ],
+    )
 
-<!-- TODO: Add content for updating test sets -->
+    # Fetch the latest revision after editing
+    updated = await ag.testsets.aretrieve(testset_id=testset_id)
+    if updated:
+        print(f"Latest revision ID: {updated.id}")
+        print(f"Version: {updated.version}")
+
+asyncio.run(main())
+```
 
 ## Deleting test sets
 
-<!-- TODO: Add content for deleting test sets -->
+The SDK does not currently expose delete helpers. To archive or unarchive testsets, call the API endpoints directly:
+
+- [Archive simple testset](/reference/api/archive-simple-testset)
+- [Unarchive simple testset](/reference/api/unarchive-simple-testset)
 
 ## Next steps
 
diff --git a/docs/docs/evaluation/_evaluation-from-sdk/04-configuring-evaluators.mdx b/docs/docs/evaluation/_evaluation-from-sdk/04-configuring-evaluators.mdx
index 0de67a068..5eb4955b2 100644
--- a/docs/docs/evaluation/_evaluation-from-sdk/04-configuring-evaluators.mdx
+++ b/docs/docs/evaluation/_evaluation-from-sdk/04-configuring-evaluators.mdx
@@ -5,46 +5,41 @@ description: "Learn how to configure built-in and custom evaluators using the SD
 sidebar_position: 4
 ---
 
-<!-- TODO: Replace with new SDK evaluation content -->
-
 ## Creating evaluators
 
-### Custom code evaluator
-
-Let's create a custom code evaluator that returns 1.0 if the first letter of the app output is uppercase:
+### Custom evaluator
 
 ```python
-code_snippet = """
-from typing import Dict
-
-def evaluate(
-    app_params: Dict[str, str],
-    inputs: Dict[str, str],
-    output: str,  # output of the llm app
-    datapoint: Dict[str, str]  # contains the testset row
-) -> float:
-    if output and output[0].isupper():
-        return 1.0
-    else:
-        return 0.0
-"""
-
-response = client.evaluators.create_new_evaluator_config(
-    app_id=app_id,
-    name="capital_letter_evaluator",
-    evaluator_key="auto_custom_code_run",
-    settings_values={"code": code_snippet}
+import agenta as ag
+
+@ag.evaluator(
+    slug="capital_letter_evaluator",
+    name="Capital Letter Evaluator",
 )
-letter_match_eval_id = response.id
+async def capital_letter_evaluator(outputs: str):
+    is_capitalized = bool(outputs) and outputs[0].isupper()
+    return {
+        "score": 1.0 if is_capitalized else 0.0,
+        "success": is_capitalized,
+    }
 ```
 
-## Using built-in evaluators
+### Built-in evaluators
+
+Agenta ships built-in evaluators you can configure directly:
 
-<!-- TODO: Add content for using built-in evaluators -->
+```python
+from agenta.sdk.workflows import builtin
+
+exact_match = builtin.auto_exact_match(
+    name="Capital Exact Match",
+    correct_answer_key="capital",
+)
+```
 
 ## Configuring evaluator settings
 
-<!-- TODO: Add content for configuring evaluator settings -->
+Built-in evaluators accept parameters (like `correct_answer_key`) when you construct them, so you can tailor scoring to your testset schema.
 
 ## Next steps
 
diff --git a/docs/docs/evaluation/_evaluation-from-sdk/05-running-evaluations.mdx b/docs/docs/evaluation/_evaluation-from-sdk/05-running-evaluations.mdx
index aa9a523a0..036aeb41e 100644
--- a/docs/docs/evaluation/_evaluation-from-sdk/05-running-evaluations.mdx
+++ b/docs/docs/evaluation/_evaluation-from-sdk/05-running-evaluations.mdx
@@ -5,55 +5,35 @@ description: "Learn how to run evaluations programmatically using the SDK"
 sidebar_position: 5
 ---
 
-<!-- TODO: Replace with new SDK evaluation content -->
-
 ## Running an evaluation
 
-First, let's grab the first variant in the app:
-
 ```python
-response = client.apps.list_app_variants(app_id=app_id)
-print(response)
-myvariant_id = response[0].variant_id
-```
-
-Then, let's start the evaluation jobs:
-
-```python
-from agenta.client.types.llm_run_rate_limit import LlmRunRateLimit
-
-rate_limit_config = LlmRunRateLimit(
-    batch_size=10,  # number of rows to call in parallel
-    max_retries=3,  # max number of time to retry a failed llm call
-    retry_delay=2,  # delay before retrying a failed llm call
-    delay_between_batches=5,  # delay between batches
-)
-
-response = client.evaluations.create_evaluation(
-    app_id=app_id,
-    variant_ids=[myvariant_id],
-    testset_id=test_set_id,
-    evaluators_configs=[letter_match_eval_id],
-    rate_limit=rate_limit_config
-)
-print(response)
+import asyncio
+import agenta as ag
+from agenta.sdk.evaluations import aevaluate
+
+# Initialize from environment variables if set (AGENTA_HOST, AGENTA_API_KEY)
+ag.init()
+
+# Assume `testset`, `capital_finder`, and `capital_letter_evaluator` are already defined
+async def main():
+    result = await aevaluate(
+        name="My Evaluation",
+        # You can pass a testset revision id (recommended)
+        testsets=[testset.id],
+        applications=[capital_finder],
+        evaluators=[capital_letter_evaluator],
+    )
+
+    print(f"Run ID: {result['run'].id}")
+    return result
+
+result = asyncio.run(main())
 ```
 
 ## Checking evaluation status
 
-Now we can check for the status of the job:
-
-```python
-client.evaluations.fetch_evaluation_status('667d98fbd1812781f7e3761a')
-```
-
-## Configuring rate limits
-
-<!-- TODO: Add more details about rate limit configuration -->
-
-## Handling errors
-
-<!-- TODO: Add content for error handling -->
+`aevaluate()` prints progress as it runs. You can also look up the run in the UI using the run ID printed above.
 
 ## Next steps
 
diff --git a/docs/docs/evaluation/_evaluation-from-sdk/06-viewing-results.mdx b/docs/docs/evaluation/_evaluation-from-sdk/06-viewing-results.mdx
index 7e8609e71..c17e0c819 100644
--- a/docs/docs/evaluation/_evaluation-from-sdk/06-viewing-results.mdx
+++ b/docs/docs/evaluation/_evaluation-from-sdk/06-viewing-results.mdx
@@ -5,40 +5,28 @@ description: "Learn how to retrieve and analyze evaluation results using the SDK
 sidebar_position: 6
 ---
 
-<!-- TODO: Replace with new SDK evaluation content -->
-
 ## Fetching overall results
 
-As soon as the evaluation is done, we can fetch the overall results:
+`aevaluate()` returns the run, scenarios, and metrics in a single object. You can inspect the metrics directly:
 
 ```python
-response = client.evaluations.fetch_evaluation_results('667d98fbd1812781f7e3761a')
-
-results = [
-    (evaluator["evaluator_config"]["name"], evaluator["result"])
-    for evaluator in response["results"]
-]
-print(results)
+metrics = result["metrics"]
+print(metrics)
 ```
 
 ## Fetching detailed results
 
-Get detailed results for each test case:
+Use the built-in display helper to render a detailed report:
 
 ```python
-detailed_results = client.evaluations.fetch_evaluation_scenarios(
-    evaluations_ids='667d98fbd1812781f7e3761a'
-)
-print(detailed_results)
-```
+import asyncio
+from agenta.sdk.evaluations import display
 
-## Analyzing results
+async def main():
+    await display(result)
 
-<!-- TODO: Add content for analyzing results -->
-
-## Exporting results
-
-<!-- TODO: Add content for exporting results -->
+asyncio.run(main())
+```
 
 ## Next steps
 
diff --git a/docs/docs/evaluation/evaluation-from-sdk/02-managing-testsets.mdx b/docs/docs/evaluation/evaluation-from-sdk/02-managing-testsets.mdx
index db8b6239f..cc011277c 100644
--- a/docs/docs/evaluation/evaluation-from-sdk/02-managing-testsets.mdx
+++ b/docs/docs/evaluation/evaluation-from-sdk/02-managing-testsets.mdx
@@ -15,6 +15,19 @@ This guide covers how to create, list, and retrieve testsets using the Agenta SD
   Open in Google Colaboratory
 </GoogleColabButton>
 
+:::tip Async examples
+Agenta's SDK uses async APIs. In Jupyter/Colab you can use top-level `await`. In a regular Python script, wrap async code like this:
+
+```python
+import asyncio
+
+async def main():
+    ...
+
+asyncio.run(main())
+```
+:::
+
 ## Creating a Testset
 
 Use `ag.testsets.acreate()` to create a new testset with data:
@@ -35,7 +48,9 @@ testset = await ag.testsets.acreate(
     name="Country Capitals",
 )
 
-print(f"Created testset with ID: {testset.id}")
+testset_id = testset.testset_id or testset.id
+print(f"Testset ID: {testset_id}")
+print(f"Revision ID: {testset.id}")
 print(f"Name: {testset.name}")
 print(f"Slug: {testset.slug}")
 ```
@@ -48,9 +63,11 @@ print(f"Slug: {testset.slug}")
 - `name`: The name of your testset.
 
 **Returns:** A `TestsetRevision` object containing:
-- `id`: The UUID of the created testset
+- `id`: The UUID of the created testset revision
+- `testset_id`: The parent testset UUID (stable across revisions)
 - `name`: The testset name
-- `slug`: The testset slug
+- `slug`: The revision slug
+- `version`: The revision version string (e.g. "1")
 - `data`: The test data (with `testcases` structure)
 
 **Sample Output:**
@@ -95,7 +112,9 @@ testset = await ag.testsets.aupsert(
     ],
 )
 
-print(f"Upserted testset with ID: {testset.id}")
+testset_id = testset.testset_id or testset.id
+print(f"Testset ID: {testset_id}")
+print(f"Revision ID: {testset.id}")
 ```
 
   </TabItem>
@@ -127,7 +146,8 @@ testsets = await ag.testsets.alist()
 
 print(f"Found {len(testsets)} testsets:")
 for testset in testsets:
-    print(f"  - {testset.name} (ID: {testset.id})")
+    testset_id = testset.testset_id or testset.id
+    print(f"  - {testset.name} (testset_id: {testset_id})")
 ```
 
   </TabItem>
@@ -135,10 +155,11 @@ for testset in testsets:
 
 **Parameters:** None required.
 
-**Returns:** A list of `TestsetRevision` objects, each containing:
-- `id`: The testset UUID
+**Returns:** A list of `TestsetRevision` objects. For each item:
+- `id`: The latest revision UUID
+- `testset_id`: The parent testset UUID
 - `name`: The testset name
-- `slug`: The testset slug
+- `slug`: The revision slug
 - Additional metadata fields
 
 **Sample Output:**
@@ -210,7 +231,7 @@ else:
 ```
 
 :::info
-Currently using the legacy testset API. When retrieving a testset, the function returns a `TestsetRevision` object with version "1". In the future, this will support the new versioning system where each update creates a new revision.
+Testsets are versioned. Each update via `ag.testsets.aedit()` or `ag.testsets.aupsert()` creates a new `TestsetRevision`, while the parent `testset_id` stays the same.
 :::
 
 ## Retrieving a Testset by Name
@@ -240,7 +261,8 @@ async def get_testset_by_name(name: str):
 testset = await get_testset_by_name("Country Capitals")
 
 if testset:
-    print(f"Found testset: {testset.name} with ID: {testset.id}")
+    testset_id = testset.testset_id or testset.id
+    print(f"Found testset: {testset.name} (testset_id: {testset_id}, revision_id: {testset.id})")
 else:
     print("Testset not found")
 ```
diff --git a/docs/docs/evaluation/managing-test-sets/02-create-programatically.mdx b/docs/docs/evaluation/managing-test-sets/02-create-programatically.mdx
index 79c271af2..966cc213b 100644
--- a/docs/docs/evaluation/managing-test-sets/02-create-programatically.mdx
+++ b/docs/docs/evaluation/managing-test-sets/02-create-programatically.mdx
@@ -11,14 +11,14 @@ Creating test sets programmatically allows you to automate test set generation,
 
 ## Creating via API
 
-You can upload a test set using our API. Find the [API endpoint reference here](/reference/api/upload-file).
+You can create a versioned testset using the simple testset API. Find the [API endpoint reference here](/reference/api/create-simple-testset).
 
 Here's an example of such a call:
 
 **HTTP Request:**
 
 ```
-POST /testsets
+POST /preview/simple/testsets/
 
 ```
 
@@ -26,58 +26,65 @@ POST /testsets
 
 ```json
 {
-  "name": "testsetname",
-  "csvdata": [
-    { "column1": "row1col1", "column2": "row1col2" },
-    { "column1": "row2col1", "column2": "row2col2" }
-  ]
+  "testset": {
+    "slug": "countries-capitals",
+    "name": "countries_capitals",
+    "data": {
+      "testcases": [
+        {"data": {"country": "France", "capital": "Paris"}},
+        {"data": {"country": "Germany", "capital": "Berlin"}}
+      ]
+    }
+  }
 }
 ```
 
 ### Example with curl
 
 ```bash
-curl -X POST "https://cloud.agenta.ai/api/testsets" \
+curl -X POST "https://cloud.agenta.ai/api/preview/simple/testsets/" \
   -H "Content-Type: application/json" \
   -H "Authorization: ApiKey YOUR_API_KEY" \
   -d '{
-    "name": "my_test_set",
-    "csvdata": [
-      {"input": "Hello", "expected": "Hi there!"},
-      {"input": "How are you?", "expected": "I am doing well!"}
-    ]
+    "testset": {
+      "slug": "my-test-set",
+      "name": "my_test_set",
+      "data": {
+        "testcases": [
+          {"data": {"input": "Hello", "expected": "Hi there!"}},
+          {"data": {"input": "How are you?", "expected": "I am doing well!"}}
+        ]
+      }
+    }
   }'
 ```
 
 ## Creating via SDK
 
 ```python
-from agenta.client.api import AgentaApi
-from agenta.client.types.new_testset import NewTestset
-
-# Initialize the client
-client = AgentaApi(
-    base_url="https://cloud.agenta.ai/api",
-    api_key="your-api-key"
-)
-
-# Create test set data
-csvdata = [
-    {"country": "France", "capital": "Paris"},
-    {"country": "Germany", "capital": "Berlin"},
-    {"country": "Spain", "capital": "Madrid"}
-]
-
-# Create the test set
-response = client.testsets.create_testset(
-    request=NewTestset(
+import asyncio
+import agenta as ag
+
+ag.init(host="https://cloud.agenta.ai", api_key="your-api-key")
+
+async def main():
+    # Create test set data
+    csvdata = [
+        {"country": "France", "capital": "Paris"},
+        {"country": "Germany", "capital": "Berlin"},
+        {"country": "Spain", "capital": "Madrid"},
+    ]
+
+    # Create the testset (returns a TestsetRevision)
+    testset = await ag.testsets.acreate(
         name="countries_capitals",
-        csvdata=csvdata
+        data=csvdata,
     )
-)
 
-test_set_id = response.id
-print(f"Created test set with ID: {test_set_id}")
+    testset_revision_id = testset.id
+    print(f"Created testset revision with ID: {testset_revision_id}")
+
+asyncio.run(main())
 ```
 
 ## Next steps
diff --git a/docs/docs/tutorials/sdk/_evaluate-with-SDK.mdx b/docs/docs/tutorials/sdk/_evaluate-with-SDK.mdx
index defa5c1b0..a62577708 100644
--- a/docs/docs/tutorials/sdk/_evaluate-with-SDK.mdx
+++ b/docs/docs/tutorials/sdk/_evaluate-with-SDK.mdx
@@ -40,12 +40,7 @@ This operation is managed through TaskIQ tasks. The interactions with the LLM ap
 # In this example we will use the default template single_prompt which has the prompt "Determine the capital of {country}"
 
 # You can find the application ID in the URL. For example, in the URL https://cloud.agenta.ai/apps/666dde95962bbaffdb0072b5/playground?variant=app.default, the application ID is `666dde95962bbaffdb0072b5`.
-from agenta.client.client import AgentaApi
-# Let's list the applications
-client.apps.list_apps()
-```
-
-```python
+import agenta as ag
 
 app_id = "667d8cfad1812781f7e375d9"
 
@@ -55,101 +50,102 @@ api_key = "EUqJGOUu.xxxx"
 # Host.
 host = "https://cloud.agenta.ai"
 
-# Initialize the client
-
-client = AgentaApi(base_url=host + "/api", api_key=api_key)
+# Initialize the SDK
+ag.init(host=host, api_key=api_key)
 ```
 
-## Create a test set
+## Define the application
 
 ```python
-from agenta.client.types.new_testset import NewTestset
+@ag.application(
+    slug="capital_finder",
+    name="Capital Finder",
+)
+async def capital_finder(country: str):
+    capitals = {
+        "Germany": "Berlin",
+        "France": "Paris",
+    }
+    return capitals.get(country, "Unknown")
+```
 
-csvdata = [
-        {"country": "france", "capital": "Paris"},
-        {"country": "Germany", "capital": "paris"}
-    ]
+## Create a test set
 
-response = client.testsets.create_testset(request=NewTestset(name="test set", csvdata=csvdata))
-test_set_id = response.id
+```python
+import asyncio
+import agenta as ag
 
-# let's now update it
+# Assumes `ag.init(...)` has already been called.
 
 csvdata = [
-        {"country": "france", "capital": "Paris"},
-        {"country": "Germany", "capital": "Berlin"}
-    ]
-
-client.testsets.update_testset(testset_id=test_set_id, request=NewTestset(name="test set", csvdata=csvdata))
+    {"country": "France", "capital": "Paris"},
+    {"country": "Germany", "capital": "Paris"},
+]
+
+async def main():
+    # Create a testset (returns a TestsetRevision)
+    created = await ag.testsets.acreate(name="test set", data=csvdata)
+    testset_id = created.testset_id or created.id
+
+    # Update the testset data
+    await ag.testsets.aedit(
+        testset_id=testset_id,
+        name="test set",
+        data=[
+            {"country": "France", "capital": "Paris"},
+            {"country": "Germany", "capital": "Berlin"},
+        ],
+    )
+
+    # Fetch the latest revision after editing
+    updated = await ag.testsets.aretrieve(testset_id=testset_id)
+    return updated
+
+updated = asyncio.run(main())
+print(f"Latest revision ID: {updated.id}")
 ```
 
 # Create evaluators
 
 ```python
-# Create an evaluator that performs an exact match comparison on the 'capital' column
-# You can find the list of evaluator keys and evaluators and their configurations in https://github.com/Agenta-AI/agenta/blob/main/agenta-backend/agenta_backend/resources/evaluators/evaluators.py
-response = client.evaluators.create_new_evaluator_config(app_id=app_id, name="capital_evaluator", evaluator_key="auto_exact_match", settings_values={"correct_answer_key": "capital"})
-exact_match_eval_id = response.id
-
-code_snippet = """
-from typing import Dict
-
-def evaluate(
-    app_params: Dict[str, str],
-    inputs: Dict[str, str],
-    output: str,  # output of the llm app
-    datapoint: Dict[str, str]  # contains the testset row
-) -> float:
-    if output and output[0].isupper():
-        return 1.0
-    else:
-        return 0.0
-"""
-
-response = client.evaluators.create_new_evaluator_config(app_id=app_id, name="capital_letter_evaluator", evaluator_key="auto_custom_code_run", settings_values={"code": code_snippet})
-letter_match_eval_id = response.id
-```
-
-```python
-# get list of all evaluators
-client.evaluators.get_evaluator_configs(app_id=app_id)
-```
-
-# Run an evaluation
-
-```python
-response = client.apps.list_app_variants(app_id=app_id)
-print(response)
-myvariant_id = response[0].variant_id
+@ag.evaluator(
+    slug="capital_exact_match",
+    name="Capital Exact Match",
+)
+async def exact_match(capital: str, outputs: str):
+    return {
+        "score": 1.0 if outputs == capital else 0.0,
+        "success": outputs == capital,
+    }
+
+@ag.evaluator(
+    slug="capital_letter_match",
+    name="Capital Letter Match",
+)
+async def letter_match(outputs: str):
+    is_capitalized = bool(outputs) and outputs[0].isupper()
+    return {
+        "score": 1.0 if is_capitalized else 0.0,
+        "success": is_capitalized,
+    }
 ```
 
-```python
 # Run an evaluation
-from agenta.client.types.llm_run_rate_limit import LlmRunRateLimit
-response = client.evaluations.create_evaluation(app_id=app_id, variant_ids=[myvariant_id], testset_id=test_set_id, evaluators_configs=[exact_match_eval_id, letter_match_eval_id],
-                                                rate_limit=LlmRunRateLimit(
-        batch_size=10, # number of rows to call in parallel
-        max_retries=3, # max number of time to retry a failed llm call
-        retry_delay=2, # delay before retrying a failed llm call
-        delay_between_batches=5, # delay between batches
-    ),)
-print(response)
-```
 
 ```python
-# check the status
-client.evaluations.fetch_evaluation_status('667d98fbd1812781f7e3761a')
-```
+import asyncio
+from agenta.sdk.evaluations import aevaluate
 
-```python
-# fetch the overall results
-response = client.evaluations.fetch_evaluation_results('667d98fbd1812781f7e3761a')
+async def main():
+    result = await aevaluate(
+        name="Capital evaluation",
+        testsets=[updated.id],
+        applications=[capital_finder],
+        evaluators=[exact_match, letter_match],
+    )
 
-results = [(evaluator["evaluator_config"]["name"], evaluator["result"]) for evaluator in response["results"]]
-# End of  Selection
-```
+    print(result)
+    return result
 
-```python
-# fetch the detailed results
-client.evaluations.fetch_evaluation_scenarios(evaluations_ids='667d98fbd1812781f7e3761a')
+result = asyncio.run(main())
 ```
diff --git a/examples/jupyter/evaluation/quick-start.ipynb b/examples/jupyter/evaluation/quick-start.ipynb
index 6467e4a2f..58d2d930a 100644
--- a/examples/jupyter/evaluation/quick-start.ipynb
+++ b/examples/jupyter/evaluation/quick-start.ipynb
@@ -330,7 +330,7 @@
      "output_type": "stream",
      "text": [
       "📝 Creating testset...\n",
-      "✅ Testset created with ID: 019a783b-7894-7c80-a5ce-25005d745f5f\n",
+      "✅ Testset revision created with ID: 019a783b-7894-7c80-a5ce-25005d745f5f\n",
       "   Contains 4 test cases\n",
       "\n"
      ]
@@ -349,7 +349,7 @@
     "if not testset or not testset.id:\n",
     "    print(\"❌ Failed to create testset\")\n",
     "else:\n",
-    "    print(f\"✅ Testset created with ID: {testset.id}\")\n",
+    "    print(f\"✅ Testset revision created with ID: {testset.id}\")\n",
     "    print(f\"   Contains {len(test_data)} test cases\\n\")"
    ]
   },
diff --git a/examples/jupyter/evaluation/testset-management.ipynb b/examples/jupyter/evaluation/testset-management.ipynb
index 045a6c8d8..eb8e1b903 100644
--- a/examples/jupyter/evaluation/testset-management.ipynb
+++ b/examples/jupyter/evaluation/testset-management.ipynb
@@ -90,13 +90,13 @@
    "id": "e2b89655",
    "metadata": {},
    "outputs": [],
-   "source": "# Create a testset with simple data\ntestset = await ag.testsets.acreate(\n    data=[\n        {\"country\": \"Germany\", \"capital\": \"Berlin\"},\n        {\"country\": \"France\", \"capital\": \"Paris\"},\n        {\"country\": \"Spain\", \"capital\": \"Madrid\"},\n        {\"country\": \"Italy\", \"capital\": \"Rome\"},\n        {\"country\": \"Japan\", \"capital\": \"Tokyo\"},\n    ],\n    name=\"Country Capitals\",\n)\n\nprint(f\"✅ Created testset with ID: {testset.id}\")\nprint(f\"   Name: {testset.name}\")\nprint(f\"   Slug: {testset.slug}\")\n\n# Save the ID for later use\ntestset_id = testset.id"
+   "source": "# Create a testset with simple data\ntestset = await ag.testsets.acreate(\n    data=[\n        {\"country\": \"Germany\", \"capital\": \"Berlin\"},\n        {\"country\": \"France\", \"capital\": \"Paris\"},\n        {\"country\": \"Spain\", \"capital\": \"Madrid\"},\n        {\"country\": \"Italy\", \"capital\": \"Rome\"},\n        {\"country\": \"Japan\", \"capital\": \"Tokyo\"},\n    ],\n    name=\"Country Capitals\",\n)\n\nprint(f\"✅ Created testset with ID: {testset.id}\")\nprint(f\"   Name: {testset.name}\")\nprint(f\"   Slug: {testset.slug}\")\n\n# Save the parent testset ID for later use\ntestset_id = testset.testset_id or testset.id"
   },
   {
    "cell_type": "markdown",
    "id": "852d13a8",
    "metadata": {},
-   "source": "**Expected Output:**\n```\n✅ Created testset with ID: 01963413-3d39-7650-80ce-3ad5d688da6c\n   Name: Country Capitals\n   Slug: 3ad5d688da6c\n```\n\nThe `acreate` function returns a `TestsetRevision` object with the following fields:\n- `id`: Unique UUID for the testset\n- `name`: The name you provided\n- `slug`: A shortened identifier\n- `data`: The test data in a structured format"
+   "source": "**Expected Output:**\n```\n✅ Created testset with ID: 01963413-3d39-7650-80ce-3ad5d688da6c\n   Name: Country Capitals\n   Slug: 3ad5d688da6c\n```\n\nThe `acreate` function returns a `TestsetRevision` object with the following fields:\n- `id`: The revision UUID\n- `testset_id`: The parent testset UUID\n- `version`: The revision version\n- `name`: The name you provided\n- `slug`: A shortened identifier\n- `data`: The test data in a structured format"
   },
   {
    "cell_type": "markdown",

From 753bdac9fba7a525dfbc368a80b4f4bd1b61dc2e Mon Sep 17 00:00:00 2001
From: Mahmoud Mabrouk <mahmoud@agenta.ai>
Date: Thu, 15 Jan 2026 20:35:12 +0100
Subject: [PATCH 09/20] chore(docs): fix docusaurus build deps
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add missing @docusaurus/theme-common dependency and update npm lockfile so
`npm run build` works in clean checkouts.

Build output after the fix:

    > doc@0.0.0 build
    > docusaurus build

    [INFO] [en] Creating an optimized production build...
    [webpackbar] ℹ Compiling Client
    [webpackbar] ℹ Compiling Server
    [webpackbar] ✔ Server: Compiled successfully in 2.02s
    [webpackbar] ✔ Client: Compiled successfully in 2.88s
    [SUCCESS] Generated static files in "build".
    [INFO] Use `npm run serve` command to test your build locally.
---
 docs/package-lock.json | 68 ++++++++++++++++++------------------------
 docs/package.json      |  1 +
 2 files changed, 30 insertions(+), 39 deletions(-)

diff --git a/docs/package-lock.json b/docs/package-lock.json
index 205fad92b..62c7f7444 100644
--- a/docs/package-lock.json
+++ b/docs/package-lock.json
@@ -15,6 +15,7 @@
         "@docusaurus/plugin-content-docs": "^3.9.2",
         "@docusaurus/plugin-ideal-image": "^3.9.2",
         "@docusaurus/preset-classic": "^3.9.2",
+        "@docusaurus/theme-common": "^3.9.2",
         "@docusaurus/theme-search-algolia": "^3.9.2",
         "@mdx-js/react": "^3.0.0",
         "clsx": "^2.0.0",
@@ -160,6 +161,7 @@
       "resolved": "https://registry.npmjs.org/@algolia/cache-browser-local-storage/-/cache-browser-local-storage-4.24.0.tgz",
       "integrity": "sha512-t63W9BnoXVrGy9iYHBgObNXqYXM3tYXCjDSHeNwnsc324r4o5UiVKUiAB4THQ5z9U5hTj6qUvwg/Ez43ZD85ww==",
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@algolia/cache-common": "4.24.0"
       }
@@ -168,13 +170,15 @@
       "version": "4.24.0",
       "resolved": "https://registry.npmjs.org/@algolia/cache-common/-/cache-common-4.24.0.tgz",
       "integrity": "sha512-emi+v+DmVLpMGhp0V9q9h5CdkURsNmFC+cOS6uK9ndeJm9J4TiqSvPYVu+THUP8P/S08rxf5x2P+p3CfID0Y4g==",
-      "license": "MIT"
+      "license": "MIT",
+      "peer": true
     },
     "node_modules/@algolia/cache-in-memory": {
       "version": "4.24.0",
       "resolved": "https://registry.npmjs.org/@algolia/cache-in-memory/-/cache-in-memory-4.24.0.tgz",
       "integrity": "sha512-gDrt2so19jW26jY3/MkFg5mEypFIPbPoXsQGQWAi6TrCPsNOSEYepBMPlucqWigsmEy/prp5ug2jy/N3PVG/8w==",
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@algolia/cache-common": "4.24.0"
       }
@@ -199,6 +203,7 @@
       "resolved": "https://registry.npmjs.org/@algolia/client-account/-/client-account-4.24.0.tgz",
       "integrity": "sha512-adcvyJ3KjPZFDybxlqnf+5KgxJtBjwTPTeyG2aOyoJvx0Y8dUQAEOEVOJ/GBxX0WWNbmaSrhDURMhc+QeevDsA==",
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@algolia/client-common": "4.24.0",
         "@algolia/client-search": "4.24.0",
@@ -210,6 +215,7 @@
       "resolved": "https://registry.npmjs.org/@algolia/client-common/-/client-common-4.24.0.tgz",
       "integrity": "sha512-bc2ROsNL6w6rqpl5jj/UywlIYC21TwSSoFHKl01lYirGMW+9Eek6r02Tocg4gZ8HAw3iBvu6XQiM3BEbmEMoiA==",
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@algolia/requester-common": "4.24.0",
         "@algolia/transporter": "4.24.0"
@@ -220,6 +226,7 @@
       "resolved": "https://registry.npmjs.org/@algolia/client-search/-/client-search-4.24.0.tgz",
       "integrity": "sha512-uRW6EpNapmLAD0mW47OXqTP8eiIx5F6qN9/x/7HHO6owL3N1IXqydGwW5nhDFBrV+ldouro2W1VX3XlcUXEFCA==",
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@algolia/client-common": "4.24.0",
         "@algolia/requester-common": "4.24.0",
@@ -231,6 +238,7 @@
       "resolved": "https://registry.npmjs.org/@algolia/client-analytics/-/client-analytics-4.24.0.tgz",
       "integrity": "sha512-y8jOZt1OjwWU4N2qr8G4AxXAzaa8DBvyHTWlHzX/7Me1LX8OayfgHexqrsL4vSBcoMmVw2XnVW9MhL+Y2ZDJXg==",
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@algolia/client-common": "4.24.0",
         "@algolia/client-search": "4.24.0",
@@ -243,6 +251,7 @@
       "resolved": "https://registry.npmjs.org/@algolia/client-common/-/client-common-4.24.0.tgz",
       "integrity": "sha512-bc2ROsNL6w6rqpl5jj/UywlIYC21TwSSoFHKl01lYirGMW+9Eek6r02Tocg4gZ8HAw3iBvu6XQiM3BEbmEMoiA==",
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@algolia/requester-common": "4.24.0",
         "@algolia/transporter": "4.24.0"
@@ -253,6 +262,7 @@
       "resolved": "https://registry.npmjs.org/@algolia/client-search/-/client-search-4.24.0.tgz",
       "integrity": "sha512-uRW6EpNapmLAD0mW47OXqTP8eiIx5F6qN9/x/7HHO6owL3N1IXqydGwW5nhDFBrV+ldouro2W1VX3XlcUXEFCA==",
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@algolia/client-common": "4.24.0",
         "@algolia/requester-common": "4.24.0",
@@ -288,6 +298,7 @@
       "resolved": "https://registry.npmjs.org/@algolia/client-personalization/-/client-personalization-4.24.0.tgz",
       "integrity": "sha512-l5FRFm/yngztweU0HdUzz1rC4yoWCFo3IF+dVIVTfEPg906eZg5BOd1k0K6rZx5JzyyoP4LdmOikfkfGsKVE9w==",
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@algolia/client-common": "4.24.0",
         "@algolia/requester-common": "4.24.0",
@@ -299,6 +310,7 @@
       "resolved": "https://registry.npmjs.org/@algolia/client-common/-/client-common-4.24.0.tgz",
       "integrity": "sha512-bc2ROsNL6w6rqpl5jj/UywlIYC21TwSSoFHKl01lYirGMW+9Eek6r02Tocg4gZ8HAw3iBvu6XQiM3BEbmEMoiA==",
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@algolia/requester-common": "4.24.0",
         "@algolia/transporter": "4.24.0"
@@ -324,7 +336,6 @@
       "resolved": "https://registry.npmjs.org/@algolia/client-search/-/client-search-5.42.0.tgz",
       "integrity": "sha512-NZR7yyHj2WzK6D5X8gn+/KOxPdzYEXOqVdSaK/biU8QfYUpUuEA0sCWg/XlO05tPVEcJelF/oLrrNY3UjRbOww==",
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "@algolia/client-common": "5.42.0",
         "@algolia/requester-browser-xhr": "5.42.0",
@@ -360,13 +371,15 @@
       "version": "4.24.0",
       "resolved": "https://registry.npmjs.org/@algolia/logger-common/-/logger-common-4.24.0.tgz",
       "integrity": "sha512-LLUNjkahj9KtKYrQhFKCzMx0BY3RnNP4FEtO+sBybCjJ73E8jNdaKJ/Dd8A/VA4imVHP5tADZ8pn5B8Ga/wTMA==",
-      "license": "MIT"
+      "license": "MIT",
+      "peer": true
     },
     "node_modules/@algolia/logger-console": {
       "version": "4.24.0",
       "resolved": "https://registry.npmjs.org/@algolia/logger-console/-/logger-console-4.24.0.tgz",
       "integrity": "sha512-X4C8IoHgHfiUROfoRCV+lzSy+LHMgkoEEU1BbKcsfnV0i0S20zyy0NLww9dwVHUWNfPPxdMU+/wKmLGYf96yTg==",
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@algolia/logger-common": "4.24.0"
       }
@@ -391,6 +404,7 @@
       "resolved": "https://registry.npmjs.org/@algolia/recommend/-/recommend-4.24.0.tgz",
       "integrity": "sha512-P9kcgerfVBpfYHDfVZDvvdJv0lEoCvzNlOy2nykyt5bK8TyieYyiD0lguIJdRZZYGre03WIAFf14pgE+V+IBlw==",
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@algolia/cache-browser-local-storage": "4.24.0",
         "@algolia/cache-common": "4.24.0",
@@ -410,6 +424,7 @@
       "resolved": "https://registry.npmjs.org/@algolia/client-common/-/client-common-4.24.0.tgz",
       "integrity": "sha512-bc2ROsNL6w6rqpl5jj/UywlIYC21TwSSoFHKl01lYirGMW+9Eek6r02Tocg4gZ8HAw3iBvu6XQiM3BEbmEMoiA==",
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@algolia/requester-common": "4.24.0",
         "@algolia/transporter": "4.24.0"
@@ -420,6 +435,7 @@
       "resolved": "https://registry.npmjs.org/@algolia/client-search/-/client-search-4.24.0.tgz",
       "integrity": "sha512-uRW6EpNapmLAD0mW47OXqTP8eiIx5F6qN9/x/7HHO6owL3N1IXqydGwW5nhDFBrV+ldouro2W1VX3XlcUXEFCA==",
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@algolia/client-common": "4.24.0",
         "@algolia/requester-common": "4.24.0",
@@ -431,6 +447,7 @@
       "resolved": "https://registry.npmjs.org/@algolia/requester-browser-xhr/-/requester-browser-xhr-4.24.0.tgz",
       "integrity": "sha512-Z2NxZMb6+nVXSjF13YpjYTdvV3032YTBSGm2vnYvYPA6mMxzM3v5rsCiSspndn9rzIW4Qp1lPHBvuoKJV6jnAA==",
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@algolia/requester-common": "4.24.0"
       }
@@ -440,6 +457,7 @@
       "resolved": "https://registry.npmjs.org/@algolia/requester-node-http/-/requester-node-http-4.24.0.tgz",
       "integrity": "sha512-JF18yTjNOVYvU/L3UosRcvbPMGT9B+/GQWNWnenIImglzNVGpyzChkXLnrSf6uxwVNO6ESGu6oN8MqcGQcjQJw==",
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@algolia/requester-common": "4.24.0"
       }
@@ -460,7 +478,8 @@
       "version": "4.24.0",
       "resolved": "https://registry.npmjs.org/@algolia/requester-common/-/requester-common-4.24.0.tgz",
       "integrity": "sha512-k3CXJ2OVnvgE3HMwcojpvY6d9kgKMPRxs/kVohrwF5WMr2fnqojnycZkxPoEg+bXm8fi5BBfFmOqgYztRtHsQA==",
-      "license": "MIT"
+      "license": "MIT",
+      "peer": true
     },
     "node_modules/@algolia/requester-fetch": {
       "version": "5.42.0",
@@ -491,6 +510,7 @@
       "resolved": "https://registry.npmjs.org/@algolia/transporter/-/transporter-4.24.0.tgz",
       "integrity": "sha512-86nI7w6NzWxd1Zp9q3413dRshDqAzSbsQjhcDhPIatEFiZrL1/TjnHL8S7jVKFePlIMzDsZWXAXwXzcok9c5oA==",
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@algolia/cache-common": "4.24.0",
         "@algolia/logger-common": "4.24.0",
@@ -568,7 +588,6 @@
       "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.27.1.tgz",
       "integrity": "sha512-IaaGWsQqfsQWVLqMn9OB92MNN7zukfVA4s7KKAI0KfrrDsZ0yhi5uV4baBuLuN7n3vsZpwP8asPPcVwApxvjBQ==",
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "@ampproject/remapping": "^2.2.0",
         "@babel/code-frame": "^7.27.1",
@@ -2342,7 +2361,6 @@
         }
       ],
       "license": "MIT",
-      "peer": true,
       "engines": {
         "node": ">=18"
       },
@@ -2365,7 +2383,6 @@
         }
       ],
       "license": "MIT",
-      "peer": true,
       "engines": {
         "node": ">=18"
       }
@@ -2475,7 +2492,6 @@
       "resolved": "https://registry.npmjs.org/postcss-selector-parser/-/postcss-selector-parser-7.1.0.tgz",
       "integrity": "sha512-8sLjZwK0R+JlxlYcTuVnyT2v+htpdrjDOKuMcOVdYjt52Lh8hWRYpxBPoKx/Zg+bcjc3wx6fmQevMmUztS/ccA==",
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "cssesc": "^3.0.0",
         "util-deprecate": "^1.0.2"
@@ -2897,7 +2913,6 @@
       "resolved": "https://registry.npmjs.org/postcss-selector-parser/-/postcss-selector-parser-7.1.0.tgz",
       "integrity": "sha512-8sLjZwK0R+JlxlYcTuVnyT2v+htpdrjDOKuMcOVdYjt52Lh8hWRYpxBPoKx/Zg+bcjc3wx6fmQevMmUztS/ccA==",
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "cssesc": "^3.0.0",
         "util-deprecate": "^1.0.2"
@@ -4011,7 +4026,6 @@
       "resolved": "https://registry.npmjs.org/@docusaurus/plugin-content-docs/-/plugin-content-docs-3.9.2.tgz",
       "integrity": "sha512-C5wZsGuKTY8jEYsqdxhhFOe1ZDjH0uIYJ9T/jebHwkyxqnr4wW0jTkB72OMqNjsoQRcb0JN3PcSeTwFlVgzCZg==",
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "@docusaurus/core": "3.9.2",
         "@docusaurus/logger": "3.9.2",
@@ -4294,7 +4308,6 @@
       "resolved": "https://registry.npmjs.org/@docusaurus/theme-classic/-/theme-classic-3.9.2.tgz",
       "integrity": "sha512-IGUsArG5hhekXd7RDb11v94ycpJpFdJPkLnt10fFQWOVxAtq5/D7hT6lzc2fhyQKaaCE62qVajOMKL7OiAFAIA==",
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "@docusaurus/core": "3.9.2",
         "@docusaurus/logger": "3.9.2",
@@ -4335,7 +4348,6 @@
       "resolved": "https://registry.npmjs.org/@docusaurus/theme-common/-/theme-common-3.9.2.tgz",
       "integrity": "sha512-6c4DAbR6n6nPbnZhY2V3tzpnKnGL+6aOsLvFL26VRqhlczli9eWG0VDUNoCQEPnGwDMhPS42UhSAnz5pThm5Ag==",
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "@docusaurus/mdx-loader": "3.9.2",
         "@docusaurus/module-type-aliases": "3.9.2",
@@ -4507,7 +4519,6 @@
       "resolved": "https://registry.npmjs.org/@docusaurus/utils/-/utils-3.9.2.tgz",
       "integrity": "sha512-lBSBiRruFurFKXr5Hbsl2thmGweAPmddhF3jb99U4EMDA5L+e5Y1rAkOS07Nvrup7HUMBDrCV45meaxZnt28nQ==",
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "@docusaurus/logger": "3.9.2",
         "@docusaurus/types": "3.9.2",
@@ -4553,7 +4564,6 @@
       "resolved": "https://registry.npmjs.org/@docusaurus/utils-validation/-/utils-validation-3.9.2.tgz",
       "integrity": "sha512-l7yk3X5VnNmATbwijJkexdhulNsQaNDwoagiwujXoxFbWLcxHQqNQ+c/IAlzrfMMOfa/8xSBZ7KEKDesE/2J7A==",
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "@docusaurus/logger": "3.9.2",
         "@docusaurus/utils": "3.9.2",
@@ -4905,7 +4915,6 @@
       "resolved": "https://registry.npmjs.org/@mdx-js/react/-/react-3.1.0.tgz",
       "integrity": "sha512-QjHtSaoameoalGnKDT3FoIl4+9RwyTmo9ZJGBdLOks/YOiWHoRDI3PUwEzOE7kEmGcV3AFcp9K6dYu9rEuKLAQ==",
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "@types/mdx": "^2.0.0"
       },
@@ -5597,7 +5606,6 @@
       "resolved": "https://registry.npmjs.org/@svgr/core/-/core-8.1.0.tgz",
       "integrity": "sha512-8QqtOQT5ACVlmsvKOJNEaWmRPmcojMOzCz4Hs2BGG/toAp/K38LcsMRyLp349glq5AzJbCEeimEoxaX6v/fLrA==",
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "@babel/core": "^7.21.3",
         "@svgr/babel-preset": "8.1.0",
@@ -6003,7 +6011,6 @@
       "resolved": "https://registry.npmjs.org/@types/react/-/react-19.1.3.tgz",
       "integrity": "sha512-dLWQ+Z0CkIvK1J8+wrDPwGxEYFA4RAyHoZPxHVGspYmFVnwGSNT24cGIhFJrtfRnWVuW8X7NO52gCXmhkVUWGQ==",
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "csstype": "^3.0.2"
       }
@@ -6336,7 +6343,6 @@
       "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.14.1.tgz",
       "integrity": "sha512-OvQ/2pUDKmgfCg++xsTX1wGxfTaszcHVcTctW4UJB4hibJx2HXxxO5UmVgyjMa+ZDsiaf5wWLXYpRWMmBI0QHg==",
       "license": "MIT",
-      "peer": true,
       "bin": {
         "acorn": "bin/acorn"
       },
@@ -6419,7 +6425,6 @@
       "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.11.0.tgz",
       "integrity": "sha512-wGgprdCvMalC0BztXvitD2hC04YffAvtsUn93JbGXYLAtCUO4xd17mCCZQxUOItiBwZvJScWo8NIvQMQ71rdpg==",
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "fast-deep-equal": "^3.1.1",
         "json-schema-traverse": "^1.0.0",
@@ -6515,6 +6520,7 @@
       "resolved": "https://registry.npmjs.org/@algolia/client-common/-/client-common-4.24.0.tgz",
       "integrity": "sha512-bc2ROsNL6w6rqpl5jj/UywlIYC21TwSSoFHKl01lYirGMW+9Eek6r02Tocg4gZ8HAw3iBvu6XQiM3BEbmEMoiA==",
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@algolia/requester-common": "4.24.0",
         "@algolia/transporter": "4.24.0"
@@ -6525,6 +6531,7 @@
       "resolved": "https://registry.npmjs.org/@algolia/client-search/-/client-search-4.24.0.tgz",
       "integrity": "sha512-uRW6EpNapmLAD0mW47OXqTP8eiIx5F6qN9/x/7HHO6owL3N1IXqydGwW5nhDFBrV+ldouro2W1VX3XlcUXEFCA==",
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@algolia/client-common": "4.24.0",
         "@algolia/requester-common": "4.24.0",
@@ -6536,6 +6543,7 @@
       "resolved": "https://registry.npmjs.org/@algolia/requester-browser-xhr/-/requester-browser-xhr-4.24.0.tgz",
       "integrity": "sha512-Z2NxZMb6+nVXSjF13YpjYTdvV3032YTBSGm2vnYvYPA6mMxzM3v5rsCiSspndn9rzIW4Qp1lPHBvuoKJV6jnAA==",
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@algolia/requester-common": "4.24.0"
       }
@@ -6545,6 +6553,7 @@
       "resolved": "https://registry.npmjs.org/@algolia/requester-node-http/-/requester-node-http-4.24.0.tgz",
       "integrity": "sha512-JF18yTjNOVYvU/L3UosRcvbPMGT9B+/GQWNWnenIImglzNVGpyzChkXLnrSf6uxwVNO6ESGu6oN8MqcGQcjQJw==",
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@algolia/requester-common": "4.24.0"
       }
@@ -7197,7 +7206,6 @@
         }
       ],
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "baseline-browser-mapping": "^2.8.19",
         "caniuse-lite": "^1.0.30001751",
@@ -8319,7 +8327,6 @@
       "resolved": "https://registry.npmjs.org/postcss-selector-parser/-/postcss-selector-parser-7.1.0.tgz",
       "integrity": "sha512-8sLjZwK0R+JlxlYcTuVnyT2v+htpdrjDOKuMcOVdYjt52Lh8hWRYpxBPoKx/Zg+bcjc3wx6fmQevMmUztS/ccA==",
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "cssesc": "^3.0.0",
         "util-deprecate": "^1.0.2"
@@ -10900,7 +10907,6 @@
       "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz",
       "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==",
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "fast-deep-equal": "^3.1.1",
         "fast-json-stable-stringify": "^2.0.0",
@@ -15879,7 +15885,6 @@
       "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz",
       "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==",
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "fast-deep-equal": "^3.1.1",
         "fast-json-stable-stringify": "^2.0.0",
@@ -16660,7 +16665,6 @@
         }
       ],
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "nanoid": "^3.3.11",
         "picocolors": "^1.1.1",
@@ -17677,7 +17681,6 @@
       "resolved": "https://registry.npmjs.org/postcss-selector-parser/-/postcss-selector-parser-7.1.0.tgz",
       "integrity": "sha512-8sLjZwK0R+JlxlYcTuVnyT2v+htpdrjDOKuMcOVdYjt52Lh8hWRYpxBPoKx/Zg+bcjc3wx6fmQevMmUztS/ccA==",
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "cssesc": "^3.0.0",
         "util-deprecate": "^1.0.2"
@@ -18682,7 +18685,6 @@
       "resolved": "https://registry.npmjs.org/react/-/react-18.3.1.tgz",
       "integrity": "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==",
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "loose-envify": "^1.1.0"
       },
@@ -18695,7 +18697,6 @@
       "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-18.3.1.tgz",
       "integrity": "sha512-5m4nQKp+rZRb09LNH59GM4BxTh9251/ylbKIbpe7TpGxfJ+9kv6BLkLBXIjjspbgbnIBNqlI23tRnTWT0snUIw==",
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "loose-envify": "^1.1.0",
         "scheduler": "^0.23.2"
@@ -18732,7 +18733,6 @@
       "resolved": "https://registry.npmjs.org/react-hook-form/-/react-hook-form-7.56.2.tgz",
       "integrity": "sha512-vpfuHuQMF/L6GpuQ4c3ZDo+pRYxIi40gQqsCmmfUBwm+oqvBhKhwghCuj2o00YCgSfU6bR9KC/xnQGWm3Gr08A==",
       "license": "MIT",
-      "peer": true,
       "engines": {
         "node": ">=18.0.0"
       },
@@ -18793,7 +18793,6 @@
       "resolved": "https://registry.npmjs.org/@docusaurus/react-loadable/-/react-loadable-6.0.0.tgz",
       "integrity": "sha512-YMMxTUQV/QFSnbgrP3tjDzLHRg7vsbMn8e9HAa8o/1iXoiomo48b7sk/kkmWEuWNDPJVlKSJRB6Y2fHqdJk+SQ==",
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "@types/react": "*"
       },
@@ -19516,7 +19515,6 @@
       "resolved": "https://registry.npmjs.org/react-redux/-/react-redux-7.2.9.tgz",
       "integrity": "sha512-Gx4L3uM182jEEayZfRbI/G11ZpYdNAnBs70lFVMNdHJI76XYtR+7m0MN+eAs7UHBPhWXcnFPaS+9owSCJQHNpQ==",
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "@babel/runtime": "^7.15.4",
         "@types/react-redux": "^7.1.20",
@@ -19548,7 +19546,6 @@
       "resolved": "https://registry.npmjs.org/react-router/-/react-router-5.3.4.tgz",
       "integrity": "sha512-Ys9K+ppnJah3QuaRiLxk+jDWOR1MekYQrlytiXxC1RyfbdsZkS5pvKAzCCr031xHixZwpnsYNT5xysdFHQaYsA==",
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "@babel/runtime": "^7.12.13",
         "history": "^4.9.0",
@@ -19697,7 +19694,6 @@
       "resolved": "https://registry.npmjs.org/redux/-/redux-4.2.1.tgz",
       "integrity": "sha512-LAUYz4lc+Do8/g7aeRa8JkyDErK6ekstQaqWQrNRW//MY1TvCEpMtpTWvlQ+FPbWCx+Xixu/6SHt5N0HR+SB4w==",
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "@babel/runtime": "^7.9.2"
       }
@@ -20309,7 +20305,6 @@
       "resolved": "https://registry.npmjs.org/sass/-/sass-1.87.0.tgz",
       "integrity": "sha512-d0NoFH4v6SjEK7BoX810Jsrhj7IQSYHAHLi/iSpgqKc7LaIDshFRlSg5LOymf9FqQhxEHs2W5ZQXlvy0KD45Uw==",
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "chokidar": "^4.0.0",
         "immutable": "^5.0.2",
@@ -21994,8 +21989,7 @@
       "version": "2.8.1",
       "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz",
       "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
-      "license": "0BSD",
-      "peer": true
+      "license": "0BSD"
     },
     "node_modules/tunnel-agent": {
       "version": "0.6.0",
@@ -22049,7 +22043,6 @@
       "integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==",
       "devOptional": true,
       "license": "Apache-2.0",
-      "peer": true,
       "bin": {
         "tsc": "bin/tsc",
         "tsserver": "bin/tsserver"
@@ -22420,7 +22413,6 @@
       "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz",
       "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==",
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "fast-deep-equal": "^3.1.1",
         "fast-json-stable-stringify": "^2.0.0",
@@ -22710,7 +22702,6 @@
       "resolved": "https://registry.npmjs.org/webpack/-/webpack-5.99.8.tgz",
       "integrity": "sha512-lQ3CPiSTpfOnrEGeXDwoq5hIGzSjmwD72GdfVzF7CQAI7t47rJG9eDWvcEkEn3CUQymAElVvDg3YNTlCYj+qUQ==",
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "@types/eslint-scope": "^3.7.7",
         "@types/estree": "^1.0.6",
@@ -23319,7 +23310,6 @@
       "resolved": "https://registry.npmjs.org/zod/-/zod-4.1.12.tgz",
       "integrity": "sha512-JInaHOamG8pt5+Ey8kGmdcAcg3OL9reK8ltczgHTAwNhMys/6ThXHityHxVV2p3fkw/c+MAvBHFVYHFZDmjMCQ==",
       "license": "MIT",
-      "peer": true,
       "funding": {
         "url": "https://github.com/sponsors/colinhacks"
       }
diff --git a/docs/package.json b/docs/package.json
index 95d5319b1..a94b4645e 100644
--- a/docs/package.json
+++ b/docs/package.json
@@ -28,6 +28,7 @@
     "@docusaurus/plugin-ideal-image": "^3.9.2",
     "@docusaurus/preset-classic": "^3.9.2",
     "@docusaurus/theme-search-algolia": "^3.9.2",
+    "@docusaurus/theme-common": "^3.9.2",
     "@mdx-js/react": "^3.0.0",
     "clsx": "^2.0.0",
     "docusaurus-plugin-image-zoom": "^2.0.0",

From 3b6b69668303e7ff7cdfb74c77e29198702e17ec Mon Sep 17 00:00:00 2001
From: Mahmoud Mabrouk <mahmoud@agenta.ai>
Date: Thu, 15 Jan 2026 20:39:51 +0100
Subject: [PATCH 10/20] [3452] fix(frontend): update testset API modal to
 preview endpoints

Switch CreateTestsetFromApi modal + code snippets from legacy /testsets endpoints to /preview/simple/testsets, matching current backend + docs payloads.
---
 .../testsets/create_with_json/curl.ts         | 17 +++++++++-----
 .../testsets/create_with_json/python.ts       | 22 ++++++++++++-------
 .../testsets/create_with_json/typescript.ts   | 16 +++++++++-----
 .../testsets/create_with_upload/curl.ts       |  9 ++++----
 .../testsets/create_with_upload/python.ts     |  5 ++++-
 .../testsets/create_with_upload/typescript.ts |  1 +
 .../testset/modals/CreateTestsetFromApi.tsx   | 18 +++++++++++----
 7 files changed, 60 insertions(+), 28 deletions(-)

diff --git a/web/oss/src/code_snippets/testsets/create_with_json/curl.ts b/web/oss/src/code_snippets/testsets/create_with_json/curl.ts
index bdd35848c..01d105e32 100644
--- a/web/oss/src/code_snippets/testsets/create_with_json/curl.ts
+++ b/web/oss/src/code_snippets/testsets/create_with_json/curl.ts
@@ -3,12 +3,17 @@ import {isDemo} from "@/oss/lib/helpers/utils"
 export default function cURLCode(uri: string, params: string): string {
     return `curl -X POST ${uri} \
 -H 'Content-Type: application/json' \
-${!isDemo() ? "" : "-H 'Authorization: your_api_key'"} \
+ ${!isDemo() ? "" : "-H 'Authorization: your_api_key'"} \
 -d '{
-        "name": "your_testset_name",
-        "csvdata": [
-            {"column1": "value1", "column2": "value2"},
-            {"column1": "value3", "column2": "value4"}
-        ]
+        "testset": {
+            "slug": "your-testset-slug",
+            "name": "your_testset_name",
+            "data": {
+                "testcases": [
+                    {"data": {"column1": "value1", "column2": "value2"}},
+                    {"data": {"column1": "value3", "column2": "value4"}}
+                ]
+            }
+        }
     }'`
 }
diff --git a/web/oss/src/code_snippets/testsets/create_with_json/python.ts b/web/oss/src/code_snippets/testsets/create_with_json/python.ts
index a7cb8af63..c34f1f471 100644
--- a/web/oss/src/code_snippets/testsets/create_with_json/python.ts
+++ b/web/oss/src/code_snippets/testsets/create_with_json/python.ts
@@ -5,17 +5,23 @@ export default function pythonCode(uri: string, params: string): string {
 import json
 
 url = '${uri}'
+
 data = {
-    "name": "your_testset_name",
-    "csvdata": [
-        {"column1": "value1", "column2": "value2"},
-        {"column1": "value3", "column2": "value4"}
-    ]
+    "testset": {
+        "slug": "your-testset-slug",
+        "name": "your_testset_name",
+        "data": {
+            "testcases": [
+                {"data": {"column1": "value1", "column2": "value2"}},
+                {"data": {"column1": "value3", "column2": "value4"}},
+            ]
+        },
+    }
 }
 
-response = requests.post(url, data=json.dumps(data), headers={'Content-Type': 'application/json'${
-        !isDemo() ? "" : ", 'Authorization': 'your_api_key'"
-    }})
+headers = {'Content-Type': 'application/json'${!isDemo() ? "" : ", 'Authorization': 'your_api_key'"}}
+
+response = requests.post(url, data=json.dumps(data), headers=headers)
 
 print(response.status_code)
 print(response.json())
diff --git a/web/oss/src/code_snippets/testsets/create_with_json/typescript.ts b/web/oss/src/code_snippets/testsets/create_with_json/typescript.ts
index abe9b045e..6a04996d5 100644
--- a/web/oss/src/code_snippets/testsets/create_with_json/typescript.ts
+++ b/web/oss/src/code_snippets/testsets/create_with_json/typescript.ts
@@ -6,12 +6,18 @@ export default function tsCode(uri: string, params: string): string {
     const codeString = `import axios from 'axios';
 
 const url = '${uri}';
+
 const data = {
-    name: 'your_testset_name',
-    csvdata: [
-        {column1: 'value1', column2: 'value2'},
-        {column1: 'value3', column2: 'value4'}
-    ]
+    testset: {
+        slug: 'your-testset-slug',
+        name: 'your_testset_name',
+        data: {
+            testcases: [
+                {data: {column1: 'value1', column2: 'value2'}},
+                {data: {column1: 'value3', column2: 'value4'}},
+            ],
+        },
+    },
 };
 
 axios.post(url, data${!isDemo() ? "" : ", {headers: {Authorization: 'your_api_key'}}"})
diff --git a/web/oss/src/code_snippets/testsets/create_with_upload/curl.ts b/web/oss/src/code_snippets/testsets/create_with_upload/curl.ts
index d7ddfa1c9..924d51ed4 100644
--- a/web/oss/src/code_snippets/testsets/create_with_upload/curl.ts
+++ b/web/oss/src/code_snippets/testsets/create_with_upload/curl.ts
@@ -1,9 +1,10 @@
 import {isDemo} from "@/oss/lib/helpers/utils"
 
 export default function cURLCode(uri: string): string {
-    return `curl -X POST ${uri} \\
--H 'Content-Type: multipart/form-data' \\
--F 'file=@/oss/path/to/your/file.csv' \\
--F 'testset_name=your_testset_name' \\
+    return `curl -X POST ${uri} \
+-H 'Content-Type: multipart/form-data' \
+-F 'file=@/oss/path/to/your/file.csv' \
+-F 'file_type=csv' \
+-F 'testset_name=your_testset_name' \
 ${!isDemo() ? "" : "-H 'Authorization: your_api_key'"}`
 }
diff --git a/web/oss/src/code_snippets/testsets/create_with_upload/python.ts b/web/oss/src/code_snippets/testsets/create_with_upload/python.ts
index 47d31c79c..38012af06 100644
--- a/web/oss/src/code_snippets/testsets/create_with_upload/python.ts
+++ b/web/oss/src/code_snippets/testsets/create_with_upload/python.ts
@@ -9,7 +9,10 @@ testset_name = 'your_testset_name'
 
 with open(file_path, 'rb') as file:
     files = {'file': file}
-    data = {'testset_name': testset_name}
+    data = {
+        'testset_name': testset_name,
+        'file_type': 'csv',
+    }
     response = requests.post(url, files=files, data=data${
         !isDemo() ? "" : ", headers={'Authorization': 'your_api_key'}"
     })
diff --git a/web/oss/src/code_snippets/testsets/create_with_upload/typescript.ts b/web/oss/src/code_snippets/testsets/create_with_upload/typescript.ts
index 25410d4ec..fb2791747 100644
--- a/web/oss/src/code_snippets/testsets/create_with_upload/typescript.ts
+++ b/web/oss/src/code_snippets/testsets/create_with_upload/typescript.ts
@@ -13,6 +13,7 @@ export default function tsCode(uri: string): string {
 
     const formData = new FormData();
     formData.append('file', fs.createReadStream(filePath));
+    formData.append('file_type', 'csv');
     formData.append('testset_name', testsetName);
 
     const config = {
diff --git a/web/oss/src/components/pages/testset/modals/CreateTestsetFromApi.tsx b/web/oss/src/components/pages/testset/modals/CreateTestsetFromApi.tsx
index a206bbbb7..a387815f2 100644
--- a/web/oss/src/components/pages/testset/modals/CreateTestsetFromApi.tsx
+++ b/web/oss/src/components/pages/testset/modals/CreateTestsetFromApi.tsx
@@ -73,11 +73,21 @@ const CreateTestsetFromApi: React.FC<Props> = ({setCurrent, onCancel}) => {
     const [uploadType, setUploadType] = useState<"csv" | "json">("csv")
     const [selectedLang, setSelectedLang] = useState("python")
 
-    const uploadURI = `${getAgentaApiUrl()}/testsets/upload`
-    const jsonURI = `${getAgentaApiUrl()}/testsets`
+    const uploadURI = `${getAgentaApiUrl()}/preview/simple/testsets/upload`
+    const jsonURI = `${getAgentaApiUrl()}/preview/simple/testsets/`
 
     const params = `{
-    "name": "testset_name",}`
+    "testset": {
+        "slug": "your-testset-slug",
+        "name": "your_testset_name",
+        "data": {
+            "testcases": [
+                {"data": {"column1": "value1", "column2": "value2"}},
+                {"data": {"column1": "value3", "column2": "value4"}}
+            ]
+        }
+    }
+}`
 
     const jsonCodeSnippets: Record<string, string> = {
         python: pythonCode(jsonURI, params),
@@ -116,7 +126,7 @@ const CreateTestsetFromApi: React.FC<Props> = ({setCurrent, onCancel}) => {
                     </Radio.Group>
                 </div>
 
-                <Text>Use this endpoint to create a new Testset for your App using JSON</Text>
+                <Text>Use these endpoints to create a testset via JSON or upload a file</Text>
 
                 <div>
                     <Tabs

From a40440a915ed576f710d520e62128389803c6f8a Mon Sep 17 00:00:00 2001
From: Kaosiso Ezealigo <ezealigokosiso@gmail.com>
Date: Fri, 16 Jan 2026 00:03:00 +0100
Subject: [PATCH 11/20] fix

---
 .../JSONSchema/JSONSchemaEditor.tsx           | 25 -------------------
 1 file changed, 25 deletions(-)

diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx
index 6aff01d8c..dc6480ae0 100644
--- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx
+++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx
@@ -153,31 +153,6 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
         }
     }, [supportsBasicMode, mode])
 
-    // Update form when basic mode changes
-    useEffect(() => {
-        if (mode === "basic" && supportsBasicMode) {
-            const config: SchemaConfig = {
-                responseFormat,
-                includeReasoning,
-                continuousConfig: {minimum: minValue, maximum: maxValue},
-                categoricalOptions: categories,
-            }
-            const schema = generateJSONSchema(config)
-            const schemaString = JSON.stringify(schema, null, 2)
-
-            syncFormValue(schemaString)
-        }
-    }, [
-        mode,
-        responseFormat,
-        includeReasoning,
-        minValue,
-        maxValue,
-        categories,
-        supportsBasicMode,
-        syncFormValue,
-    ])
-
     const handleModeSwitch = (newMode: "basic" | "advanced") => {
         if (newMode === mode) {
             return

From 3a9e7e2782e4592ced051846eecd3d1b9bb2cfe4 Mon Sep 17 00:00:00 2001
From: Kaosiso Ezealigo <ezealigokosiso@gmail.com>
Date: Fri, 16 Jan 2026 00:31:57 +0100
Subject: [PATCH 12/20] fix

---
 .../JSONSchema/JSONSchemaEditor.tsx           | 25 +++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx
index dc6480ae0..6aff01d8c 100644
--- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx
+++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx
@@ -153,6 +153,31 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
         }
     }, [supportsBasicMode, mode])
 
+    // Update form when basic mode changes
+    useEffect(() => {
+        if (mode === "basic" && supportsBasicMode) {
+            const config: SchemaConfig = {
+                responseFormat,
+                includeReasoning,
+                continuousConfig: {minimum: minValue, maximum: maxValue},
+                categoricalOptions: categories,
+            }
+            const schema = generateJSONSchema(config)
+            const schemaString = JSON.stringify(schema, null, 2)
+
+            syncFormValue(schemaString)
+        }
+    }, [
+        mode,
+        responseFormat,
+        includeReasoning,
+        minValue,
+        maxValue,
+        categories,
+        supportsBasicMode,
+        syncFormValue,
+    ])
+
     const handleModeSwitch = (newMode: "basic" | "advanced") => {
         if (newMode === mode) {
             return

From cd5e28b97d62b238e35278ba1051a2392d99bac5 Mon Sep 17 00:00:00 2001
From: Kaosiso Ezealigo <ezealigokosiso@gmail.com>
Date: Fri, 16 Jan 2026 00:43:58 +0100
Subject: [PATCH 13/20] fix: ensure evaluator configuration changes persist
 correctly

---
 .../ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx
index 6aff01d8c..f24ef6a5c 100644
--- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx
+++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx
@@ -76,6 +76,7 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
 
         return isSchemaCompatibleWithBasicMode(defaultValue)
     })
+    const [isInitialized, setIsInitialized] = useState(false)
 
     const lastSyncedValueRef = useRef<string | undefined>(undefined)
 
@@ -133,10 +134,13 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
         if (!defaultValue) {
             setSupportsBasicMode(true)
             setRawSchema("")
+            lastSyncedValueRef.current = undefined
+            setIsInitialized(true)
             return
         }
 
         if (lastSyncedValueRef.current === defaultValue) {
+            setIsInitialized(true)
             return
         }
 
@@ -145,7 +149,9 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
 
         setSupportsBasicMode(isSchemaCompatibleWithBasicMode(defaultValue))
         setRawSchema(defaultValue)
-    }, [defaultValue, applyParsedConfig])
+        syncFormValue(defaultValue)
+        setIsInitialized(true)
+    }, [defaultValue, applyParsedConfig, syncFormValue])
 
     useEffect(() => {
         if (!supportsBasicMode && mode !== "advanced") {
@@ -155,6 +161,7 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
 
     // Update form when basic mode changes
     useEffect(() => {
+        if (!isInitialized) return
         if (mode === "basic" && supportsBasicMode) {
             const config: SchemaConfig = {
                 responseFormat,
@@ -168,6 +175,7 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
             syncFormValue(schemaString)
         }
     }, [
+        isInitialized,
         mode,
         responseFormat,
         includeReasoning,

From c988e47eb68c03f5a53dbc021737d18e053a2ef0 Mon Sep 17 00:00:00 2001
From: Kaosiso Ezealigo <ezealigokosiso@gmail.com>
Date: Fri, 16 Jan 2026 01:05:45 +0100
Subject: [PATCH 14/20] fix: sync basic-mode schema to form only after user edits

---
 .../JSONSchema/JSONSchemaEditor.tsx           | 33 ++++++++++++++++---
 1 file changed, 28 insertions(+), 5 deletions(-)

diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx
index f24ef6a5c..a3c58b098 100644
--- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx
+++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx
@@ -77,6 +77,7 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
         return isSchemaCompatibleWithBasicMode(defaultValue)
     })
     const [isInitialized, setIsInitialized] = useState(false)
+    const [isDirty, setIsDirty] = useState(false)
 
     const lastSyncedValueRef = useRef<string | undefined>(undefined)
 
@@ -96,6 +97,7 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
         } else {
             setCategories(createDefaultCategories())
         }
+        setIsDirty(false)
     }, [])
 
     const syncFormValue = useCallback(
@@ -136,6 +138,7 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
             setRawSchema("")
             lastSyncedValueRef.current = undefined
             setIsInitialized(true)
+            setIsDirty(false)
             return
         }
 
@@ -151,6 +154,7 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
         setRawSchema(defaultValue)
         syncFormValue(defaultValue)
         setIsInitialized(true)
+        setIsDirty(false)
     }, [defaultValue, applyParsedConfig, syncFormValue])
 
     useEffect(() => {
@@ -161,7 +165,7 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
 
     // Update form when basic mode changes
     useEffect(() => {
-        if (!isInitialized) return
+        if (!isInitialized || !isDirty) return
         if (mode === "basic" && supportsBasicMode) {
             const config: SchemaConfig = {
                 responseFormat,
@@ -172,10 +176,12 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
             const schema = generateJSONSchema(config)
             const schemaString = JSON.stringify(schema, null, 2)
 
+            setRawSchema(schemaString)
             syncFormValue(schemaString)
         }
     }, [
         isInitialized,
+        isDirty,
         mode,
         responseFormat,
         includeReasoning,
@@ -219,6 +225,7 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
                         const parsed = parseJSONSchema(rawSchema)
                         const config = parsed ?? getDefaultConfig()
                         applyConfigAndSync(config)
+                        setIsDirty(false)
                         setMode("basic")
                     },
                 })
@@ -228,6 +235,7 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
             const parsed = parseJSONSchema(rawSchema)
             const config = parsed ?? getDefaultConfig()
             applyConfigAndSync(config)
+            setIsDirty(false)
             setMode("basic")
             return
         }
@@ -237,16 +245,19 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
 
     const addCategory = () => {
         setCategories([...categories, {name: "", description: ""}])
+        setIsDirty(true)
     }
 
     const removeCategory = (index: number) => {
         setCategories(categories.filter((_, i) => i !== index))
+        setIsDirty(true)
     }
 
     const updateCategory = (index: number, field: "name" | "description", value: string) => {
         const updated = [...categories]
         updated[index][field] = value
         setCategories(updated)
+        setIsDirty(true)
     }
 
     if (mode === "advanced") {
@@ -320,7 +331,10 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
                         <Select
                             style={{width: "100%"}}
                             value={responseFormat}
-                            onChange={(value) => setResponseFormat(value)}
+                            onChange={(value) => {
+                                setResponseFormat(value)
+                                setIsDirty(true)
+                            }}
                             options={[
                                 {label: "Boolean (True/False)", value: "boolean"},
                                 {label: "Continuous (Numeric Range)", value: "continuous"},
@@ -357,7 +371,10 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
                                 <InputNumber
                                     style={{width: "100%"}}
                                     value={minValue}
-                                    onChange={(value) => setMinValue(value ?? 0)}
+                                    onChange={(value) => {
+                                        setMinValue(value ?? 0)
+                                        setIsDirty(true)
+                                    }}
                                 />
                             </div>
                             <div>
@@ -377,7 +394,10 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
                                 <InputNumber
                                     style={{width: "100%"}}
                                     value={maxValue}
-                                    onChange={(value) => setMaxValue(value ?? 10)}
+                                    onChange={(value) => {
+                                        setMaxValue(value ?? 10)
+                                        setIsDirty(true)
+                                    }}
                                 />
                             </div>
                         </div>
@@ -440,7 +460,10 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
                     <div style={{display: "flex", alignItems: "center", gap: 4}}>
                         <Checkbox
                             checked={includeReasoning}
-                            onChange={(e) => setIncludeReasoning(e.target.checked)}
+                            onChange={(e) => {
+                                setIncludeReasoning(e.target.checked)
+                                setIsDirty(true)
+                            }}
                         >
                             <Typography.Text strong>Include reasoning</Typography.Text>
                         </Checkbox>

From 18ec6be877ee308f21e48e2ed4a7c3b237e3e5ea Mon Sep 17 00:00:00 2001
From: Kaosiso Ezealigo <ezealigokosiso@gmail.com>
Date: Fri, 16 Jan 2026 01:26:50 +0100
Subject: [PATCH 15/20] fix: initialize JSON schema editor from watched form value

---
 .../JSONSchema/JSONSchemaEditor.tsx           | 34 +++++++++++++++----
 1 file changed, 27 insertions(+), 7 deletions(-)

diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx
index a3c58b098..167c923d3 100644
--- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx
+++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx
@@ -14,6 +14,7 @@ import {
     Alert,
     Tooltip,
     Modal,
+    Form,
 } from "antd"
 import {createUseStyles} from "react-jss"
 
@@ -82,6 +83,24 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
     const lastSyncedValueRef = useRef<string | undefined>(undefined)
 
     const namePath = useMemo(() => (Array.isArray(name) ? name : [name]), [name])
+    const watchedValue = Form.useWatch(namePath as any, form)
+
+    const normalizeSchemaValue = useCallback((value: unknown) => {
+        if (typeof value === "string") return value
+        if (value && typeof value === "object") {
+            return JSON.stringify(value, null, 2)
+        }
+        return undefined
+    }, [])
+
+    const normalizedWatchedValue = useMemo(
+        () => normalizeSchemaValue(watchedValue),
+        [normalizeSchemaValue, watchedValue],
+    )
+    const normalizedDefaultValue = useMemo(
+        () => normalizeSchemaValue(defaultValue),
+        [normalizeSchemaValue, defaultValue],
+    )
 
     const applyParsedConfig = useCallback((parsed: SchemaConfig) => {
         setResponseFormat(parsed.responseFormat)
@@ -133,7 +152,8 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
 
     // Initialize from default value
     useEffect(() => {
-        if (!defaultValue) {
+        const sourceValue = normalizedWatchedValue ?? normalizedDefaultValue
+        if (!sourceValue) {
             setSupportsBasicMode(true)
             setRawSchema("")
             lastSyncedValueRef.current = undefined
@@ -142,20 +162,20 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
             return
         }
 
-        if (lastSyncedValueRef.current === defaultValue) {
+        if (lastSyncedValueRef.current === sourceValue) {
             setIsInitialized(true)
             return
         }
 
-        const parsed = parseJSONSchema(defaultValue)
+        const parsed = parseJSONSchema(sourceValue)
         if (parsed) applyParsedConfig(parsed)
 
-        setSupportsBasicMode(isSchemaCompatibleWithBasicMode(defaultValue))
-        setRawSchema(defaultValue)
-        syncFormValue(defaultValue)
+        setSupportsBasicMode(isSchemaCompatibleWithBasicMode(sourceValue))
+        setRawSchema(sourceValue)
+        syncFormValue(sourceValue)
         setIsInitialized(true)
         setIsDirty(false)
-    }, [defaultValue, applyParsedConfig, syncFormValue])
+    }, [applyParsedConfig, normalizedDefaultValue, normalizedWatchedValue, syncFormValue])
 
     useEffect(() => {
         if (!supportsBasicMode && mode !== "advanced") {

From 37e84efa725f0cc88d26a22b52db6fe91f52003b Mon Sep 17 00:00:00 2001
From: Kaosiso Ezealigo <ezealigokosiso@gmail.com>
Date: Fri, 16 Jan 2026 09:47:43 +0100
Subject: [PATCH 16/20] fix: streamline evaluator configuration handling and
 remove unnecessary state management

---
 .../JSONSchema/JSONSchemaEditor.tsx           | 66 ++++---------------
 1 file changed, 12 insertions(+), 54 deletions(-)

diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx
index 167c923d3..d86583516 100644
--- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx
+++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx
@@ -78,29 +78,10 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
         return isSchemaCompatibleWithBasicMode(defaultValue)
     })
     const [isInitialized, setIsInitialized] = useState(false)
-    const [isDirty, setIsDirty] = useState(false)
 
     const lastSyncedValueRef = useRef<string | undefined>(undefined)
 
     const namePath = useMemo(() => (Array.isArray(name) ? name : [name]), [name])
-    const watchedValue = Form.useWatch(namePath as any, form)
-
-    const normalizeSchemaValue = useCallback((value: unknown) => {
-        if (typeof value === "string") return value
-        if (value && typeof value === "object") {
-            return JSON.stringify(value, null, 2)
-        }
-        return undefined
-    }, [])
-
-    const normalizedWatchedValue = useMemo(
-        () => normalizeSchemaValue(watchedValue),
-        [normalizeSchemaValue, watchedValue],
-    )
-    const normalizedDefaultValue = useMemo(
-        () => normalizeSchemaValue(defaultValue),
-        [normalizeSchemaValue, defaultValue],
-    )
 
     const applyParsedConfig = useCallback((parsed: SchemaConfig) => {
         setResponseFormat(parsed.responseFormat)
@@ -116,7 +97,6 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
         } else {
             setCategories(createDefaultCategories())
         }
-        setIsDirty(false)
     }, [])
 
     const syncFormValue = useCallback(
@@ -152,30 +132,27 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
 
     // Initialize from default value
     useEffect(() => {
-        const sourceValue = normalizedWatchedValue ?? normalizedDefaultValue
-        if (!sourceValue) {
+        if (!defaultValue) {
             setSupportsBasicMode(true)
             setRawSchema("")
             lastSyncedValueRef.current = undefined
             setIsInitialized(true)
-            setIsDirty(false)
             return
         }
 
-        if (lastSyncedValueRef.current === sourceValue) {
+        if (lastSyncedValueRef.current === defaultValue) {
             setIsInitialized(true)
             return
         }
 
-        const parsed = parseJSONSchema(sourceValue)
+        const parsed = parseJSONSchema(defaultValue)
         if (parsed) applyParsedConfig(parsed)
 
-        setSupportsBasicMode(isSchemaCompatibleWithBasicMode(sourceValue))
-        setRawSchema(sourceValue)
-        syncFormValue(sourceValue)
+        setSupportsBasicMode(isSchemaCompatibleWithBasicMode(defaultValue))
+        setRawSchema(defaultValue)
+        syncFormValue(defaultValue)
         setIsInitialized(true)
-        setIsDirty(false)
-    }, [applyParsedConfig, normalizedDefaultValue, normalizedWatchedValue, syncFormValue])
+    }, [defaultValue, applyParsedConfig, syncFormValue])
 
     useEffect(() => {
         if (!supportsBasicMode && mode !== "advanced") {
@@ -185,7 +162,7 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
 
     // Update form when basic mode changes
     useEffect(() => {
-        if (!isInitialized || !isDirty) return
+        if (!isInitialized) return
         if (mode === "basic" && supportsBasicMode) {
             const config: SchemaConfig = {
                 responseFormat,
@@ -196,12 +173,10 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
             const schema = generateJSONSchema(config)
             const schemaString = JSON.stringify(schema, null, 2)
 
-            setRawSchema(schemaString)
             syncFormValue(schemaString)
         }
     }, [
         isInitialized,
-        isDirty,
         mode,
         responseFormat,
         includeReasoning,
@@ -245,7 +220,6 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
                         const parsed = parseJSONSchema(rawSchema)
                         const config = parsed ?? getDefaultConfig()
                         applyConfigAndSync(config)
-                        setIsDirty(false)
                         setMode("basic")
                     },
                 })
@@ -255,7 +229,6 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
             const parsed = parseJSONSchema(rawSchema)
             const config = parsed ?? getDefaultConfig()
             applyConfigAndSync(config)
-            setIsDirty(false)
             setMode("basic")
             return
         }
@@ -265,19 +238,16 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
 
     const addCategory = () => {
         setCategories([...categories, {name: "", description: ""}])
-        setIsDirty(true)
     }
 
     const removeCategory = (index: number) => {
         setCategories(categories.filter((_, i) => i !== index))
-        setIsDirty(true)
     }
 
     const updateCategory = (index: number, field: "name" | "description", value: string) => {
         const updated = [...categories]
         updated[index][field] = value
         setCategories(updated)
-        setIsDirty(true)
     }
 
     if (mode === "advanced") {
@@ -351,10 +321,7 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
                         <Select
                             style={{width: "100%"}}
                             value={responseFormat}
-                            onChange={(value) => {
-                                setResponseFormat(value)
-                                setIsDirty(true)
-                            }}
+                            onChange={(value) => setResponseFormat(value)}
                             options={[
                                 {label: "Boolean (True/False)", value: "boolean"},
                                 {label: "Continuous (Numeric Range)", value: "continuous"},
@@ -391,10 +358,7 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
                                 <InputNumber
                                     style={{width: "100%"}}
                                     value={minValue}
-                                    onChange={(value) => {
-                                        setMinValue(value ?? 0)
-                                        setIsDirty(true)
-                                    }}
+                                    onChange={(value) => setMinValue(value ?? 0)}
                                 />
                             </div>
                             <div>
@@ -414,10 +378,7 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
                                 <InputNumber
                                     style={{width: "100%"}}
                                     value={maxValue}
-                                    onChange={(value) => {
-                                        setMaxValue(value ?? 10)
-                                        setIsDirty(true)
-                                    }}
+                                    onChange={(value) => setMaxValue(value ?? 10)}
                                 />
                             </div>
                         </div>
@@ -480,10 +441,7 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
                     <div style={{display: "flex", alignItems: "center", gap: 4}}>
                         <Checkbox
                             checked={includeReasoning}
-                            onChange={(e) => {
-                                setIncludeReasoning(e.target.checked)
-                                setIsDirty(true)
-                            }}
+                            onChange={(e) => setIncludeReasoning(e.target.checked)}
                         >
                             <Typography.Text strong>Include reasoning</Typography.Text>
                         </Checkbox>

From 840b13ad0af402b7875fc5dce5b6ab3a690520db Mon Sep 17 00:00:00 2001
From: Kaosiso Ezealigo <ezealigokosiso@gmail.com>
Date: Fri, 16 Jan 2026 10:10:32 +0100
Subject: [PATCH 17/20] fix: enhance schema normalization and ensure evaluator
 configuration changes persist

---
 .../JSONSchema/JSONSchemaEditor.tsx           | 118 ++++++++++--------
 1 file changed, 66 insertions(+), 52 deletions(-)

diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx
index d86583516..8189ba7dc 100644
--- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx
+++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx
@@ -34,6 +34,14 @@ interface JSONSchemaEditorProps {
     defaultValue?: string
 }
 
+const normalizeSchemaValue = (value: unknown): string | undefined => { // coerce a value to schema text
+    if (typeof value === "string") return value // strings are returned unchanged
+    if (value && typeof value === "object") { // non-null objects, arrays included
+        return JSON.stringify(value, null, 2) // serialized with 2-space indentation
+    }
+    return undefined // null, undefined and other primitives yield no schema text
+}
+
 const createDefaultCategories = (): CategoricalOption[] => [
     {name: "good", description: "The response is good"},
     {name: "bad", description: "The response is bad"},
@@ -69,19 +77,20 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
     const [categories, setCategories] = useState<CategoricalOption[]>(createDefaultCategories())
 
     // Advanced mode state
-    const [rawSchema, setRawSchema] = useState(defaultValue ?? "")
-    const [supportsBasicMode, setSupportsBasicMode] = useState<boolean>(() => {
-        if (!defaultValue) {
-            return true
-        }
-
-        return isSchemaCompatibleWithBasicMode(defaultValue)
-    })
+    const initialSchema = normalizeSchemaValue(defaultValue)
+    const [rawSchema, setRawSchema] = useState(initialSchema ?? "")
+    const [supportsBasicMode, setSupportsBasicMode] = useState<boolean>(() =>
+        initialSchema ? isSchemaCompatibleWithBasicMode(initialSchema) : true,
+    )
     const [isInitialized, setIsInitialized] = useState(false)
+    const [isDirty, setIsDirty] = useState(false)
 
     const lastSyncedValueRef = useRef<string | undefined>(undefined)
 
     const namePath = useMemo(() => (Array.isArray(name) ? name : [name]), [name])
+    const watchedValue = Form.useWatch(namePath as any, form)
+    const normalizedWatchedValue = useMemo(() => normalizeSchemaValue(watchedValue), [watchedValue])
+    const normalizedDefaultValue = useMemo(() => normalizeSchemaValue(defaultValue), [defaultValue])
 
     const applyParsedConfig = useCallback((parsed: SchemaConfig) => {
         setResponseFormat(parsed.responseFormat)
@@ -97,6 +106,7 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
         } else {
             setCategories(createDefaultCategories())
         }
+        setIsDirty(false)
     }, [])
 
     const syncFormValue = useCallback(
@@ -110,6 +120,16 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
         [form, namePath],
     )
 
+    const buildConfig = useCallback(
+        (): SchemaConfig => ({
+            responseFormat,
+            includeReasoning,
+            continuousConfig: {minimum: minValue, maximum: maxValue},
+            categoricalOptions: categories,
+        }),
+        [categories, includeReasoning, maxValue, minValue, responseFormat],
+    )
+
     const getDefaultConfig = useCallback((): SchemaConfig => {
         return {
             responseFormat: "boolean",
@@ -130,29 +150,32 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
         [applyParsedConfig, syncFormValue],
     )
 
-    // Initialize from default value
+    // Initialize from form value (preferred) or default fallback.
     useEffect(() => {
-        if (!defaultValue) {
+        const sourceValue = normalizedWatchedValue ?? normalizedDefaultValue
+        if (!sourceValue) {
             setSupportsBasicMode(true)
             setRawSchema("")
             lastSyncedValueRef.current = undefined
             setIsInitialized(true)
+            setIsDirty(false)
             return
         }
 
-        if (lastSyncedValueRef.current === defaultValue) {
+        if (lastSyncedValueRef.current === sourceValue) {
             setIsInitialized(true)
             return
         }
 
-        const parsed = parseJSONSchema(defaultValue)
+        const parsed = parseJSONSchema(sourceValue)
         if (parsed) applyParsedConfig(parsed)
 
-        setSupportsBasicMode(isSchemaCompatibleWithBasicMode(defaultValue))
-        setRawSchema(defaultValue)
-        syncFormValue(defaultValue)
+        setSupportsBasicMode(isSchemaCompatibleWithBasicMode(sourceValue))
+        setRawSchema(sourceValue)
+        syncFormValue(sourceValue)
         setIsInitialized(true)
-    }, [defaultValue, applyParsedConfig, syncFormValue])
+        setIsDirty(false)
+    }, [applyParsedConfig, normalizedDefaultValue, normalizedWatchedValue, syncFormValue])
 
     useEffect(() => {
         if (!supportsBasicMode && mode !== "advanced") {
@@ -162,30 +185,15 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
 
     // Update form when basic mode changes
     useEffect(() => {
-        if (!isInitialized) return
+        if (!isInitialized || !isDirty) return
         if (mode === "basic" && supportsBasicMode) {
-            const config: SchemaConfig = {
-                responseFormat,
-                includeReasoning,
-                continuousConfig: {minimum: minValue, maximum: maxValue},
-                categoricalOptions: categories,
-            }
-            const schema = generateJSONSchema(config)
+            const schema = generateJSONSchema(buildConfig())
             const schemaString = JSON.stringify(schema, null, 2)
 
+            setRawSchema(schemaString)
             syncFormValue(schemaString)
         }
-    }, [
-        isInitialized,
-        mode,
-        responseFormat,
-        includeReasoning,
-        minValue,
-        maxValue,
-        categories,
-        supportsBasicMode,
-        syncFormValue,
-    ])
+    }, [isInitialized, isDirty, mode, buildConfig, supportsBasicMode, syncFormValue])
 
     const handleModeSwitch = (newMode: "basic" | "advanced") => {
         if (newMode === mode) {
@@ -193,13 +201,7 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
         }
 
         if (newMode === "advanced" && mode === "basic") {
-            const config: SchemaConfig = {
-                responseFormat,
-                includeReasoning,
-                continuousConfig: {minimum: minValue, maximum: maxValue},
-                categoricalOptions: categories,
-            }
-            const schema = generateJSONSchema(config)
+            const schema = generateJSONSchema(buildConfig())
             const schemaString = JSON.stringify(schema, null, 2)
             setRawSchema(schemaString)
             syncFormValue(schemaString)
@@ -220,6 +222,7 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
                         const parsed = parseJSONSchema(rawSchema)
                         const config = parsed ?? getDefaultConfig()
                         applyConfigAndSync(config)
+                        setIsDirty(false)
                         setMode("basic")
                     },
                 })
@@ -229,6 +232,7 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
             const parsed = parseJSONSchema(rawSchema)
             const config = parsed ?? getDefaultConfig()
             applyConfigAndSync(config)
+            setIsDirty(false)
             setMode("basic")
             return
         }
@@ -238,16 +242,19 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
 
     const addCategory = () => {
         setCategories([...categories, {name: "", description: ""}])
+        setIsDirty(true)
     }
 
     const removeCategory = (index: number) => {
         setCategories(categories.filter((_, i) => i !== index))
+        setIsDirty(true)
     }
 
     const updateCategory = (index: number, field: "name" | "description", value: string) => {
         const updated = [...categories]
         updated[index][field] = value
         setCategories(updated)
+        setIsDirty(true)
     }
 
     if (mode === "advanced") {
@@ -274,12 +281,7 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
                                 setSupportsBasicMode(
                                     value ? isSchemaCompatibleWithBasicMode(value) : false,
                                 )
-
-                                if (Array.isArray(name)) {
-                                    form.setFieldValue(name, value)
-                                } else {
-                                    form.setFieldValue([name], value)
-                                }
+                                form.setFieldValue(namePath, value)
                             }
                         }}
                         editorProps={{
@@ -321,7 +323,10 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
                         <Select
                             style={{width: "100%"}}
                             value={responseFormat}
-                            onChange={(value) => setResponseFormat(value)}
+                            onChange={(value) => {
+                                setResponseFormat(value)
+                                setIsDirty(true)
+                            }}
                             options={[
                                 {label: "Boolean (True/False)", value: "boolean"},
                                 {label: "Continuous (Numeric Range)", value: "continuous"},
@@ -358,7 +363,10 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
                                 <InputNumber
                                     style={{width: "100%"}}
                                     value={minValue}
-                                    onChange={(value) => setMinValue(value ?? 0)}
+                                    onChange={(value) => {
+                                        setMinValue(value ?? 0)
+                                        setIsDirty(true)
+                                    }}
                                 />
                             </div>
                             <div>
@@ -378,7 +386,10 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
                                 <InputNumber
                                     style={{width: "100%"}}
                                     value={maxValue}
-                                    onChange={(value) => setMaxValue(value ?? 10)}
+                                    onChange={(value) => {
+                                        setMaxValue(value ?? 10)
+                                        setIsDirty(true)
+                                    }}
                                 />
                             </div>
                         </div>
@@ -441,7 +452,10 @@ export const JSONSchemaEditor: React.FC<JSONSchemaEditorProps> = ({form, name, d
                     <div style={{display: "flex", alignItems: "center", gap: 4}}>
                         <Checkbox
                             checked={includeReasoning}
-                            onChange={(e) => setIncludeReasoning(e.target.checked)}
+                            onChange={(e) => {
+                                setIncludeReasoning(e.target.checked)
+                                setIsDirty(true)
+                            }}
                         >
                             <Typography.Text strong>Include reasoning</Typography.Text>
                         </Checkbox>

From 4b4dfe0823fd9e4750aedbaeb8f5433ebe0406f3 Mon Sep 17 00:00:00 2001
From: Kaosiso Ezealigo <ezealigokosiso@gmail.com>
Date: Fri, 16 Jan 2026 11:19:07 +0100
Subject: [PATCH 18/20] feat: enable selection mode for LoadTestsetModal and
 TestsetPreviewPanel components

---
 .../assets/LoadTestsetModalContent/index.tsx  |  4 +--
 .../Modals/LoadTestsetModal/assets/types.ts   |  2 ++
 .../components/TestsetPreviewPanel.tsx        | 27 ++++++++++++++++---
 .../Modals/LoadTestsetModal/index.tsx         |  4 +--
 .../components/TestcasesTableShell.tsx        |  5 ++++
 .../ConfigureEvaluator/DebugSection.tsx       |  1 +
 6 files changed, 36 insertions(+), 7 deletions(-)

diff --git a/web/oss/src/components/Playground/Components/Modals/LoadTestsetModal/assets/LoadTestsetModalContent/index.tsx b/web/oss/src/components/Playground/Components/Modals/LoadTestsetModal/assets/LoadTestsetModalContent/index.tsx
index 08bc34b10..1efb4b713 100644
--- a/web/oss/src/components/Playground/Components/Modals/LoadTestsetModal/assets/LoadTestsetModalContent/index.tsx
+++ b/web/oss/src/components/Playground/Components/Modals/LoadTestsetModal/assets/LoadTestsetModalContent/index.tsx
@@ -23,7 +23,7 @@ const NoResultsFound = dynamic(
     },
 )
 
-const LoadTestsetModalContent = ({modalProps}: LoadTestsetModalContentProps) => {
+const LoadTestsetModalContent = ({modalProps, selectionMode}: LoadTestsetModalContentProps) => {
     const projectId = useAtomValue(projectIdAtom)
     const isCreatingNew = useAtomValue(isCreatingNewTestsetAtom)
     const router = useRouter()
@@ -80,7 +80,7 @@ const LoadTestsetModalContent = ({modalProps}: LoadTestsetModalContentProps) =>
                 <Divider orientation="vertical" className="m-0 h-full" />
 
                 <div className="w-full h-full flex flex-col gap-4 grow min-h-0 overflow-hidden">
-                    <TestsetPreviewPanel />
+                    <TestsetPreviewPanel selectionMode={selectionMode} />
                 </div>
             </section>
         </div>
diff --git a/web/oss/src/components/Playground/Components/Modals/LoadTestsetModal/assets/types.ts b/web/oss/src/components/Playground/Components/Modals/LoadTestsetModal/assets/types.ts
index f71c8eb9c..1dac3f1c1 100644
--- a/web/oss/src/components/Playground/Components/Modals/LoadTestsetModal/assets/types.ts
+++ b/web/oss/src/components/Playground/Components/Modals/LoadTestsetModal/assets/types.ts
@@ -9,6 +9,7 @@ export interface LoadTestsetSelectionPayload {
 
 export interface LoadTestsetModalProps extends ModalProps {
     setTestsetData: (payload: LoadTestsetSelectionPayload | null) => void
+    selectionMode?: "single" | "multiple"
 }
 
 /**
@@ -17,6 +18,7 @@ export interface LoadTestsetModalProps extends ModalProps {
  */
 export interface LoadTestsetModalContentProps {
     modalProps: ModalProps
+    selectionMode?: "single" | "multiple"
 }
 
 export interface LoadTestsetModalFooterProps {
diff --git a/web/oss/src/components/Playground/Components/Modals/LoadTestsetModal/components/TestsetPreviewPanel.tsx b/web/oss/src/components/Playground/Components/Modals/LoadTestsetModal/components/TestsetPreviewPanel.tsx
index bb0fb0b18..b9e58d137 100644
--- a/web/oss/src/components/Playground/Components/Modals/LoadTestsetModal/components/TestsetPreviewPanel.tsx
+++ b/web/oss/src/components/Playground/Components/Modals/LoadTestsetModal/components/TestsetPreviewPanel.tsx
@@ -22,10 +22,12 @@ const TestcasesTablePreview = ({
     revisionId,
     isCreateMode = false,
     showActions = false,
+    selectionMode = "multiple",
 }: {
     revisionId: string
     isCreateMode?: boolean
     showActions?: boolean
+    selectionMode?: "single" | "multiple"
 }) => {
     const [selectedRowKeys, setSelectedRowKeys] = useAtom(selectedTestcaseRowKeysAtom)
     const table = useTestcasesTable({revisionId, mode: isCreateMode ? "edit" : "view"})
@@ -43,6 +45,9 @@ const TestcasesTablePreview = ({
             const key = record?.key
             if (key === undefined || key === null) return
             setSelectedRowKeys((prev) => {
+                if (selectionMode === "single") {
+                    return [key]
+                }
                 const exists = prev.includes(key)
                 if (exists) {
                     return prev.filter((k) => k !== key)
@@ -54,7 +59,19 @@ const TestcasesTablePreview = ({
                 setEditingTestcaseId(recordId)
             }
         },
-        [setSelectedRowKeys, showActions],
+        [selectionMode, setSelectedRowKeys, showActions],
+    )
+
+    const handleSelectedRowKeysChange = useCallback(
+        (keys: React.Key[]) => {
+            if (selectionMode === "single") {
+                const nextKey = keys[keys.length - 1]
+                setSelectedRowKeys(nextKey !== undefined ? [nextKey] : [])
+                return
+            }
+            setSelectedRowKeys(keys)
+        },
+        [selectionMode, setSelectedRowKeys],
     )
 
     const handleAddRow = useCallback(() => {
@@ -111,7 +128,7 @@ const TestcasesTablePreview = ({
                     table={table}
                     rowHeight={rowHeight}
                     selectedRowKeys={selectedRowKeys}
-                    onSelectedRowKeysChange={setSelectedRowKeys}
+                    onSelectedRowKeysChange={handleSelectedRowKeysChange}
                     onRowClick={handleRowClick}
                     onDeleteSelected={handleDeleteSelected}
                     searchTerm={table.searchTerm}
@@ -120,6 +137,7 @@ const TestcasesTablePreview = ({
                     actions={actionsNode}
                     hideControls={false}
                     enableSelection
+                    selectionType={selectionMode === "single" ? "radio" : "checkbox"}
                     autoHeight
                     disableDeleteAction={!showActions}
                     onAddColumn={showActions ? () => setIsAddColumnModalOpen(true) : undefined}
@@ -172,7 +190,9 @@ const TestcasesTablePreview = ({
     )
 }
 
-export const TestsetPreviewPanel: React.FC = () => {
+export const TestsetPreviewPanel: React.FC<{selectionMode?: "single" | "multiple"}> = ({
+    selectionMode = "multiple",
+}) => {
     const selectedRevisionId = useAtomValue(selectedRevisionIdAtom)
     const isCreatingNew = useAtomValue(isCreatingNewTestsetAtom)
 
@@ -182,6 +202,7 @@ export const TestsetPreviewPanel: React.FC = () => {
                 revisionId={selectedRevisionId}
                 isCreateMode={isCreatingNew}
                 showActions={isCreatingNew}
+                selectionMode={selectionMode}
             />
         )
     }
diff --git a/web/oss/src/components/Playground/Components/Modals/LoadTestsetModal/index.tsx b/web/oss/src/components/Playground/Components/Modals/LoadTestsetModal/index.tsx
index 6bc171fcd..23edcc135 100644
--- a/web/oss/src/components/Playground/Components/Modals/LoadTestsetModal/index.tsx
+++ b/web/oss/src/components/Playground/Components/Modals/LoadTestsetModal/index.tsx
@@ -23,7 +23,7 @@ const LoadTestsetModalContent = dynamic(() => import("./assets/LoadTestsetModalC
 })
 
 const LoadTestsetModal: React.FC<LoadTestsetModalProps> = ({setTestsetData, ...props}) => {
-    const {onCancel, afterClose, ...modalProps} = props
+    const {onCancel, afterClose, selectionMode = "multiple", ...modalProps} = props
 
     // Use atoms for all modal state
     const selectedRevisionId = useAtomValue(selectedRevisionIdAtom)
@@ -83,7 +83,7 @@ const LoadTestsetModal: React.FC<LoadTestsetModalProps> = ({setTestsetData, ...p
             }}
             {...modalProps}
         >
-            <LoadTestsetModalContent modalProps={modalProps} />
+            <LoadTestsetModalContent modalProps={modalProps} selectionMode={selectionMode} />
         </EnhancedModal>
     )
 }
diff --git a/web/oss/src/components/TestcasesTableNew/components/TestcasesTableShell.tsx b/web/oss/src/components/TestcasesTableNew/components/TestcasesTableShell.tsx
index f4d580eb8..bc92ae11e 100644
--- a/web/oss/src/components/TestcasesTableNew/components/TestcasesTableShell.tsx
+++ b/web/oss/src/components/TestcasesTableNew/components/TestcasesTableShell.tsx
@@ -47,6 +47,8 @@ export interface TestcasesTableShellProps {
     onSearchChange: (term: string) => void
     header: React.ReactNode
     actions: React.ReactNode
+    /** Checkbox (default) or radio selection */
+    selectionType?: "checkbox" | "radio"
     hideControls?: boolean
     enableSelection?: boolean
     autoHeight?: boolean
@@ -89,6 +91,7 @@ export function TestcasesTableShell(props: TestcasesTableShellProps) {
         onSearchChange,
         header,
         actions,
+        selectionType = "checkbox",
         hideControls = false,
         enableSelection = mode !== "view",
         autoHeight = true,
@@ -135,6 +138,8 @@ export function TestcasesTableShell(props: TestcasesTableShellProps) {
                 ? {
                       selectedRowKeys: showRowIndex ? [] : selectedRowKeys,
                       onChange: showRowIndex ? undefined : onSelectedRowKeysChange,
+                      type: selectionType,
+                      hideSelectAll: selectionType === "radio",
                       columnWidth: 48,
                       fixed: "left" as const,
                       columnTitle: showRowIndex ? (
diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DebugSection.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DebugSection.tsx
index f75d653c1..5de982d58 100644
--- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DebugSection.tsx
+++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DebugSection.tsx
@@ -1136,6 +1136,7 @@ const DebugSection = () => {
                 open={openTestcaseModal}
                 onCancel={() => setOpenTestcaseModal(false)}
                 setTestsetData={handleEvaluatorTestsetData}
+                selectionMode="single"
             />
         </div>
     )

From 780011763b5a7a5b3deb4e7c6ca1efec1a74093c Mon Sep 17 00:00:00 2001
From: Mahmoud Mabrouk <mahmoud@agenta.ai>
Date: Fri, 16 Jan 2026 11:30:40 +0100
Subject: [PATCH 19/20] fix(api): remove legacy testset endpoints

- Remove legacy router mount and testset_router.py
- Remove dead frontend code (updateTestset, uploadTestsets, importTestsetsViaEndpoint, fetchTestset)
- Remove fetchAllComparisonResults and testsetCsvDataQueryAtomFamily (dead code)
- Update E2E tests to use preview endpoints
- Remove legacy testset tests from api and sdk
- Update manual test to use preview endpoint

Legacy endpoints removed:
- POST /api/testsets/upload
- POST /api/testsets/
- PUT /api/testsets/{id}
- GET /api/testsets/
- GET /api/testsets/{id}
- DELETE /api/testsets/

All testset operations now use preview endpoints:
- POST /api/preview/simple/testsets/
- GET /api/preview/simple/testsets/{id}
- PUT /api/preview/simple/testsets/{id}
- POST /api/preview/testsets/query
- POST /api/preview/simple/testsets/upload
---
 api/entrypoints/routers.py                    |   7 -
 api/oss/src/routers/evaluation_router.py      |   3 -
 api/oss/src/routers/testset_router.py         | 471 ------------
 .../test_variant_testset_router.py            | 148 ----
 api/oss/tests/legacy/testsets/__init__.py     |   0
 .../legacy/testsets/assets/baby_names.csv     |  31 -
 api/oss/tests/legacy/testsets/fixtures.py     |  28 -
 api/oss/tests/legacy/testsets/tests.py        | 703 ------------------
 .../manual/testsets/test_testset_limits.py    |  11 +-
 .../tests/pytest/testsets/legacy/__init__.py  |   0
 .../testsets/legacy/test_testsets_basics.py   | 284 -------
 .../pytest/testsets/test_testsets_jit.py      | 165 ----
 .../legacy/new_tests/testsets/__init__.py     |   0
 .../new_tests/testsets/assets/baby_names.csv  |  31 -
 .../legacy/new_tests/testsets/fixtures.py     |  28 -
 sdk/tests/legacy/new_tests/testsets/tests.py  | 703 ------------------
 .../LoadTestsetModal/assets/testsetCsvData.ts |  32 -
 web/oss/src/lib/Types.ts                      |   3 +-
 web/oss/src/services/evaluations/api/index.ts |  83 ---
 web/oss/src/services/testsets/api/index.ts    |  72 --
 web/oss/tests/5-testsset/index.ts             |  24 +-
 .../fixtures/base.fixture/apiHelpers/index.ts |  11 +-
 22 files changed, 33 insertions(+), 2805 deletions(-)
 delete mode 100644 api/oss/src/routers/testset_router.py
 delete mode 100644 api/oss/tests/legacy/old_tests/variants_main_router/test_variant_testset_router.py
 delete mode 100644 api/oss/tests/legacy/testsets/__init__.py
 delete mode 100644 api/oss/tests/legacy/testsets/assets/baby_names.csv
 delete mode 100644 api/oss/tests/legacy/testsets/fixtures.py
 delete mode 100644 api/oss/tests/legacy/testsets/tests.py
 delete mode 100644 api/oss/tests/pytest/testsets/legacy/__init__.py
 delete mode 100644 api/oss/tests/pytest/testsets/legacy/test_testsets_basics.py
 delete mode 100644 api/oss/tests/pytest/testsets/test_testsets_jit.py
 delete mode 100644 sdk/tests/legacy/new_tests/testsets/__init__.py
 delete mode 100644 sdk/tests/legacy/new_tests/testsets/assets/baby_names.csv
 delete mode 100644 sdk/tests/legacy/new_tests/testsets/fixtures.py
 delete mode 100644 sdk/tests/legacy/new_tests/testsets/tests.py
 delete mode 100644 web/oss/src/components/Playground/Components/Modals/LoadTestsetModal/assets/testsetCsvData.ts

diff --git a/api/entrypoints/routers.py b/api/entrypoints/routers.py
index 631eb3596..18d8d4c89 100644
--- a/api/entrypoints/routers.py
+++ b/api/entrypoints/routers.py
@@ -96,7 +96,6 @@
     app_router,
     environment_router,
     evaluators_router,
-    testset_router,
     user_profile,
     variants_router,
     configs_router,
@@ -556,12 +555,6 @@ async def lifespan(*args, **kwargs):
     tags=["Evaluators"],
 )
 
-app.include_router(
-    testset_router.router,
-    prefix="/testsets",
-    tags=["Testsets"],
-)
-
 app.include_router(
     environment_router.router,
     prefix="/environments",
diff --git a/api/oss/src/routers/evaluation_router.py b/api/oss/src/routers/evaluation_router.py
index 65cdc87c0..3e69ae85c 100644
--- a/api/oss/src/routers/evaluation_router.py
+++ b/api/oss/src/routers/evaluation_router.py
@@ -36,9 +36,6 @@
         NOT_ENTITLED_RESPONSE,
     )
 
-from oss.src.routers.testset_router import _validate_testset_limits
-
-
 from oss.src.apis.fastapi.evaluations.models import EvaluationRunsResponse
 
 
diff --git a/api/oss/src/routers/testset_router.py b/api/oss/src/routers/testset_router.py
deleted file mode 100644
index 4853dce04..000000000
--- a/api/oss/src/routers/testset_router.py
+++ /dev/null
@@ -1,471 +0,0 @@
-import io
-import os
-import csv
-import sys
-import json
-import httpx
-from pathlib import Path
-from typing import Optional, List, Dict, Any
-from datetime import datetime, timezone
-
-from pydantic import ValidationError
-from fastapi.responses import JSONResponse
-from fastapi import HTTPException, UploadFile, File, Form, Request, Query
-
-from oss.src.utils.logging import get_module_logger
-from oss.src.services import db_manager
-from oss.src.utils.common import APIRouter, is_ee
-from oss.src.models.converters import testset_db_to_pydantic
-
-from oss.src.utils.common import is_uuid7
-from oss.src.models.api.testset_model import (
-    NewTestset,
-    DeleteTestsets,
-    TestsetSimpleResponse,
-    TestsetOutputResponse,
-)
-
-PARENT_DIRECTORY = Path(__file__).parent
-ASSETS_DIRECTORY = os.path.join(str(PARENT_DIRECTORY), "/resources/default_testsets")
-
-if is_ee():
-    from ee.src.utils.permissions import (
-        check_action_access,
-    )  # noqa pylint: disable-all
-    from ee.src.models.shared_models import (
-        Permission,
-    )  # noqa pylint: disable-all
-
-
-router = APIRouter()
-
-log = get_module_logger(__name__)
-
-upload_folder = "./path/to/upload/folder"
-
-
-def _infer_columns_from_csvdata(csvdata: Any) -> List[str]:
-    if not isinstance(csvdata, list) or len(csvdata) == 0:
-        return []
-
-    first_row = csvdata[0]
-    if not isinstance(first_row, dict):
-        return []
-
-    data_section = first_row.get("data")
-    if isinstance(data_section, dict):
-        return [str(key) for key in data_section.keys()]
-
-    return [str(key) for key in first_row.keys()]
-
-
-def _infer_columns_from_revision_data(revision_data: Any) -> List[str]:
-    testcases = getattr(revision_data, "testcases", None)
-    if not isinstance(testcases, list) or len(testcases) == 0:
-        return []
-
-    first_case = testcases[0]
-    data_section = None
-    if hasattr(first_case, "data"):
-        data_section = getattr(first_case, "data")
-    elif isinstance(first_case, dict):
-        maybe_data = first_case.get("data")
-        data_section = maybe_data if isinstance(maybe_data, dict) else first_case
-
-    if isinstance(data_section, dict) and data_section:
-        return [str(key) for key in data_section.keys()]
-
-    return []
-
-
-TESTSETS_COUNT_LIMIT = 10 * 1_000  # 10,000 testcases per testset
-TESTSETS_SIZE_LIMIT = 10 * 1024 * 1024  # 10 MB per testset
-TESTSETS_FIELD_SIZE_LIMIT = 10 * 1024 * 1024  # 10 MB per CSV field
-
-csv.field_size_limit(TESTSETS_FIELD_SIZE_LIMIT)
-
-TESTSETS_COUNT_WARNING = f"Testset exceeds the maximum count of {TESTSETS_COUNT_LIMIT} testcases per testset."
-TESTSETS_SIZE_WARNING = f"Testset exceeds the maximum size of {TESTSETS_SIZE_LIMIT // (1024 * 1024)} MB per testset."
-
-TESTSETS_SIZE_EXCEPTION = HTTPException(
-    status_code=400,
-    detail=TESTSETS_SIZE_WARNING,
-)
-
-TESTSETS_COUNT_EXCEPTION = HTTPException(
-    status_code=400,
-    detail=TESTSETS_COUNT_WARNING,
-)
-
-
-def _validate_testset_limits(rows: List[dict]) -> tuple[int, int]:
-    i = -1
-    total_size = 2
-    for i, row in enumerate(rows):
-        row_str = json.dumps(row)
-        total_size += len(row_str.encode("utf-8"))
-        if i > 0:
-            total_size += 1
-        if i + 1 > TESTSETS_COUNT_LIMIT:
-            log.error(TESTSETS_COUNT_WARNING)
-            raise TESTSETS_COUNT_EXCEPTION
-        if total_size > TESTSETS_SIZE_LIMIT:
-            log.error(TESTSETS_SIZE_WARNING)
-            raise TESTSETS_SIZE_EXCEPTION
-    return i + 1, total_size
-
-
-@router.post(
-    "/upload", response_model=TestsetSimpleResponse, operation_id="upload_file"
-)
-async def upload_file(
-    request: Request,
-    upload_type: str = Form(None),
-    file: UploadFile = File(...),
-    testset_name: Optional[str] = File(None),
-):
-    """
-    Uploads a CSV or JSON file and saves its data to Postgres.
-
-    Args:
-    upload_type : Either a json or csv file.
-        file (UploadFile): The CSV or JSON file to upload.
-        testset_name (Optional): the name of the testset if provided.
-
-    Returns:
-        dict: The result of the upload process.
-    """
-
-    if is_ee():
-        has_permission = await check_action_access(
-            user_uid=request.state.user_id,
-            project_id=request.state.project_id,
-            permission=Permission.CREATE_TESTSET,
-        )
-        if not has_permission:
-            error_msg = f"You do not have permission to perform this action. Please contact your organization admin."
-            log.error(error_msg)
-            return JSONResponse(
-                {"detail": error_msg},
-                status_code=403,
-            )
-
-    if file.size > TESTSETS_SIZE_LIMIT:  # Preemptively check file size
-        raise TESTSETS_SIZE_EXCEPTION
-
-    # Create a document
-    document = {
-        "name": testset_name if testset_name else file.filename,
-        "csvdata": [],
-    }
-
-    if upload_type.upper() == "JSON":
-        # Read and parse the JSON file
-        json_data = await file.read()
-        json_text = json_data.decode("utf-8")
-        json_object = json.loads(json_text)
-
-        # Populate the document with column names and values
-        for i, row in enumerate(json_object):
-            document["csvdata"].append(row)
-
-    elif upload_type.upper() == "CSV" or upload_type is None:
-        # Read and parse the CSV file
-        csv_data = await file.read()
-        csv_text = csv_data.decode("utf-8")
-
-        # Use StringIO to create a file-like object from the string
-        csv_file_like_object = io.StringIO(csv_text)
-        csv_reader = csv.DictReader(csv_file_like_object)
-
-        # Populate the document with rows from the CSV reader
-        for i, row in enumerate(csv_reader):
-            document["csvdata"].append(row)
-
-    else:
-        log.error(f"Unsupported upload type: {upload_type}")
-        raise HTTPException(status_code=400, detail="Unsupported upload type")
-
-    _validate_testset_limits(document["csvdata"])
-
-    try:
-        testset = await db_manager.create_testset(
-            project_id=request.state.project_id,
-            #
-            testset_data=document,
-        )
-        return TestsetSimpleResponse(
-            id=str(testset.id),
-            name=document["name"],
-            created_at=str(testset.created_at),
-        )
-    except ValidationError as e:
-        raise HTTPException(status_code=403, detail=e.errors())
-
-
-@router.post(
-    "/", response_model=TestsetSimpleResponse, operation_id="create_legacy_testset"
-)
-async def create_testset(
-    csvdata: NewTestset,
-    request: Request,
-    #
-    testset_id: Optional[str] = None,
-):
-    """
-    Create a testset with given name, save the testset to Postgres.
-
-    Args:
-    name (str): name of the testset.
-    testset (Dict[str, str]): testset data.
-
-    Returns:
-    str: The id of the testset created.
-    """
-
-    if is_ee():
-        has_permission = await check_action_access(
-            user_uid=request.state.user_id,
-            project_id=request.state.project_id,
-            permission=Permission.CREATE_TESTSET,
-        )
-        if not has_permission:
-            error_msg = f"You do not have permission to perform this action. Please contact your organization admin."
-            log.error(error_msg)
-            return JSONResponse(
-                {"detail": error_msg},
-                status_code=403,
-            )
-
-    if testset_id is not None and not is_uuid7(testset_id):
-        raise HTTPException(
-            status_code=400, detail="Invalid testset_id format. Must be UUIDv7."
-        )
-
-    _validate_testset_limits(csvdata.csvdata)
-
-    testset_data = {
-        "name": csvdata.name,
-        "csvdata": csvdata.csvdata,
-    }
-    testset_instance = await db_manager.create_testset(
-        project_id=request.state.project_id,
-        #
-        testset_data=testset_data,
-        #
-        testset_id=testset_id,
-    )
-    if testset_instance is not None:
-        return TestsetSimpleResponse(
-            id=str(testset_instance.id),
-            name=testset_instance.name,  # type: ignore
-            created_at=str(testset_instance.created_at),
-        )
-
-
-@router.put("/{testset_id}", operation_id="update_testset")
-async def update_testset(
-    testset_id: str,
-    csvdata: NewTestset,
-    request: Request,
-):
-    """
-    Update a testset with given id, update the testset in Postgres.
-
-    Args:
-    testset_id (str): id of the testset to be updated.
-    csvdata (NewTestset): New data to replace the old testset.
-
-    Returns:
-    str: The id of the testset updated.
-    """
-
-    testset = await db_manager.fetch_testset_by_id(
-        project_id=request.state.project_id,
-        #
-        testset_id=testset_id,
-    )
-    if testset is None:
-        raise HTTPException(status_code=404, detail="testset not found")
-
-    if is_ee():
-        has_permission = await check_action_access(
-            user_uid=request.state.user_id,
-            project_id=str(testset.project_id),
-            permission=Permission.EDIT_TESTSET,
-        )
-        if not has_permission:
-            error_msg = f"You do not have permission to perform this action. Please contact your organization admin."
-            log.error(error_msg)
-            return JSONResponse(
-                {"detail": error_msg},
-                status_code=403,
-            )
-
-    _validate_testset_limits(csvdata.csvdata)
-
-    testset_update = {
-        "name": csvdata.name,
-        "csvdata": csvdata.csvdata,
-        "updated_at": datetime.now(timezone.utc),
-    }
-    await db_manager.update_testset(
-        project_id=request.state.project_id,
-        #
-        testset_id=str(testset.id),
-        #
-        values_to_update=testset_update,
-    )
-    return {
-        "status": "success",
-        "message": "testset updated successfully",
-        "_id": testset_id,
-    }
-
-
-@router.get("/", operation_id="get_testsets")
-async def get_testsets(
-    request: Request,
-    #
-    name: Optional[str] = Query(None),
-) -> List[TestsetOutputResponse]:
-    """
-    Get all testsets.
-
-    Returns:
-    - A list of testset objects.
-
-    Raises:
-    - `HTTPException` with status code 404 if no testsets are found.
-    """
-    try:
-        if is_ee():
-            has_permission = await check_action_access(
-                user_uid=request.state.user_id,
-                project_id=request.state.project_id,
-                permission=Permission.VIEW_TESTSET,
-            )
-            if not has_permission:
-                error_msg = (
-                    "You do not have permission to perform this action. "
-                    + "Please contact your organization admin."
-                )
-                log.error(error_msg)
-
-                return JSONResponse(
-                    status_code=403,
-                    content={"detail": error_msg},
-                )
-
-        testsets = await db_manager.fetch_testsets_by_project_id(
-            project_id=request.state.project_id,
-            name=name,
-        )
-
-        return [
-            TestsetOutputResponse(
-                _id=str(testset.id),  # type: ignore
-                name=testset.name,
-                created_at=str(testset.created_at),
-                updated_at=str(testset.updated_at),
-                columns=_infer_columns_from_csvdata(testset.csvdata),
-            )
-            for testset in testsets
-        ]
-
-    except Exception as e:
-        log.error(f"An error occurred: {str(e)}")
-
-        raise HTTPException(
-            status_code=500,
-            detail=str(e),
-        )
-
-
-@router.get("/{testset_id}", operation_id="get_single_testset")
-async def get_single_testset(
-    testset_id: str,
-    request: Request,
-):
-    """
-    Fetch a specific testset in Postgres.
-
-    Args:
-        testset_id (str): The id of the testset to fetch.
-
-    Returns:
-        The requested testset if found, else an HTTPException.
-    """
-
-    try:
-        testset = await db_manager.fetch_testset_by_id(
-            project_id=request.state.project_id,
-            #
-            testset_id=testset_id,
-        )
-    except Exception as exc:
-        log.error(f"An error occurred: {str(exc)}", exc_info=True)
-        status_code = exc.status_code if hasattr(exc, "status_code") else 500  # type: ignore
-        raise HTTPException(status_code=status_code, detail=str(exc))
-
-    if testset is not None:
-        if is_ee():
-            has_permission = await check_action_access(
-                user_uid=request.state.user_id,
-                project_id=str(testset.project_id),
-                permission=Permission.VIEW_TESTSET,
-            )
-            if not has_permission:
-                error_msg = f"You do not have permission to perform this action. Please contact your organization admin."
-                log.error(error_msg)
-                return JSONResponse(
-                    {"detail": error_msg},
-                    status_code=403,
-                )
-    else:
-        raise HTTPException(status_code=404, detail="testset not found")
-
-    return testset_db_to_pydantic(testset)
-
-
-@router.delete("/", response_model=List[str], operation_id="delete_testsets")
-async def delete_testsets(
-    payload: DeleteTestsets,
-    request: Request,
-):
-    """
-    Delete specific testsets based on their unique IDs.
-
-    Args:
-    testset_ids (List[str]): The unique identifiers of the testsets to delete.
-
-    Returns:
-    A list of the deleted testsets' IDs.
-    """
-
-    if is_ee():
-        for testset_id in payload.testset_ids:
-            testset = await db_manager.fetch_testset_by_id(
-                project_id=request.state.project_id,
-                #
-                testset_id=testset_id,
-            )
-            has_permission = await check_action_access(
-                user_uid=request.state.user_id,
-                project_id=str(testset.project_id),
-                permission=Permission.DELETE_TESTSET,
-            )
-            if not has_permission:
-                error_msg = f"You do not have permission to perform this action. Please contact your organization admin."
-                log.error(error_msg)
-                return JSONResponse(
-                    {"detail": error_msg},
-                    status_code=403,
-                )
-
-    await db_manager.remove_testsets(
-        project_id=request.state.project_id,
-        #
-        testset_ids=payload.testset_ids,
-    )
-    return payload.testset_ids
diff --git a/api/oss/tests/legacy/old_tests/variants_main_router/test_variant_testset_router.py b/api/oss/tests/legacy/old_tests/variants_main_router/test_variant_testset_router.py
deleted file mode 100644
index ab129bcac..000000000
--- a/api/oss/tests/legacy/old_tests/variants_main_router/test_variant_testset_router.py
+++ /dev/null
@@ -1,148 +0,0 @@
-import os
-
-import httpx
-import pytest
-from sqlalchemy.future import select
-
-from oss.src.models.db_models import (
-    AppDB,
-    TestsetDB,
-)
-
-from oss.src.dbs.postgres.shared.engine import engine
-
-# Initialize http client
-test_client = httpx.AsyncClient()
-timeout = httpx.Timeout(timeout=5, read=None, write=5)
-
-# Set global variables
-ENVIRONMENT = os.environ.get("ENVIRONMENT")
-if ENVIRONMENT == "development":
-    BACKEND_API_HOST = "http://host.docker.internal/api"
-elif ENVIRONMENT == "github":
-    BACKEND_API_HOST = "http://agenta-backend-test:8000"
-
-
-# TODO: test_csv_upload_file
-# TODO: test_json_upload_file
-
-
-@pytest.mark.asyncio
-async def test_create_testset():
-    payload = {
-        "name": "create_testset_main",
-        "csvdata": [
-            {
-                "country": "Comoros",
-                "correct_answer": "The capital of Comoros is Moroni",
-            },
-            {
-                "country": "Kyrgyzstan",
-                "correct_answer": "The capital of Kyrgyzstan is Bishkek",
-            },
-            {
-                "country": "Azerbaijan",
-                "correct_answer": "The capital of Azerbaijan is Baku",
-            },
-        ],
-    }
-    response = await test_client.post(
-        f"{BACKEND_API_HOST}/testsets",
-        json=payload,
-    )
-    assert response.status_code == 200
-    assert response.json()["name"] == payload["name"]
-
-
-@pytest.mark.asyncio
-async def test_update_testset():
-    async with engine.core_session() as session:
-        result = await session.execute(
-            select(AppDB).filter_by(app_name="app_variant_test")
-        )
-        app = result.scalars().first()
-
-        testset_result = await session.execute(
-            select(TestsetDB).filter_by(project_id=app.project_id)
-        )
-        testset = testset_result.scalars().first()
-
-        payload = {
-            "name": "update_testset",
-            "csvdata": [
-                {
-                    "country": "Comoros",
-                    "correct_answer": "The capital of Comoros is Moroni",
-                },
-                {
-                    "country": "Kyrgyzstan",
-                    "correct_answer": "The capital of Kyrgyzstan is Bishkek",
-                },
-                {
-                    "country": "Azerbaijan",
-                    "correct_answer": "The capital of Azerbaijan is Baku",
-                },
-            ],
-        }
-        response = await test_client.put(
-            f"{BACKEND_API_HOST}/testsets/{str(testset.id)}/", json=payload
-        )
-
-        assert response.status_code == 200
-        assert response.json()["_id"] == str(testset.id)
-        assert response.json()["status"] == "success"
-        assert response.json()["message"] == "testset updated successfully"
-
-
-@pytest.mark.asyncio
-async def test_get_testsets():
-    response = await test_client.get(f"{BACKEND_API_HOST}/testsets")
-
-    assert response.status_code == 200
-    assert len(response.json()) == 2
-
-
-@pytest.mark.asyncio()
-async def test_get_testset():
-    async with engine.core_session() as session:
-        result = await session.execute(
-            select(AppDB).filter_by(app_name="app_variant_test")
-        )
-        app = result.scalars().first()
-
-        testset_result = await session.execute(
-            select(TestsetDB).filter_by(project_id=app.project_id)
-        )
-        testset = testset_result.scalars().first()
-
-        response = await test_client.get(
-            f"{BACKEND_API_HOST}/testsets/{str(testset.id)}/"
-        )
-
-        assert response.status_code == 200
-        assert response.json()["name"] == testset.name
-        assert response.json()["id"] == str(testset.id)
-
-
-@pytest.mark.asyncio
-async def test_delete_testsets():
-    async with engine.core_session() as session:
-        result = await session.execute(
-            select(AppDB).filter_by(app_name="app_variant_test")
-        )
-        app = result.scalars().first()
-
-        testset_result = await session.execute(
-            select(TestsetDB).filter_by(project_id=app.project_id)
-        )
-        testsets = testset_result.scalars().all()
-
-        testset_ids = [str(testset.id) for testset in testsets]
-        payload = {"testset_ids": testset_ids}
-
-        response = await test_client.request(
-            method="DELETE", url=f"{BACKEND_API_HOST}/testsets/", json=payload
-        )
-
-        assert response.status_code == 200
-        assert response.json() == testset_ids
diff --git a/api/oss/tests/legacy/testsets/__init__.py b/api/oss/tests/legacy/testsets/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/api/oss/tests/legacy/testsets/assets/baby_names.csv b/api/oss/tests/legacy/testsets/assets/baby_names.csv
deleted file mode 100644
index 0c3508a75..000000000
--- a/api/oss/tests/legacy/testsets/assets/baby_names.csv
+++ /dev/null
@@ -1,31 +0,0 @@
-gender,country,correct_answer
-Female,Nigeria,"1. Adaeze
-2. Chidinma
-3. Amarachi
-4. Ifeoma
-5. Ngozi
-6. Yemisi
-7. Zara
-8. Chinelo
-9. Funmilayo
-10. Temitope"
-Female,Ghana,"1. Akosua
-2. Abena
-3. Adwoa
-4. Afia
-5. Ama
-6. Aku
-7. Asabea
-8. Ayodele
-9. Ekua
-10. Yaa"
-Female,Malta,"1. Maria
-2. Elena
-3. Giorgia
-4. Alessia
-5. Sofia
-6. Valentina
-7. Francesca
-8. Giulia
-9. Chiara
-10. Emilia"
\ No newline at end of file
diff --git a/api/oss/tests/legacy/testsets/fixtures.py b/api/oss/tests/legacy/testsets/fixtures.py
deleted file mode 100644
index 25c0d0ab4..000000000
--- a/api/oss/tests/legacy/testsets/fixtures.py
+++ /dev/null
@@ -1,28 +0,0 @@
-async def create_testset(client, testset_name: str, headers: dict):
-    """
-    Factory fixture to create a new testset.
-    """
-
-    response = await client.post(
-        f"testsets", json={"name": testset_name, "csvdata": []}, headers=headers
-    )
-    response.raise_for_status()
-    response_data = response.json()
-
-    return response_data
-
-
-async def delete_testset(client, testset_id: str, headers: dict):
-    """
-    Factory fixture to delete a testset.
-    """
-
-    response = await client.request(
-        "DELETE",
-        f"testsets",
-        json={"testset_ids": [testset_id]},
-        headers=headers,
-    )
-    response.raise_for_status()
-
-    return response
diff --git a/api/oss/tests/legacy/testsets/tests.py b/api/oss/tests/legacy/testsets/tests.py
deleted file mode 100644
index 68931fd20..000000000
--- a/api/oss/tests/legacy/testsets/tests.py
+++ /dev/null
@@ -1,703 +0,0 @@
-import os
-import uuid
-from pathlib import Path
-
-import pytest
-import pytest_asyncio
-
-from oss.src.tests.testsets.fixtures import *
-
-
-DATASETS_DIRECTORY = Path(__file__).parent
-ASSETS_DIRECTORY = os.path.join(str(DATASETS_DIRECTORY), "/datasets/assets")
-
-
-class TestDatasetsCreation:
-    @pytest_asyncio.fixture(autouse=True)
-    async def setup_fixture(
-        self,
-        request,
-        create_programmatic_owner_user,
-        create_programmatic_non_member_user,
-        create_programmatic_all_users,
-    ):
-        request.cls.owner_scope_response = create_programmatic_owner_user
-        request.cls.non_member_scope_response = create_programmatic_non_member_user
-        request.cls.all_members_scope_response = create_programmatic_all_users
-
-    # @pytest.mark.asyncio
-    # @pytest.mark.typical
-    # @pytest.mark.happy
-    # @pytest.mark.functional
-    # async def test_upload_file_success(self, http_client):
-    #     # Arrange
-    #     expected_status = 200
-    #     testset_name = f"testset_{uuid.uuid4().hex[:8]}"
-    #     description = "Upload file successfully"
-    #     headers = {"Authorization": self.owner_scope_response.get("credentials", "")}
-
-    #     # Act
-    #     with open(f"{ASSETS_DIRECTORY}/baby_names.csv", "rb") as file:
-    #         files = {
-    #             "upload_type": (None, ""),
-    #             "file": (
-    #                 "baby_names.csv",
-    #                 file,
-    #                 "text/csv",
-    #             ),
-    #             "testset_name": (None, testset_name),
-    #         }
-    #         response = await http_client.put(
-    #             f"/testsets/upload", headers=headers, files=files
-    #         )
-
-    #     response.raise_for_status()
-    #     response_data = response.json()
-
-    #     # Assert
-    #     assert (
-    #         response.status_code == expected_status
-    #     ), f"Failed for case: {description}"
-    #     assert "id" in response_data, f"Failed for case: {description}"
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.grumpy
-    @pytest.mark.functional
-    async def test_upload_file_validation_failure(self, http_client):
-        # Arrange
-        expected_status = 422
-        description = "Upload file with invalid format"
-        headers = {"Authorization": self.owner_scope_response.get("credentials", "")}
-        invalid_file_data = {"csv_file": ("invalidfile.txt", b"Invalid data")}
-
-        # Act
-        response = await http_client.post(
-            "/testsets/upload", headers=headers, files=invalid_file_data
-        )
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-
-    # @pytest.mark.asyncio
-    # @pytest.mark.typical
-    # @pytest.mark.grumpy
-    # @pytest.mark.security
-    # async def test_upload_file_non_member_access(self, http_client):
-    #     # Arrange
-    #     expected_status = 403
-    #     testset_name = f"testset_{uuid.uuid4().hex[:8]}"
-    #     description = "Non-member tries to upload a file"
-    #     non_member_headers = {
-    #         "Authorization": self.non_member_scope_response.get("credentials", "")
-    #     }
-
-    #     # Act
-    #     # with open(f"{DATASETS_DIRECTORY}/assets/baby_names.csv", "rb") as file:
-    #     files = {
-    #         "file": open(f"{DATASETS_DIRECTORY}/assets/baby_names.csv", "rb"),
-    #         "testset_name": (None, testset_name),
-    #     }
-    #     response = await http_client.post(
-    #         "/testsets/upload", headers=non_member_headers, files=files
-    #     )
-
-    #     # Assert
-    #     assert (
-    #         response.status_code == expected_status
-    #     ), f"Failed for case: {description}"
-
-    # @pytest.mark.asyncio
-    # @pytest.mark.typical
-    # @pytest.mark.grumpy
-    # @pytest.mark.security
-    # async def test_upload_file_non_owner_access(self, http_client):
-    #     # Arrange
-    #     expected_status = 403
-    #     description = "Non-owner tries to upload a file"
-    #     non_owner_headers = {
-    #         "Authorization": self.all_members_scope_response.get("credentials", "")
-    #     }
-    #     file_data = {"file": ("testfile.csv", b"Test data")}
-
-    #     # Act
-    #     response = await http_client.post(
-    #         "/testsets/upload", headers=non_owner_headers, files=file_data
-    #     )
-
-    #     # Assert
-    #     assert (
-    #         response.status_code == expected_status
-    #     ), f"Failed for case: {description}"
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.happy
-    @pytest.mark.security
-    async def test_get_testset_owner_access(self, http_client):
-        # Arrange
-        expected_status = 200
-        description = "Owner accesses testset details"
-        headers = {"Authorization": self.owner_scope_response.get("credentials", "")}
-        testset_name = f"testset_{uuid.uuid4().hex[:8]}"
-        testset = await create_testset(http_client, testset_name, headers)
-
-        # Act
-        response = await http_client.get(f"/testsets/{testset['id']}", headers=headers)
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-        assert "id" in response.json(), f"Failed for case: {description}"
-
-        # Cleanup
-        await delete_testset(http_client, testset["id"], headers)
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.happy
-    @pytest.mark.functional
-    async def test_create_testset_success(self, http_client):
-        # Arrange
-        expected_status = 200
-        description = "Create testset successfully"
-        headers = {"Authorization": self.owner_scope_response.get("credentials", "")}
-        data = {
-            "name": f"testset_{uuid.uuid4().hex[:8]}",
-            "csvdata": [
-                {
-                    "country": "Comoros",
-                    "correct_answer": "The capital of Comoros is Moroni",
-                },
-                {
-                    "country": "Kyrgyzstan",
-                    "correct_answer": "The capital of Kyrgyzstan is Bishkek",
-                },
-                {
-                    "country": "Azerbaijan",
-                    "correct_answer": "The capital of Azerbaijan is Baku",
-                },
-            ],
-        }
-
-        # Act
-        response = await http_client.post("/testsets", headers=headers, json=data)
-        response_data = response.json()
-
-        # Cleanup
-        await delete_testset(http_client, response_data["id"], headers)
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-        assert "id" in response_data, f"Failed for case: {description}"
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.grumpy
-    @pytest.mark.functional
-    async def test_create_testset_validation_failure(self, http_client):
-        # Arrange
-        expected_status = 422
-        description = "Create testset with invalid data"
-        headers = {"Authorization": self.owner_scope_response.get("credentials", "")}
-        invalid_data = {"testset_name": ""}
-
-        # Act
-        response = await http_client.post(
-            "/testsets", headers=headers, json=invalid_data
-        )
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.grumpy
-    @pytest.mark.security
-    async def test_create_testset_non_member_access(self, http_client):
-        # Arrange
-        expected_status = 403
-        description = "Non-member tries to create a testset"
-        owner_scope_response = self.owner_scope_response
-        non_member_api_credentials = self.non_member_scope_response.get(
-            "credentials", ""
-        )
-        non_member_headers = {"Authorization": non_member_api_credentials}
-        owner_project_id = owner_scope_response.get("project", {}).get("id")
-        data = {"name": f"testset_{uuid.uuid4().hex[:8]}", "csvdata": []}
-
-        # Act
-        response = await http_client.post(
-            f"/testsets?project_id={owner_project_id}",
-            headers=non_member_headers,
-            json=data,
-        )
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.happy
-    @pytest.mark.functional
-    async def test_no_element(self, http_client):
-        # Arrange
-        expected_status = 200
-        description = "List testsets with no elements"
-        headers = {"Authorization": self.owner_scope_response.get("credentials", "")}
-
-        # Act
-        response = await http_client.get("/testsets", headers=headers)
-        response_data = response.json()
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-        assert len(response_data) == 0, f"Failed for case: {description}"
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.happy
-    @pytest.mark.functional
-    async def test_one_element(self, http_client):
-        # Arrange
-        expected_status = 200
-        description = "List testsets with one element"
-        headers = {"Authorization": self.owner_scope_response.get("credentials", "")}
-        testset_name = f"testset_{uuid.uuid4().hex[:8]}"
-        await create_testset(http_client, testset_name, headers)
-
-        # Act
-        response = await http_client.get("/testsets", headers=headers)
-        response_data = response.json()
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-        assert len(response_data) == 1, f"Failed for case: {description}"
-
-        # Cleanup
-        await delete_testset(http_client, response_data[0]["_id"], headers)
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.happy
-    @pytest.mark.functional
-    async def test_many_elements_small_data(self, http_client):
-        # Arrange
-        expected_status = 200
-        description = "List testsets with small dataset"
-        headers = {"Authorization": self.owner_scope_response.get("credentials", "")}
-        for _ in range(3):
-            testset_name = f"testset_{uuid.uuid4().hex[:8]}"
-            await create_testset(http_client, testset_name, headers)
-
-        # Act
-        response = await http_client.get("/testsets", headers=headers)
-        response_data = response.json()
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-        assert len(response_data) == 3, f"Failed for case: {description}"
-
-        # Cleanup
-        for testset in response_data:
-            await delete_testset(http_client, testset["_id"], headers)
-
-    @pytest.mark.asyncio
-    @pytest.mark.edge
-    @pytest.mark.happy
-    @pytest.mark.functional
-    async def test_many_elements_big_data(self, http_client):
-        # Arrange
-        expected_status = 200
-        description = "List testsets with large dataset"
-        headers = {"Authorization": self.owner_scope_response.get("credentials", "")}
-        for _ in range(6):
-            testset_name = f"testset_{uuid.uuid4().hex[:8]}"
-            await create_testset(http_client, testset_name, headers)
-
-        # Act
-        response = await http_client.get("/testsets", headers=headers)
-        response_data = response.json()
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-        assert len(response_data) == 6, f"Failed for case: {description}"
-
-        # Cleanup
-        for testset in response_data:
-            await delete_testset(http_client, testset["_id"], headers)
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.grumpy
-    @pytest.mark.security
-    async def test_permissions_principal_not_in_scope(self, http_client):
-        # Arrange
-        expected_status = 403
-        description = "Access control for non-member"
-        owner_scope_response = self.owner_scope_response
-        owner_headers = {"Authorization": owner_scope_response.get("credentials", "")}
-        non_member_api_credentials = self.non_member_scope_response.get(
-            "credentials", ""
-        )
-        non_member_headers = {"Authorization": non_member_api_credentials}
-        owner_project_id = owner_scope_response.get("project", {}).get("id")
-        testset_name = f"testset_{uuid.uuid4().hex[:8]}"
-        testset = await create_testset(http_client, testset_name, owner_headers)
-
-        # Act
-        response = await http_client.get(
-            f"/testsets?project_id={owner_project_id}", headers=non_member_headers
-        )
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-
-        # Cleanup
-        await delete_testset(http_client, testset["id"], owner_headers)
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.happy
-    @pytest.mark.security
-    async def test_permissions_allowed(self, http_client):
-        # Arrange
-        expected_status = 200
-        description = "Access control for owner"
-        owner_headers = {
-            "Authorization": self.owner_scope_response.get("credentials", "")
-        }
-
-        # Act
-        response = await http_client.get("/testsets", headers=owner_headers)
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.happy
-    @pytest.mark.functional
-    async def test_no_element(self, http_client):
-        # Arrange
-        expected_status = 200
-        description = "List testsets with no elements"
-        headers = {"Authorization": self.owner_scope_response.get("credentials", "")}
-
-        # Act
-        response = await http_client.get("/testsets", headers=headers)
-        response_data = response.json()
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-        assert len(response_data) == 0, f"Failed for case: {description}"
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.happy
-    @pytest.mark.functional
-    async def test_one_element(self, http_client):
-        # Arrange
-        expected_status = 200
-        description = "List testsets with one element"
-        headers = {"Authorization": self.owner_scope_response.get("credentials", "")}
-        testset_name = f"testset_{uuid.uuid4().hex[:8]}"
-        await create_testset(http_client, testset_name, headers)
-
-        # Act
-        response = await http_client.get("/testsets", headers=headers)
-        response_data = response.json()
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-        assert len(response_data) == 1, f"Failed for case: {description}"
-
-        # Cleanup
-        await delete_testset(http_client, response_data[0]["_id"], headers)
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.happy
-    @pytest.mark.functional
-    async def test_many_elements_small_data(self, http_client):
-        # Arrange
-        expected_status = 200
-        description = "List testsets with small dataset"
-        headers = {"Authorization": self.owner_scope_response.get("credentials", "")}
-        for _ in range(3):
-            testset_name = f"testset_{uuid.uuid4().hex[:8]}"
-            await create_testset(http_client, testset_name, headers)
-
-        # Act
-        response = await http_client.get("/testsets", headers=headers)
-        response_data = response.json()
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-        assert len(response_data) == 3, f"Failed for case: {description}"
-
-        # Cleanup
-        for testset in response_data:
-            await delete_testset(http_client, testset["_id"], headers)
-
-    @pytest.mark.asyncio
-    @pytest.mark.edge
-    @pytest.mark.happy
-    @pytest.mark.functional
-    async def test_many_elements_big_data(self, http_client):
-        # Arrange
-        expected_status = 200
-        description = "List testsets with large dataset"
-        headers = {"Authorization": self.owner_scope_response.get("credentials", "")}
-        for _ in range(6):
-            testset_name = f"testset_{uuid.uuid4().hex[:8]}"
-            await create_testset(http_client, testset_name, headers)
-
-        # Act
-        response = await http_client.get("/testsets", headers=headers)
-        response_data = response.json()
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-        assert len(response_data) == 6, f"Failed for case: {description}"
-
-        # Cleanup
-        for testset in response_data:
-            await delete_testset(http_client, testset["_id"], headers)
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.grumpy
-    @pytest.mark.security
-    async def test_permissions_principal_not_in_scope(self, http_client):
-        # Arrange
-        expected_status = 403
-        description = "Access control for non-member"
-        owner_scope_response = self.owner_scope_response
-        owner_headers = {"Authorization": owner_scope_response.get("credentials", "")}
-        non_member_api_credentials = self.non_member_scope_response.get(
-            "credentials", ""
-        )
-        non_member_headers = {"Authorization": non_member_api_credentials}
-        owner_project_id = owner_scope_response.get("project", {}).get("id")
-        testset_name = f"testset_{uuid.uuid4().hex[:8]}"
-        testset = await create_testset(http_client, testset_name, owner_headers)
-
-        # Act
-        response = await http_client.get(
-            f"/testsets?project_id={owner_project_id}", headers=non_member_headers
-        )
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-
-        # Cleanup
-        await delete_testset(http_client, testset["id"], owner_headers)
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.happy
-    @pytest.mark.security
-    async def test_permissions_allowed(self, http_client):
-        # Arrange
-        expected_status = 200
-        description = "Access control for owner"
-        owner_headers = {
-            "Authorization": self.owner_scope_response.get("credentials", "")
-        }
-
-        # Act
-        response = await http_client.get("/testsets", headers=owner_headers)
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.happy
-    @pytest.mark.functional
-    async def test_update_success(self, http_client):
-        # Arrange
-        expected_status = 200
-        description = "Update testset successfully"
-        headers = {"Authorization": self.owner_scope_response.get("credentials", "")}
-        testset_name = f"testset_{uuid.uuid4().hex[:8]}"
-        testset = await create_testset(http_client, testset_name, headers)
-        payload = {"name": f"updated_{testset_name}", "csvdata": []}
-
-        # Act
-        response = await http_client.put(
-            f"testsets/{testset['id']}", headers=headers, json=payload
-        )
-        response_data = response.json()
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-        assert response_data["_id"] == testset["id"], f"Failed for case: {description}"
-
-        # Cleanup
-        await delete_testset(http_client, testset["id"], headers)
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.grumpy
-    @pytest.mark.functional
-    async def test_update_validation_failure(self, http_client):
-        # Arrange
-        expected_status = 422
-        description = "Update testset with invalid data"
-        headers = {"Authorization": self.owner_scope_response.get("credentials", "")}
-        testset_name = f"testset_{uuid.uuid4().hex[:8]}"
-        testset = await create_testset(http_client, testset_name, headers)
-        invalid_update_data = {"test_name": ""}
-
-        # Act
-        response = await http_client.put(
-            f"testsets/{testset['id']}", headers=headers, json=invalid_update_data
-        )
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-
-        # Cleanup
-        await delete_testset(http_client, testset["id"], headers)
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.grumpy
-    @pytest.mark.security
-    async def test_update_non_member_access(self, http_client):
-        # Arrange
-        expected_status = 403
-        description = "Non-member tries to update a testset"
-        api_credentials = self.owner_scope_response.get("credentials", "")
-        member_headers = {"Authorization": api_credentials}
-        testset_name = f"testset_{uuid.uuid4().hex[:8]}"
-        testset = await create_testset(http_client, testset_name, member_headers)
-        update_data = {"name": f"updated_{testset_name}", "csvdata": []}
-        non_member_headers = {
-            "Authorization": self.non_member_scope_response.get("credentials", "")
-        }
-
-        # Act
-        response = await http_client.put(
-            f"testsets/{testset['id']}", headers=non_member_headers, json=update_data
-        )
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-
-        # Cleanup
-        await delete_testset(http_client, testset["id"], member_headers)
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.happy
-    @pytest.mark.functional
-    async def test_delete_success(self, http_client):
-        # Arrange
-        expected_status = 200
-        description = "Delete testset successfully"
-        headers = {"Authorization": self.owner_scope_response.get("credentials", "")}
-        testset_name = f"testset_{uuid.uuid4().hex[:8]}"
-        testset = await create_testset(http_client, testset_name, headers)
-
-        # Act
-        response = await http_client.request(
-            "DELETE",
-            f"testsets",
-            headers=headers,
-            json={"testset_ids": [testset["id"]]},
-        )
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.grumpy
-    @pytest.mark.functional
-    async def test_delete_validation_failure(self, http_client):
-        # Arrange
-        expected_status = 422
-        description = "Delete testset with invalid ID"
-        headers = {"Authorization": self.owner_scope_response.get("credentials", "")}
-        invalid_testset_id = str(uuid.uuid4())
-
-        # Act
-        response = await http_client.request(
-            "DELETE",
-            f"testsets",
-            headers=headers,
-            json={"testsets_ids": [invalid_testset_id]},
-        )
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.grumpy
-    @pytest.mark.functional
-    async def test_delete_non_existent(self, http_client):
-        # Arrange
-        expected_status = 500
-        description = "Delete testset with non-existent ID"
-        headers = {"Authorization": self.owner_scope_response.get("credentials", "")}
-        nonexistent_testset_id = str(uuid.uuid4())
-
-        # Act
-        response = await http_client.request(
-            "DELETE",
-            f"testsets",
-            headers=headers,
-            json={"testset_ids": [nonexistent_testset_id]},
-        )
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
diff --git a/api/oss/tests/manual/testsets/test_testset_limits.py b/api/oss/tests/manual/testsets/test_testset_limits.py
index f67b169c3..8156fa9d5 100644
--- a/api/oss/tests/manual/testsets/test_testset_limits.py
+++ b/api/oss/tests/manual/testsets/test_testset_limits.py
@@ -1,8 +1,10 @@
 import requests
 from pathlib import Path
 
-API_URL = "http://localhost:80/api/testsets/upload"
+# Use the preview endpoint for testset upload
+API_URL = "http://localhost:80/api/preview/simple/testsets/upload"
 API_KEY = "ApiKey xxx.xxx"  # Replace with your actual key
+PROJECT_ID = "xxx"  # Replace with your actual project ID
 TESTSET_DIR = Path("testsets")
 FILES = [
     ("testset_1000.json", "testset_1000_json"),
@@ -22,10 +24,13 @@
         files = {"file": file}
         data = {
             "testset_name": testset_name,
-            "upload_type": "JSON" if file_path.suffix == ".json" else "CSV",
+            "file_type": "json" if file_path.suffix == ".json" else "csv",
         }
         response = requests.post(
-            API_URL, files=files, data=data, headers={"Authorization": API_KEY}
+            f"{API_URL}?project_id={PROJECT_ID}",
+            files=files,
+            data=data,
+            headers={"Authorization": API_KEY},
         )
     print(f"{file_path.name} → {response.status_code}")
     try:
diff --git a/api/oss/tests/pytest/testsets/legacy/__init__.py b/api/oss/tests/pytest/testsets/legacy/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/api/oss/tests/pytest/testsets/legacy/test_testsets_basics.py b/api/oss/tests/pytest/testsets/legacy/test_testsets_basics.py
deleted file mode 100644
index 17b3f10fc..000000000
--- a/api/oss/tests/pytest/testsets/legacy/test_testsets_basics.py
+++ /dev/null
@@ -1,284 +0,0 @@
-from uuid import uuid4
-from tempfile import TemporaryFile
-from csv import DictWriter
-from json import dumps
-
-
-class TestLegacyTestsetsBasics:
-    def test_create_legacy_testsets(self, authed_api):
-        # ACT ------------------------------------------------------------------
-        name = uuid4().hex
-
-        csvdata = [
-            {"column1": "data1", "column2": "data2", "column3": "data3"},
-            {"column1": "data4", "column2": "data5", "column3": "data6"},
-        ]
-
-        response = authed_api(
-            "POST",
-            "/testsets/",
-            json={
-                "name": name,
-                "csvdata": csvdata,
-            },
-        )
-        # ----------------------------------------------------------------------
-
-        # ASSERT ---------------------------------------------------------------
-        assert response.status_code == 200
-        response = response.json()
-        assert "id" in response
-        assert response["name"] == name
-        # ----------------------------------------------------------------------
-
-    def test_fetch_legacy_testsets(self, authed_api):
-        # ARRANGE --------------------------------------------------------------
-        name = uuid4().hex
-
-        csvdata = [
-            {"column1": "data1", "column2": "data2", "column3": "data3"},
-            {"column1": "data4", "column2": "data5", "column3": "data6"},
-        ]
-
-        response = authed_api(
-            "POST",
-            "/testsets/",
-            json={
-                "name": name,
-                "csvdata": csvdata,
-            },
-        )
-
-        assert response.status_code == 200
-
-        testset_id = response.json()["id"]
-        # ----------------------------------------------------------------------
-
-        # ACT ------------------------------------------------------------------
-        response = authed_api(
-            "GET",
-            f"/testsets/{testset_id}",
-        )
-        # ----------------------------------------------------------------------
-
-        # ASSERT ---------------------------------------------------------------
-        assert response.status_code == 200
-        response = response.json()
-        assert response["id"] == testset_id
-        assert response["name"] == name
-        assert response["csvdata"] == csvdata
-        # ----------------------------------------------------------------------
-
-    def test_edit_legacy_testsets(self, authed_api):
-        # ARRANGE --------------------------------------------------------------
-        name = uuid4().hex
-
-        csvdata = [
-            {"column1": "data1", "column2": "data2", "column3": "data3"},
-            {"column1": "data4", "column2": "data5", "column3": "data6"},
-        ]
-
-        response = authed_api(
-            "POST",
-            "/testsets/",
-            json={
-                "name": name,
-                "csvdata": csvdata,
-            },
-        )
-
-        assert response.status_code == 200
-
-        testset_id = response.json()["id"]
-        # ----------------------------------------------------------------------
-
-        # ACT ------------------------------------------------------------------
-        new_name = uuid4().hex
-        new_csvdata = [
-            {"column1": "data6", "column2": "data5", "column3": "data4"},
-            {"column1": "data3", "column2": "data2", "column3": "data1"},
-        ]
-
-        response = authed_api(
-            "PUT",
-            f"/testsets/{testset_id}",
-            json={
-                "name": new_name,
-                "csvdata": new_csvdata,
-            },
-        )
-        # ----------------------------------------------------------------------
-
-        # ASSERT ---------------------------------------------------------------
-        assert response.status_code == 200
-        response = response.json()
-        assert response["_id"] == testset_id
-        # ----------------------------------------------------------------------
-
-    def test_delete_legacy_testsets(self, authed_api):
-        # ARRANGE --------------------------------------------------------------
-        name = uuid4().hex
-
-        csvdata = [
-            {"column1": "data1", "column2": "data2", "column3": "data3"},
-            {"column1": "data4", "column2": "data5", "column3": "data6"},
-        ]
-
-        response = authed_api(
-            "POST",
-            "/testsets/",
-            json={
-                "name": name,
-                "csvdata": csvdata,
-            },
-        )
-
-        assert response.status_code == 200
-
-        testset_id = response.json()["id"]
-        # ----------------------------------------------------------------------
-
-        # ACT ------------------------------------------------------------------
-        response = authed_api(
-            "DELETE",
-            "/testsets/",
-            json={
-                "testset_ids": [testset_id],
-            },
-        )
-        # ----------------------------------------------------------------------
-
-        # ASSERT ---------------------------------------------------------------
-        assert response.status_code == 200
-        response = response.json()
-        # ----------------------------------------------------------------------
-
-    def test_list_legacy_testsets(self, authed_api):
-        # ARRANGE --------------------------------------------------------------
-        name1 = uuid4().hex
-        name2 = uuid4().hex
-
-        csvdata1 = [
-            {"column1": "data1", "column2": "data2", "column3": "data3"},
-            {"column1": "data4", "column2": "data5", "column3": "data6"},
-        ]
-
-        csvdata2 = [
-            {"column1": "data7", "column2": "data8", "column3": "data9"},
-            {"column1": "data10", "column2": "data11", "column3": "data12"},
-        ]
-
-        response1 = authed_api(
-            "POST",
-            "/testsets/",
-            json={
-                "name": name1,
-                "csvdata": csvdata1,
-            },
-        )
-
-        response2 = authed_api(
-            "POST",
-            "/testsets/",
-            json={
-                "name": name2,
-                "csvdata": csvdata2,
-            },
-        )
-
-        assert response1.status_code == 200
-        assert response2.status_code == 200
-
-        # ----------------------------------------------------------------------
-
-        # ACT ------------------------------------------------------------------
-        response = authed_api(
-            "GET",
-            "/testsets/",
-        )
-        # ----------------------------------------------------------------------
-
-        # ASSERT ---------------------------------------------------------------
-        assert response.status_code == 200
-        response = response.json()
-        assert len(response) >= 2
-        assert any(testset["name"] == name1 for testset in response)
-        assert any(testset["name"] == name2 for testset in response)
-        # ----------------------------------------------------------------------
-
-    def test_upload_legacy_testsets_from_csv(self, authed_api):
-        # ACT ------------------------------------------------------------------
-        name = uuid4().hex
-        file_name = uuid4().hex + ".csv"
-
-        csvdata = [
-            {"column1": "data1", "column2": "data2", "column3": "data3"},
-            {"column1": "data4", "column2": "data5", "column3": "data6"},
-        ]
-
-        with TemporaryFile("w+", newline="") as temp_file:
-            writer = DictWriter(temp_file, fieldnames=csvdata[0].keys())
-            writer.writeheader()
-            writer.writerows(csvdata)
-            temp_file.seek(0)
-
-            files = {
-                "file": (file_name, temp_file, "text/csv"),
-                "testset_name": (None, name),
-            }
-            data = {
-                "upload_type": "CSV",
-            }
-
-            response = authed_api(
-                "POST",
-                "/testsets/upload",
-                files=files,
-                data=data,
-            )
-        # ----------------------------------------------------------------------
-
-        # ASSERT ---------------------------------------------------------------
-        assert response.status_code == 200
-        response = response.json()
-        assert "id" in response
-        assert response["name"] == name
-        # ----------------------------------------------------------------------
-
-    def test_upload_legacy_testsets_from_json(self, authed_api):
-        # ACT ------------------------------------------------------------------
-        name = uuid4().hex
-        file_name = uuid4().hex + ".json"
-
-        jsondata = [
-            {"column1": "data1", "column2": "data2", "column3": "data3"},
-            {"column1": "data4", "column2": "data5", "column3": "data6"},
-        ]
-
-        # Create a temporary JSON file
-        with TemporaryFile("w+", newline="") as temp_file:
-            temp_file.write(dumps(jsondata))
-            temp_file.seek(0)
-            # Upload the JSON file
-            files = {
-                "file": (file_name, temp_file, "application/json"),
-                "testset_name": (None, name),
-            }
-            data = {
-                "upload_type": "JSON",
-            }
-
-            response = authed_api(
-                "POST",
-                "/testsets/upload",
-                files=files,
-                data=data,
-            )
-        # ----------------------------------------------------------------------
-
-        # ASSERT ---------------------------------------------------------------
-        assert response.status_code == 200
-        response = response.json()
-        assert "id" in response
-        assert response["name"] == name
-        # ----------------------------------------------------------------------
diff --git a/api/oss/tests/pytest/testsets/test_testsets_jit.py b/api/oss/tests/pytest/testsets/test_testsets_jit.py
deleted file mode 100644
index f53399908..000000000
--- a/api/oss/tests/pytest/testsets/test_testsets_jit.py
+++ /dev/null
@@ -1,165 +0,0 @@
-from uuid import uuid4
-
-
-class TestTestsetsJIT:
-    def test_transfer_testset(self, authed_api):
-        # ARRANGE --------------------------------------------------------------
-        name = uuid4().hex
-
-        csvdata = [
-            {"column1": "data1", "column2": "data2", "column3": "data3"},
-            {"column1": "data4", "column2": "data5", "column3": "data6"},
-        ]
-
-        response = authed_api(
-            "POST",
-            "/testsets/",
-            json={
-                "name": name,
-                "csvdata": csvdata,
-            },
-        )
-
-        assert response.status_code == 200
-        response = response.json()
-        assert "id" in response
-        assert response["name"] == name
-        # ----------------------------------------------------------------------
-
-        # ACT ------------------------------------------------------------------
-        testset_id = response["id"]
-        response = authed_api(
-            "POST",
-            f"/preview/simple/testsets/{testset_id}/transfer",
-        )
-        # ----------------------------------------------------------------------
-
-        # ASSERT ---------------------------------------------------------------
-        assert response.status_code == 200
-        response = response.json()
-        assert response["count"] == 1
-        assert response["testset"]["name"] == name
-        assert response["testset"]["id"] == testset_id
-
-        testcases = response["testset"]["data"]["testcases"]
-        assert len(testcases) == len(csvdata)
-        # ----------------------------------------------------------------------
-
-        # ACT ------------------------------------------------------------------
-        csvdata = [testcase["data"] for testcase in testcases][:-1] + [
-            {"column1": "data7", "column2": "data8", "column3": "data9"},
-        ]
-
-        response = authed_api(
-            "PUT",
-            f"/testsets/{testset_id}",
-            json={
-                "name": name,
-                "csvdata": csvdata,
-            },
-        )
-        # ----------------------------------------------------------------------
-
-        # ASSERT ---------------------------------------------------------------
-        assert response.status_code == 200
-        response = response.json()
-        assert response["_id"] == testset_id
-        # ----------------------------------------------------------------------
-
-        # ACT ------------------------------------------------------------------
-        response = authed_api(
-            "POST",
-            f"/preview/simple/testsets/{testset_id}/transfer",
-        )
-        # ----------------------------------------------------------------------
-
-        # ASSERT ---------------------------------------------------------------
-        assert response.status_code == 200
-        response = response.json()
-        assert response["count"] == 1
-        assert response["testset"]["name"] == name
-        assert response["testset"]["id"] == testset_id
-
-        testcases = response["testset"]["data"]["testcases"]
-        assert len(testcases) == len(csvdata)
-        # ----------------------------------------------------------------------
-
-    def test_transfer_testset_no_changes(self, authed_api):
-        # ARRANGE --------------------------------------------------------------
-        name = uuid4().hex
-
-        csvdata = [
-            {"column1": "data1", "column2": "data2", "column3": "data3"},
-            {"column1": "data4", "column2": "data5", "column3": "data6"},
-        ]
-
-        response = authed_api(
-            "POST",
-            "/testsets/",
-            json={
-                "name": name,
-                "csvdata": csvdata,
-            },
-        )
-
-        assert response.status_code == 200
-        response = response.json()
-        assert "id" in response
-        assert response["name"] == name
-        # ----------------------------------------------------------------------
-
-        # ACT ------------------------------------------------------------------
-        testset_id = response["id"]
-        response = authed_api(
-            "POST",
-            f"/preview/simple/testsets/{testset_id}/transfer",
-        )
-        # ----------------------------------------------------------------------
-
-        # ASSERT ---------------------------------------------------------------
-        assert response.status_code == 200
-        response = response.json()
-        assert response["count"] == 1
-        assert response["testset"]["name"] == name
-        assert response["testset"]["id"] == testset_id
-
-        testcases = response["testset"]["data"]["testcases"]
-        assert len(testcases) == len(csvdata)
-        # ----------------------------------------------------------------------
-
-        # ACT ------------------------------------------------------------------
-        csvdata = [testcase["data"] for testcase in testcases]
-
-        response = authed_api(
-            "PUT",
-            f"/testsets/{testset_id}",
-            json={
-                "name": name,
-                "csvdata": csvdata,
-            },
-        )
-        # ----------------------------------------------------------------------
-
-        # ASSERT ---------------------------------------------------------------
-        assert response.status_code == 200
-        response = response.json()
-        assert response["_id"] == testset_id
-        # ----------------------------------------------------------------------
-
-        # ACT ------------------------------------------------------------------
-        response = authed_api(
-            "POST",
-            f"/preview/simple/testsets/{testset_id}/transfer",
-        )
-        # ----------------------------------------------------------------------
-
-        # ASSERT ---------------------------------------------------------------
-        assert response.status_code == 200
-        response = response.json()
-        assert response["count"] == 1
-        assert response["testset"]["name"] == name
-        assert response["testset"]["id"] == testset_id
-
-        testcases = response["testset"]["data"]["testcases"]
-        assert len(testcases) == len(csvdata)
-        # ----------------------------------------------------------------------
diff --git a/sdk/tests/legacy/new_tests/testsets/__init__.py b/sdk/tests/legacy/new_tests/testsets/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/sdk/tests/legacy/new_tests/testsets/assets/baby_names.csv b/sdk/tests/legacy/new_tests/testsets/assets/baby_names.csv
deleted file mode 100644
index 0c3508a75..000000000
--- a/sdk/tests/legacy/new_tests/testsets/assets/baby_names.csv
+++ /dev/null
@@ -1,31 +0,0 @@
-gender,country,correct_answer
-Female,Nigeria,"1. Adaeze
-2. Chidinma
-3. Amarachi
-4. Ifeoma
-5. Ngozi
-6. Yemisi
-7. Zara
-8. Chinelo
-9. Funmilayo
-10. Temitope"
-Female,Ghana,"1. Akosua
-2. Abena
-3. Adwoa
-4. Afia
-5. Ama
-6. Aku
-7. Asabea
-8. Ayodele
-9. Ekua
-10. Yaa"
-Female,Malta,"1. Maria
-2. Elena
-3. Giorgia
-4. Alessia
-5. Sofia
-6. Valentina
-7. Francesca
-8. Giulia
-9. Chiara
-10. Emilia"
\ No newline at end of file
diff --git a/sdk/tests/legacy/new_tests/testsets/fixtures.py b/sdk/tests/legacy/new_tests/testsets/fixtures.py
deleted file mode 100644
index 25c0d0ab4..000000000
--- a/sdk/tests/legacy/new_tests/testsets/fixtures.py
+++ /dev/null
@@ -1,28 +0,0 @@
-async def create_testset(client, testset_name: str, headers: dict):
-    """
-    Factory fixture to create a new testset.
-    """
-
-    response = await client.post(
-        f"testsets", json={"name": testset_name, "csvdata": []}, headers=headers
-    )
-    response.raise_for_status()
-    response_data = response.json()
-
-    return response_data
-
-
-async def delete_testset(client, testset_id: str, headers: dict):
-    """
-    Factory fixture to delete a testset.
-    """
-
-    response = await client.request(
-        "DELETE",
-        f"testsets",
-        json={"testset_ids": [testset_id]},
-        headers=headers,
-    )
-    response.raise_for_status()
-
-    return response
diff --git a/sdk/tests/legacy/new_tests/testsets/tests.py b/sdk/tests/legacy/new_tests/testsets/tests.py
deleted file mode 100644
index 543bdacea..000000000
--- a/sdk/tests/legacy/new_tests/testsets/tests.py
+++ /dev/null
@@ -1,703 +0,0 @@
-import os
-import uuid
-from pathlib import Path
-
-import pytest
-import pytest_asyncio
-
-from agenta_backend.tests.testsets.fixtures import *
-
-
-DATASETS_DIRECTORY = Path(__file__).parent
-ASSETS_DIRECTORY = os.path.join(str(DATASETS_DIRECTORY), "/datasets/assets")
-
-
-class TestDatasetsCreation:
-    @pytest_asyncio.fixture(autouse=True)
-    async def setup_fixture(
-        self,
-        request,
-        create_programmatic_owner_user,
-        create_programmatic_non_member_user,
-        create_programmatic_all_users,
-    ):
-        request.cls.owner_scope_response = create_programmatic_owner_user
-        request.cls.non_member_scope_response = create_programmatic_non_member_user
-        request.cls.all_members_scope_response = create_programmatic_all_users
-
-    # @pytest.mark.asyncio
-    # @pytest.mark.typical
-    # @pytest.mark.happy
-    # @pytest.mark.functional
-    # async def test_upload_file_success(self, http_client):
-    #     # Arrange
-    #     expected_status = 200
-    #     testset_name = f"testset_{uuid.uuid4().hex[:8]}"
-    #     description = "Upload file successfully"
-    #     headers = {"Authorization": self.owner_scope_response.get("credentials", "")}
-
-    #     # Act
-    #     with open(f"{ASSETS_DIRECTORY}/baby_names.csv", "rb") as file:
-    #         files = {
-    #             "upload_type": (None, ""),
-    #             "file": (
-    #                 "baby_names.csv",
-    #                 file,
-    #                 "text/csv",
-    #             ),
-    #             "testset_name": (None, testset_name),
-    #         }
-    #         response = await http_client.put(
-    #             f"/testsets/upload", headers=headers, files=files
-    #         )
-
-    #     response.raise_for_status()
-    #     response_data = response.json()
-
-    #     # Assert
-    #     assert (
-    #         response.status_code == expected_status
-    #     ), f"Failed for case: {description}"
-    #     assert "id" in response_data, f"Failed for case: {description}"
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.grumpy
-    @pytest.mark.functional
-    async def test_upload_file_validation_failure(self, http_client):
-        # Arrange
-        expected_status = 422
-        description = "Upload file with invalid format"
-        headers = {"Authorization": self.owner_scope_response.get("credentials", "")}
-        invalid_file_data = {"csv_file": ("invalidfile.txt", b"Invalid data")}
-
-        # Act
-        response = await http_client.post(
-            "/testsets/upload", headers=headers, files=invalid_file_data
-        )
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-
-    # @pytest.mark.asyncio
-    # @pytest.mark.typical
-    # @pytest.mark.grumpy
-    # @pytest.mark.security
-    # async def test_upload_file_non_member_access(self, http_client):
-    #     # Arrange
-    #     expected_status = 403
-    #     testset_name = f"testset_{uuid.uuid4().hex[:8]}"
-    #     description = "Non-member tries to upload a file"
-    #     non_member_headers = {
-    #         "Authorization": self.non_member_scope_response.get("credentials", "")
-    #     }
-
-    #     # Act
-    #     # with open(f"{DATASETS_DIRECTORY}/assets/baby_names.csv", "rb") as file:
-    #     files = {
-    #         "file": open(f"{DATASETS_DIRECTORY}/assets/baby_names.csv", "rb"),
-    #         "testset_name": (None, testset_name),
-    #     }
-    #     response = await http_client.post(
-    #         "/testsets/upload", headers=non_member_headers, files=files
-    #     )
-
-    #     # Assert
-    #     assert (
-    #         response.status_code == expected_status
-    #     ), f"Failed for case: {description}"
-
-    # @pytest.mark.asyncio
-    # @pytest.mark.typical
-    # @pytest.mark.grumpy
-    # @pytest.mark.security
-    # async def test_upload_file_non_owner_access(self, http_client):
-    #     # Arrange
-    #     expected_status = 403
-    #     description = "Non-owner tries to upload a file"
-    #     non_owner_headers = {
-    #         "Authorization": self.all_members_scope_response.get("credentials", "")
-    #     }
-    #     file_data = {"file": ("testfile.csv", b"Test data")}
-
-    #     # Act
-    #     response = await http_client.post(
-    #         "/testsets/upload", headers=non_owner_headers, files=file_data
-    #     )
-
-    #     # Assert
-    #     assert (
-    #         response.status_code == expected_status
-    #     ), f"Failed for case: {description}"
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.happy
-    @pytest.mark.security
-    async def test_get_testset_owner_access(self, http_client):
-        # Arrange
-        expected_status = 200
-        description = "Owner accesses testset details"
-        headers = {"Authorization": self.owner_scope_response.get("credentials", "")}
-        testset_name = f"testset_{uuid.uuid4().hex[:8]}"
-        testset = await create_testset(http_client, testset_name, headers)
-
-        # Act
-        response = await http_client.get(f"/testsets/{testset['id']}", headers=headers)
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-        assert "id" in response.json(), f"Failed for case: {description}"
-
-        # Cleanup
-        await delete_testset(http_client, testset["id"], headers)
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.happy
-    @pytest.mark.functional
-    async def test_create_testset_success(self, http_client):
-        # Arrange
-        expected_status = 200
-        description = "Create testset successfully"
-        headers = {"Authorization": self.owner_scope_response.get("credentials", "")}
-        data = {
-            "name": f"testset_{uuid.uuid4().hex[:8]}",
-            "csvdata": [
-                {
-                    "country": "Comoros",
-                    "correct_answer": "The capital of Comoros is Moroni",
-                },
-                {
-                    "country": "Kyrgyzstan",
-                    "correct_answer": "The capital of Kyrgyzstan is Bishkek",
-                },
-                {
-                    "country": "Azerbaijan",
-                    "correct_answer": "The capital of Azerbaijan is Baku",
-                },
-            ],
-        }
-
-        # Act
-        response = await http_client.post("/testsets", headers=headers, json=data)
-        response_data = response.json()
-
-        # Cleanup
-        await delete_testset(http_client, response_data["id"], headers)
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-        assert "id" in response_data, f"Failed for case: {description}"
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.grumpy
-    @pytest.mark.functional
-    async def test_create_testset_validation_failure(self, http_client):
-        # Arrange
-        expected_status = 422
-        description = "Create testset with invalid data"
-        headers = {"Authorization": self.owner_scope_response.get("credentials", "")}
-        invalid_data = {"testset_name": ""}
-
-        # Act
-        response = await http_client.post(
-            "/testsets", headers=headers, json=invalid_data
-        )
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.grumpy
-    @pytest.mark.security
-    async def test_create_testset_non_member_access(self, http_client):
-        # Arrange
-        expected_status = 403
-        description = "Non-member tries to create a testset"
-        owner_scope_response = self.owner_scope_response
-        non_member_api_credentials = self.non_member_scope_response.get(
-            "credentials", ""
-        )
-        non_member_headers = {"Authorization": non_member_api_credentials}
-        owner_project_id = owner_scope_response.get("project", {}).get("id")
-        data = {"name": f"testset_{uuid.uuid4().hex[:8]}", "csvdata": []}
-
-        # Act
-        response = await http_client.post(
-            f"/testsets?project_id={owner_project_id}",
-            headers=non_member_headers,
-            json=data,
-        )
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.happy
-    @pytest.mark.functional
-    async def test_no_element(self, http_client):
-        # Arrange
-        expected_status = 200
-        description = "List testsets with no elements"
-        headers = {"Authorization": self.owner_scope_response.get("credentials", "")}
-
-        # Act
-        response = await http_client.get("/testsets", headers=headers)
-        response_data = response.json()
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-        assert len(response_data) == 0, f"Failed for case: {description}"
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.happy
-    @pytest.mark.functional
-    async def test_one_element(self, http_client):
-        # Arrange
-        expected_status = 200
-        description = "List testsets with one element"
-        headers = {"Authorization": self.owner_scope_response.get("credentials", "")}
-        testset_name = f"testset_{uuid.uuid4().hex[:8]}"
-        await create_testset(http_client, testset_name, headers)
-
-        # Act
-        response = await http_client.get("/testsets", headers=headers)
-        response_data = response.json()
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-        assert len(response_data) == 1, f"Failed for case: {description}"
-
-        # Cleanup
-        await delete_testset(http_client, response_data[0]["_id"], headers)
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.happy
-    @pytest.mark.functional
-    async def test_many_elements_small_data(self, http_client):
-        # Arrange
-        expected_status = 200
-        description = "List testsets with small dataset"
-        headers = {"Authorization": self.owner_scope_response.get("credentials", "")}
-        for _ in range(3):
-            testset_name = f"testset_{uuid.uuid4().hex[:8]}"
-            await create_testset(http_client, testset_name, headers)
-
-        # Act
-        response = await http_client.get("/testsets", headers=headers)
-        response_data = response.json()
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-        assert len(response_data) == 3, f"Failed for case: {description}"
-
-        # Cleanup
-        for testset in response_data:
-            await delete_testset(http_client, testset["_id"], headers)
-
-    @pytest.mark.asyncio
-    @pytest.mark.edge
-    @pytest.mark.happy
-    @pytest.mark.functional
-    async def test_many_elements_big_data(self, http_client):
-        # Arrange
-        expected_status = 200
-        description = "List testsets with large dataset"
-        headers = {"Authorization": self.owner_scope_response.get("credentials", "")}
-        for _ in range(6):
-            testset_name = f"testset_{uuid.uuid4().hex[:8]}"
-            await create_testset(http_client, testset_name, headers)
-
-        # Act
-        response = await http_client.get("/testsets", headers=headers)
-        response_data = response.json()
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-        assert len(response_data) == 6, f"Failed for case: {description}"
-
-        # Cleanup
-        for testset in response_data:
-            await delete_testset(http_client, testset["_id"], headers)
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.grumpy
-    @pytest.mark.security
-    async def test_permissions_principal_not_in_scope(self, http_client):
-        # Arrange
-        expected_status = 403
-        description = "Access control for non-member"
-        owner_scope_response = self.owner_scope_response
-        owner_headers = {"Authorization": owner_scope_response.get("credentials", "")}
-        non_member_api_credentials = self.non_member_scope_response.get(
-            "credentials", ""
-        )
-        non_member_headers = {"Authorization": non_member_api_credentials}
-        owner_project_id = owner_scope_response.get("project", {}).get("id")
-        testset_name = f"testset_{uuid.uuid4().hex[:8]}"
-        testset = await create_testset(http_client, testset_name, owner_headers)
-
-        # Act
-        response = await http_client.get(
-            f"/testsets?project_id={owner_project_id}", headers=non_member_headers
-        )
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-
-        # Cleanup
-        await delete_testset(http_client, testset["id"], owner_headers)
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.happy
-    @pytest.mark.security
-    async def test_permissions_allowed(self, http_client):
-        # Arrange
-        expected_status = 200
-        description = "Access control for owner"
-        owner_headers = {
-            "Authorization": self.owner_scope_response.get("credentials", "")
-        }
-
-        # Act
-        response = await http_client.get("/testsets", headers=owner_headers)
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.happy
-    @pytest.mark.functional
-    async def test_no_element(self, http_client):
-        # Arrange
-        expected_status = 200
-        description = "List testsets with no elements"
-        headers = {"Authorization": self.owner_scope_response.get("credentials", "")}
-
-        # Act
-        response = await http_client.get("/testsets", headers=headers)
-        response_data = response.json()
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-        assert len(response_data) == 0, f"Failed for case: {description}"
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.happy
-    @pytest.mark.functional
-    async def test_one_element(self, http_client):
-        # Arrange
-        expected_status = 200
-        description = "List testsets with one element"
-        headers = {"Authorization": self.owner_scope_response.get("credentials", "")}
-        testset_name = f"testset_{uuid.uuid4().hex[:8]}"
-        await create_testset(http_client, testset_name, headers)
-
-        # Act
-        response = await http_client.get("/testsets", headers=headers)
-        response_data = response.json()
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-        assert len(response_data) == 1, f"Failed for case: {description}"
-
-        # Cleanup
-        await delete_testset(http_client, response_data[0]["_id"], headers)
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.happy
-    @pytest.mark.functional
-    async def test_many_elements_small_data(self, http_client):
-        # Arrange
-        expected_status = 200
-        description = "List testsets with small dataset"
-        headers = {"Authorization": self.owner_scope_response.get("credentials", "")}
-        for _ in range(3):
-            testset_name = f"testset_{uuid.uuid4().hex[:8]}"
-            await create_testset(http_client, testset_name, headers)
-
-        # Act
-        response = await http_client.get("/testsets", headers=headers)
-        response_data = response.json()
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-        assert len(response_data) == 3, f"Failed for case: {description}"
-
-        # Cleanup
-        for testset in response_data:
-            await delete_testset(http_client, testset["_id"], headers)
-
-    @pytest.mark.asyncio
-    @pytest.mark.edge
-    @pytest.mark.happy
-    @pytest.mark.functional
-    async def test_many_elements_big_data(self, http_client):
-        # Arrange
-        expected_status = 200
-        description = "List testsets with large dataset"
-        headers = {"Authorization": self.owner_scope_response.get("credentials", "")}
-        for _ in range(6):
-            testset_name = f"testset_{uuid.uuid4().hex[:8]}"
-            await create_testset(http_client, testset_name, headers)
-
-        # Act
-        response = await http_client.get("/testsets", headers=headers)
-        response_data = response.json()
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-        assert len(response_data) == 6, f"Failed for case: {description}"
-
-        # Cleanup
-        for testset in response_data:
-            await delete_testset(http_client, testset["_id"], headers)
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.grumpy
-    @pytest.mark.security
-    async def test_permissions_principal_not_in_scope(self, http_client):
-        # Arrange
-        expected_status = 403
-        description = "Access control for non-member"
-        owner_scope_response = self.owner_scope_response
-        owner_headers = {"Authorization": owner_scope_response.get("credentials", "")}
-        non_member_api_credentials = self.non_member_scope_response.get(
-            "credentials", ""
-        )
-        non_member_headers = {"Authorization": non_member_api_credentials}
-        owner_project_id = owner_scope_response.get("project", {}).get("id")
-        testset_name = f"testset_{uuid.uuid4().hex[:8]}"
-        testset = await create_testset(http_client, testset_name, owner_headers)
-
-        # Act
-        response = await http_client.get(
-            f"/testsets?project_id={owner_project_id}", headers=non_member_headers
-        )
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-
-        # Cleanup
-        await delete_testset(http_client, testset["id"], owner_headers)
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.happy
-    @pytest.mark.security
-    async def test_permissions_allowed(self, http_client):
-        # Arrange
-        expected_status = 200
-        description = "Access control for owner"
-        owner_headers = {
-            "Authorization": self.owner_scope_response.get("credentials", "")
-        }
-
-        # Act
-        response = await http_client.get("/testsets", headers=owner_headers)
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.happy
-    @pytest.mark.functional
-    async def test_update_success(self, http_client):
-        # Arrange
-        expected_status = 200
-        description = "Update testset successfully"
-        headers = {"Authorization": self.owner_scope_response.get("credentials", "")}
-        testset_name = f"testset_{uuid.uuid4().hex[:8]}"
-        testset = await create_testset(http_client, testset_name, headers)
-        payload = {"name": f"updated_{testset_name}", "csvdata": []}
-
-        # Act
-        response = await http_client.put(
-            f"testsets/{testset['id']}", headers=headers, json=payload
-        )
-        response_data = response.json()
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-        assert response_data["_id"] == testset["id"], f"Failed for case: {description}"
-
-        # Cleanup
-        await delete_testset(http_client, testset["id"], headers)
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.grumpy
-    @pytest.mark.functional
-    async def test_update_validation_failure(self, http_client):
-        # Arrange
-        expected_status = 422
-        description = "Update testset with invalid data"
-        headers = {"Authorization": self.owner_scope_response.get("credentials", "")}
-        testset_name = f"testset_{uuid.uuid4().hex[:8]}"
-        testset = await create_testset(http_client, testset_name, headers)
-        invalid_update_data = {"test_name": ""}
-
-        # Act
-        response = await http_client.put(
-            f"testsets/{testset['id']}", headers=headers, json=invalid_update_data
-        )
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-
-        # Cleanup
-        await delete_testset(http_client, testset["id"], headers)
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.grumpy
-    @pytest.mark.security
-    async def test_update_non_member_access(self, http_client):
-        # Arrange
-        expected_status = 403
-        description = "Non-member tries to update a testset"
-        api_credentials = self.owner_scope_response.get("credentials", "")
-        member_headers = {"Authorization": api_credentials}
-        testset_name = f"testset_{uuid.uuid4().hex[:8]}"
-        testset = await create_testset(http_client, testset_name, member_headers)
-        update_data = {"name": f"updated_{testset_name}", "csvdata": []}
-        non_member_headers = {
-            "Authorization": self.non_member_scope_response.get("credentials", "")
-        }
-
-        # Act
-        response = await http_client.put(
-            f"testsets/{testset['id']}", headers=non_member_headers, json=update_data
-        )
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-
-        # Cleanup
-        await delete_testset(http_client, testset["id"], member_headers)
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.happy
-    @pytest.mark.functional
-    async def test_delete_success(self, http_client):
-        # Arrange
-        expected_status = 200
-        description = "Delete testset successfully"
-        headers = {"Authorization": self.owner_scope_response.get("credentials", "")}
-        testset_name = f"testset_{uuid.uuid4().hex[:8]}"
-        testset = await create_testset(http_client, testset_name, headers)
-
-        # Act
-        response = await http_client.request(
-            "DELETE",
-            f"testsets",
-            headers=headers,
-            json={"testset_ids": [testset["id"]]},
-        )
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.grumpy
-    @pytest.mark.functional
-    async def test_delete_validation_failure(self, http_client):
-        # Arrange
-        expected_status = 422
-        description = "Delete testset with invalid ID"
-        headers = {"Authorization": self.owner_scope_response.get("credentials", "")}
-        invalid_testset_id = str(uuid.uuid4())
-
-        # Act
-        response = await http_client.request(
-            "DELETE",
-            f"testsets",
-            headers=headers,
-            json={"testsets_ids": [invalid_testset_id]},
-        )
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
-
-    @pytest.mark.asyncio
-    @pytest.mark.typical
-    @pytest.mark.grumpy
-    @pytest.mark.functional
-    async def test_delete_non_existent(self, http_client):
-        # Arrange
-        expected_status = 500
-        description = "Delete testset with non-existent ID"
-        headers = {"Authorization": self.owner_scope_response.get("credentials", "")}
-        nonexistent_testset_id = str(uuid.uuid4())
-
-        # Act
-        response = await http_client.request(
-            "DELETE",
-            f"testsets",
-            headers=headers,
-            json={"testset_ids": [nonexistent_testset_id]},
-        )
-
-        # Assert
-        assert response.status_code == expected_status, (
-            f"Failed for case: {description}"
-        )
diff --git a/web/oss/src/components/Playground/Components/Modals/LoadTestsetModal/assets/testsetCsvData.ts b/web/oss/src/components/Playground/Components/Modals/LoadTestsetModal/assets/testsetCsvData.ts
deleted file mode 100644
index 41e8ec892..000000000
--- a/web/oss/src/components/Playground/Components/Modals/LoadTestsetModal/assets/testsetCsvData.ts
+++ /dev/null
@@ -1,32 +0,0 @@
-import {atomFamily} from "jotai/utils"
-import {atomWithQuery} from "jotai-tanstack-query"
-
-import {Testset} from "@/oss/lib/Types"
-import {fetchTestset} from "@/oss/services/testsets/api"
-
-export interface TestsetCsvParams {
-    testsetId?: string
-    enabled?: boolean
-}
-
-/**
- * Atom family to fetch CSV data for a given testset ID.
- * Returns the raw csvdata array from the testset response.
- */
-export const testsetCsvDataQueryAtomFamily = atomFamily((params: TestsetCsvParams) =>
-    atomWithQuery<Testset["csvdata"]>((get) => {
-        const {testsetId, enabled = true} = params || {}
-        return {
-            queryKey: ["testsetCsvData", testsetId],
-            queryFn: async () => {
-                if (!testsetId) return []
-                const data = await fetchTestset(testsetId)
-                return data.csvdata || []
-            },
-            enabled: !!testsetId && enabled,
-            staleTime: 1000 * 60 * 2, // 2 minutes
-            refetchOnWindowFocus: false,
-            refetchOnReconnect: true,
-        }
-    }),
-)
diff --git a/web/oss/src/lib/Types.ts b/web/oss/src/lib/Types.ts
index cfcb4ee49..5bf49ea25 100644
--- a/web/oss/src/lib/Types.ts
+++ b/web/oss/src/lib/Types.ts
@@ -55,7 +55,8 @@ export interface Workspace {
 export type JSSTheme = GlobalToken & {isDark: boolean; fontWeightMedium: number}
 
 export interface testset {
-    _id: string
+    _id?: string
+    id?: string
     name: string
     created_at: string
     updated_at: string
diff --git a/web/oss/src/services/evaluations/api/index.ts b/web/oss/src/services/evaluations/api/index.ts
index 96ee3a792..43bfdb3ca 100644
--- a/web/oss/src/services/evaluations/api/index.ts
+++ b/web/oss/src/services/evaluations/api/index.ts
@@ -1,19 +1,13 @@
-import uniqBy from "lodash/uniqBy"
-import {v4 as uuidv4} from "uuid"
-
 import axios from "@/oss/lib/api/assets/axiosConfig"
 import {calcEvalDuration} from "@/oss/lib/evaluations/legacy"
 import {assertValidId, isValidId} from "@/oss/lib/helpers/serviceValidations"
 import {
-    ComparisonResultRow,
     EvaluationStatus,
     KeyValuePair,
     LLMRunRateLimit,
-    Testset,
     _Evaluation,
     _EvaluationScenario,
 } from "@/oss/lib/Types"
-import {fetchTestset} from "@/oss/services/testsets/api"
 import {getProjectValues} from "@/oss/state/project"
 
 // Re-export evaluator config functions from the canonical source
@@ -169,83 +163,6 @@ export const updateScenarioStatus = async (
     })
 }
 
-// Comparison
-export const fetchAllComparisonResults = async (evaluationIds: string[]) => {
-    // Defensive check: Only accept valid UUIDs
-    const validIds = evaluationIds.filter((id) => isValidId(id))
-    if (validIds.length === 0) {
-        throw new Error("No valid evaluation IDs provided")
-    }
-    const scenarioGroups = await Promise.all(validIds.map(fetchAllEvaluationScenarios))
-    const testset: Testset = await fetchTestset(scenarioGroups[0][0].evaluation?.testset?.id)
-
-    const inputsNameSet = new Set<string>()
-    scenarioGroups.forEach((group) => {
-        group.forEach((scenario) => {
-            scenario.inputs.forEach((input) => inputsNameSet.add(input.name))
-        })
-    })
-
-    const rows: ComparisonResultRow[] = []
-    const inputNames = Array.from(inputsNameSet)
-    const inputValuesSet = new Set<string>()
-    const variants = scenarioGroups.map((group) => group[0].evaluation.variants[0])
-    const correctAnswers = uniqBy(
-        scenarioGroups.map((group) => group[0].correct_answers).flat(),
-        "key",
-    )
-
-    for (const data of testset.csvdata) {
-        const inputValues = inputNames
-            .filter((name) => data[name] !== undefined)
-            .map((name) => ({name, value: data[name]}))
-        const inputValuesStr = inputValues.map((ip) => ip.value).join("")
-        if (inputValuesSet.has(inputValuesStr)) continue
-        else inputValuesSet.add(inputValuesStr)
-
-        rows.push({
-            id: inputValuesStr,
-            rowId: uuidv4(),
-            inputs: inputNames
-                .map((name) => ({name, value: data[name]}))
-                .filter((ip) => ip.value !== undefined),
-            ...correctAnswers.reduce((acc, curr) => {
-                return {...acc, [`correctAnswer_${curr?.key}`]: data[curr?.key!]}
-            }, {}),
-            variants: variants.map((variant, ix) => {
-                const group = scenarioGroups[ix]
-                const scenario = group.find((scenario) =>
-                    scenario.inputs.every((input) =>
-                        inputValues.some(
-                            (ip) => ip.name === input.name && ip.value === input.value,
-                        ),
-                    ),
-                )
-                return {
-                    variantId: variant.variantId,
-                    variantName: variant.variantName,
-                    output: scenario?.outputs[0] || {
-                        result: {type: "string", value: "", error: null},
-                    },
-                    evaluationId: scenario?.evaluation.id || "",
-                    evaluatorConfigs: (scenario?.evaluators_configs || []).map((config) => ({
-                        evaluatorConfig: config,
-                        result: scenario?.results.find(
-                            (result) => result.evaluator_config === config.id,
-                        )?.result || {type: "string", value: "", error: null}, // Adjust this line
-                    })),
-                }
-            }),
-        })
-    }
-
-    return {
-        rows,
-        testset,
-        evaluations: scenarioGroups.map((group) => group[0].evaluation),
-    }
-}
-
 // Evaluation IDs by resource
 export const fetchEvaluatonIdsByResource = async ({
     resourceIds,
diff --git a/web/oss/src/services/testsets/api/index.ts b/web/oss/src/services/testsets/api/index.ts
index 2195afe93..2ba65d348 100644
--- a/web/oss/src/services/testsets/api/index.ts
+++ b/web/oss/src/services/testsets/api/index.ts
@@ -1,7 +1,6 @@
 import axios from "@/oss/lib/api/assets/axiosConfig"
 import {getAgentaApiUrl} from "@/oss/lib/helpers/api"
 import {validateUUID} from "@/oss/lib/helpers/validators"
-import {Testset, PreviewTestset} from "@/oss/lib/Types"
 import {getProjectValues} from "@/oss/state/project"
 
 import {PreviewTestsetsQueryPayload} from "./types"
@@ -73,19 +72,6 @@ export async function createNewTestset(
     return response
 }
 
-export async function updateTestset(testsetId: string, testsetName: string, testsetData: any) {
-    const {projectId} = getProjectValues()
-
-    const response = await axios.put(
-        `${getAgentaApiUrl()}/testsets/${testsetId}?project_id=${projectId}`,
-        {
-            name: testsetName,
-            csvdata: testsetData,
-        },
-    )
-    return response
-}
-
 /**
  * Fetch a simple testset by ID using the preview API
  * Returns testset with its latest revision data
@@ -201,51 +187,6 @@ export async function cloneTestset(sourceTestsetId: string, newName: string) {
     return response
 }
 
-export async function fetchTestset<T extends boolean = false>(
-    testsetId: string,
-    preview?: T,
-): Promise<T extends true ? PreviewTestset : Testset> {
-    if (!testsetId) {
-        return null as any
-    }
-    const {projectId} = getProjectValues()
-
-    if (preview) {
-        // Use the query endpoint for preview
-        const response = await axios.post(
-            `${getAgentaApiUrl()}/preview/testsets/query?project_id=${projectId}`,
-            {
-                testset_refs: [{id: testsetId}],
-                windowing: {limit: 1},
-            },
-        )
-        const testsets = response?.data?.testsets ?? []
-        return testsets[0] as T extends true ? PreviewTestset : Testset
-    }
-
-    const response = await axios.get(
-        `${getAgentaApiUrl()}/testsets/${testsetId}?project_id=${projectId}`,
-    )
-    return response?.data as T extends true ? PreviewTestset : Testset
-}
-
-export const uploadTestsets = async (formData: FormData) => {
-    const {projectId} = getProjectValues()
-
-    const response = await axios.post(
-        `${getAgentaApiUrl()}/testsets/upload?project_id=${projectId}`,
-        formData,
-        {
-            headers: {
-                "Content-Type": "multipart/form-data",
-            },
-            //@ts-ignore
-            _ignoreError: true,
-        },
-    )
-    return response
-}
-
 /**
  * Upload a testset file using the preview API (multipart file upload)
  * Sends the file to the backend for server-side parsing
@@ -311,19 +252,6 @@ export const uploadTestsetRevisionPreview = async (
     return response
 }
 
-export const importTestsetsViaEndpoint = async (formData: FormData) => {
-    const {projectId} = getProjectValues()
-
-    const response = await axios.post(
-        `${getAgentaApiUrl()}/testsets/endpoint?project_id=${projectId}`,
-        formData,
-        {
-            headers: {"Content-Type": "multipart/form-data"},
-        },
-    )
-    return response
-}
-
 export const deleteTestsets = async (ids: string[]) => {
     const {projectId} = getProjectValues()
 
diff --git a/web/oss/tests/5-testsset/index.ts b/web/oss/tests/5-testsset/index.ts
index 9826e99b4..284b6554f 100644
--- a/web/oss/tests/5-testsset/index.ts
+++ b/web/oss/tests/5-testsset/index.ts
@@ -1,6 +1,5 @@
 import {test} from "@agenta/web-tests/tests/fixtures/base.fixture"
 
-import type {Testset} from "@/oss/lib/Types"
 import {expect} from "@agenta/web-tests/utils"
 import {
     createTagString,
@@ -9,6 +8,14 @@ import {
     TestScope,
 } from "@agenta/web-tests/playwright/config/testTags"
 
+interface SimpleTestset {
+    id: string
+    name: string
+    data?: {
+        testcases: Array<{id: string; data: Record<string, unknown>}>
+    }
+}
+
 const testsetTests = () => {
     test(
         "should view the default testset",
@@ -30,7 +37,8 @@ const testsetTests = () => {
             await uiHelpers.expectText("Testsets", {role: "heading"})
 
             // 3. Verify testset is visible in table
-            const testsetId = testsets[0]._id
+            // Preview endpoint returns 'id' instead of '_id'
+            const testsetId = testsets[0].id || testsets[0]._id
             const testsetName = testsets[0].name
 
             if (!testsetId) {
@@ -45,9 +53,9 @@ const testsetTests = () => {
             // 4. Click on testset row
             await uiHelpers.clickTableRow(testsetName)
 
-            // 5. Fetch testset from API
-            const testsetResponse = await apiHelpers.waitForApiResponse<Testset>({
-                route: `/api/testsets/${testsetId}`,
+            // 5. Fetch testset from API using preview endpoint
+            const testsetResponse = await apiHelpers.waitForApiResponse<{testset: SimpleTestset}>({
+                route: `/api/preview/simple/testsets/${testsetId}`,
                 method: "GET",
             })
 
@@ -55,9 +63,11 @@ const testsetTests = () => {
             await uiHelpers.waitForPath(`/testsets/${testsetId}`)
             await uiHelpers.expectText("Create a new Testset", {role: "heading"})
 
-            const testset = await testsetResponse
+            const response = await testsetResponse
+            const testset = response.testset
             expect(testset.name).toBe(testsetName)
-            expect(testset.csvdata.length).toBeGreaterThan(0)
+            // Preview endpoint returns data.testcases instead of csvdata
+            expect(testset.data?.testcases?.length).toBeGreaterThan(0)
         },
     )
 }
diff --git a/web/tests/tests/fixtures/base.fixture/apiHelpers/index.ts b/web/tests/tests/fixtures/base.fixture/apiHelpers/index.ts
index 1819eac7c..99fec27e7 100644
--- a/web/tests/tests/fixtures/base.fixture/apiHelpers/index.ts
+++ b/web/tests/tests/fixtures/base.fixture/apiHelpers/index.ts
@@ -83,12 +83,13 @@ export const getApp = async (page: Page, type: APP_TYPE = "completion") => {
 }
 
 export const getTestsets = async (page: Page) => {
-    // 2. Fetch testsets from API
-    const testsetsResponse = await waitForApiResponse<testset[]>(page, {
-        route: "/api/testsets",
-        method: "GET",
+    // 2. Fetch testsets from API using preview endpoint
+    const testsetsResponse = await waitForApiResponse<{testsets: testset[]}>(page, {
+        route: "/api/preview/testsets/query",
+        method: "POST",
     })
-    const testsets = await testsetsResponse
+    const response = await testsetsResponse
+    const testsets = response.testsets
     expect(testsets.length).toBeGreaterThan(0)
 
     return testsets

From 7e8cd95f631a15f780dc2b379851235d71085029 Mon Sep 17 00:00:00 2001
From: Kaosiso Ezealigo <ezealigokosiso@gmail.com>
Date: Fri, 16 Jan 2026 11:40:10 +0100
Subject: [PATCH 20/20] fix

---
 .../LoadTestsetModal/components/TestsetPreviewPanel.tsx  | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/web/oss/src/components/Playground/Components/Modals/LoadTestsetModal/components/TestsetPreviewPanel.tsx b/web/oss/src/components/Playground/Components/Modals/LoadTestsetModal/components/TestsetPreviewPanel.tsx
index b9e58d137..2c593a109 100644
--- a/web/oss/src/components/Playground/Components/Modals/LoadTestsetModal/components/TestsetPreviewPanel.tsx
+++ b/web/oss/src/components/Playground/Components/Modals/LoadTestsetModal/components/TestsetPreviewPanel.tsx
@@ -78,10 +78,15 @@ const TestcasesTablePreview = ({
         if (!showActions) return
         const newRow = table.addTestcase()
         const newRowKey = String(newRow.key ?? newRow.id ?? Date.now())
-        setSelectedRowKeys((prev) => (prev.includes(newRowKey) ? prev : [...prev, newRowKey]))
+        setSelectedRowKeys((prev) => {
+            if (selectionMode === "single") {
+                return [newRowKey]
+            }
+            return prev.includes(newRowKey) ? prev : [...prev, newRowKey]
+        })
         message.success("Row added. Fill in the cells and click Create & Load.")
         setEditingTestcaseId(newRowKey)
-    }, [setSelectedRowKeys, showActions, table])
+    }, [selectionMode, setSelectedRowKeys, showActions, table])
 
     const handleDeleteSelected = useCallback(() => {
         if (!showActions || !selectedRowKeys.length) return