From 0cabd261e5108dfecc9fe281da59735875f21184 Mon Sep 17 00:00:00 2001 From: Kaosiso Ezealigo Date: Wed, 14 Jan 2026 15:25:55 +0100 Subject: [PATCH 01/20] fix: prevent unnecessary form sync during JSON schema updates --- .../components/ConfigureEvaluator/index.tsx | 9 +++++++-- .../JSONSchema/JSONSchemaEditor.tsx | 7 +++++++ .../EvaluatorsModal/ConfigureEvaluator/index.tsx | 11 +++++++++++ 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/web/oss/src/components/Evaluators/components/ConfigureEvaluator/index.tsx b/web/oss/src/components/Evaluators/components/ConfigureEvaluator/index.tsx index f21209048..ca07709a5 100644 --- a/web/oss/src/components/Evaluators/components/ConfigureEvaluator/index.tsx +++ b/web/oss/src/components/Evaluators/components/ConfigureEvaluator/index.tsx @@ -21,6 +21,7 @@ import {useRouter} from "next/router" import {message} from "@/oss/components/AppMessageContext" import { initPlaygroundAtom, + playgroundEditValuesAtom, resetPlaygroundAtom, } from "@/oss/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/state/atoms" import useURL from "@/oss/hooks/useURL" @@ -52,11 +53,15 @@ const ConfigureEvaluatorPage = ({evaluatorId}: {evaluatorId?: string | null}) => // Atom actions const initPlayground = useSetAtom(initPlaygroundAtom) const resetPlayground = useSetAtom(resetPlaygroundAtom) + const stagedConfig = useAtomValue(playgroundEditValuesAtom) const existingConfig = useMemo(() => { if (!evaluatorId) return null - return evaluatorConfigs.find((config) => config.id === evaluatorId) ?? null - }, [evaluatorConfigs, evaluatorId]) + return ( + evaluatorConfigs.find((config) => config.id === evaluatorId) ?? + (stagedConfig?.id === evaluatorId ? stagedConfig : null) + ) + }, [evaluatorConfigs, evaluatorId, stagedConfig]) const evaluatorKey = existingConfig?.evaluator_key ?? evaluatorId ?? 
null diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx index 6aff01d8c..3f7984b1e 100644 --- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx +++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx @@ -78,6 +78,7 @@ export const JSONSchemaEditor: React.FC = ({form, name, d }) const lastSyncedValueRef = useRef(undefined) + const skipSyncRef = useRef(false) const namePath = useMemo(() => (Array.isArray(name) ? name : [name]), [name]) @@ -133,6 +134,7 @@ export const JSONSchemaEditor: React.FC = ({form, name, d if (!defaultValue) { setSupportsBasicMode(true) setRawSchema("") + skipSyncRef.current = true return } @@ -145,6 +147,7 @@ export const JSONSchemaEditor: React.FC = ({form, name, d setSupportsBasicMode(isSchemaCompatibleWithBasicMode(defaultValue)) setRawSchema(defaultValue) + skipSyncRef.current = true }, [defaultValue, applyParsedConfig]) useEffect(() => { @@ -155,6 +158,10 @@ export const JSONSchemaEditor: React.FC = ({form, name, d // Update form when basic mode changes useEffect(() => { + if (skipSyncRef.current) { + skipSyncRef.current = false + return + } if (mode === "basic" && supportsBasicMode) { const config: SchemaConfig = { responseFormat, diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx index 0cae1a371..331afe085 100644 --- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx +++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx 
@@ -5,10 +5,12 @@ import {ArrowLeft, Info, SidebarSimple} from "@phosphor-icons/react" import {Button, Form, Input, Space, Tag, Tooltip, Typography} from "antd" import {useAtomValue, useSetAtom} from "jotai" import dynamic from "next/dynamic" +import {useRouter} from "next/router" import {createUseStyles} from "react-jss" import {message} from "@/oss/components/AppMessageContext" import {useAppId} from "@/oss/hooks/useAppId" +import useURL from "@/oss/hooks/useURL" import {EvaluationSettingsTemplate, JSSTheme, SettingsPreset} from "@/oss/lib/Types" import { CreateEvaluationConfigData, @@ -127,6 +129,8 @@ const ConfigureEvaluator = ({ const routeAppId = useAppId() const apps = useAppList() const appId = routeAppId ?? apps?.[0]?.app_id + const router = useRouter() + const {projectURL} = useURL() const classes = useStyles() // ================================================================ @@ -352,6 +356,13 @@ const ConfigureEvaluator = ({ if (createdConfig) { // Use commitPlayground to update state and switch to edit mode commitPlayground(createdConfig) + if (uiVariant === "page" && createdConfig.id) { + await router.replace( + `${projectURL}/evaluators/configure/${encodeURIComponent( + createdConfig.id, + )}`, + ) + } } } From 3fda588bf9c16167ec5ed6ba4530f0ecdca5c637 Mon Sep 17 00:00:00 2001 From: Kaosiso Ezealigo Date: Thu, 15 Jan 2026 10:12:28 +0100 Subject: [PATCH 02/20] fix: improve JSON schema parsing and synchronization in JSONSchemaEditor --- .../JSONSchema/JSONSchemaEditor.tsx | 44 ++++++++++--------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx index 3f7984b1e..9e50da98f 100644 --- 
a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx +++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx @@ -78,10 +78,21 @@ export const JSONSchemaEditor: React.FC = ({form, name, d }) const lastSyncedValueRef = useRef(undefined) - const skipSyncRef = useRef(false) const namePath = useMemo(() => (Array.isArray(name) ? name : [name]), [name]) + const parseSchemaObject = useCallback((value: string) => { + try { + const parsed = JSON.parse(value) + if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) { + return null + } + return parsed as Record + } catch { + return null + } + }, []) + const applyParsedConfig = useCallback((parsed: SchemaConfig) => { setResponseFormat(parsed.responseFormat) setIncludeReasoning(parsed.includeReasoning) @@ -99,14 +110,14 @@ export const JSONSchemaEditor: React.FC = ({form, name, d }, []) const syncFormValue = useCallback( - (value: string) => { - const current = form.getFieldValue(namePath) - if (current === value && lastSyncedValueRef.current === value) return + (value: string, parsedValue?: Record | null) => { + if (lastSyncedValueRef.current === value) return - form.setFieldValue(namePath, value) + const nextValue = parsedValue ?? parseSchemaObject(value) ?? 
value + form.setFieldValue(namePath, nextValue) lastSyncedValueRef.current = value }, - [form, namePath], + [form, namePath, parseSchemaObject], ) const getDefaultConfig = useCallback((): SchemaConfig => { @@ -121,9 +132,10 @@ export const JSONSchemaEditor: React.FC = ({form, name, d const applyConfigAndSync = useCallback( (config: SchemaConfig) => { applyParsedConfig(config) - const schemaString = JSON.stringify(generateJSONSchema(config), null, 2) + const schema = generateJSONSchema(config) + const schemaString = JSON.stringify(schema, null, 2) setRawSchema(schemaString) - syncFormValue(schemaString) + syncFormValue(schemaString, schema) setSupportsBasicMode(true) }, [applyParsedConfig, syncFormValue], @@ -134,7 +146,6 @@ export const JSONSchemaEditor: React.FC = ({form, name, d if (!defaultValue) { setSupportsBasicMode(true) setRawSchema("") - skipSyncRef.current = true return } @@ -147,7 +158,6 @@ export const JSONSchemaEditor: React.FC = ({form, name, d setSupportsBasicMode(isSchemaCompatibleWithBasicMode(defaultValue)) setRawSchema(defaultValue) - skipSyncRef.current = true }, [defaultValue, applyParsedConfig]) useEffect(() => { @@ -158,10 +168,6 @@ export const JSONSchemaEditor: React.FC = ({form, name, d // Update form when basic mode changes useEffect(() => { - if (skipSyncRef.current) { - skipSyncRef.current = false - return - } if (mode === "basic" && supportsBasicMode) { const config: SchemaConfig = { responseFormat, @@ -172,7 +178,7 @@ export const JSONSchemaEditor: React.FC = ({form, name, d const schema = generateJSONSchema(config) const schemaString = JSON.stringify(schema, null, 2) - syncFormValue(schemaString) + syncFormValue(schemaString, schema) } }, [ mode, @@ -200,7 +206,7 @@ export const JSONSchemaEditor: React.FC = ({form, name, d const schema = generateJSONSchema(config) const schemaString = JSON.stringify(schema, null, 2) setRawSchema(schemaString) - syncFormValue(schemaString) + syncFormValue(schemaString, schema) 
setSupportsBasicMode(true) setMode("advanced") return @@ -273,11 +279,7 @@ export const JSONSchemaEditor: React.FC = ({form, name, d value ? isSchemaCompatibleWithBasicMode(value) : false, ) - if (Array.isArray(name)) { - form.setFieldValue(name, value) - } else { - form.setFieldValue([name], value) - } + syncFormValue(value) } }} editorProps={{ From 66f839ad802ada5f7b2a0a6272bf8ace6c9e849b Mon Sep 17 00:00:00 2001 From: Kaosiso Ezealigo Date: Thu, 15 Jan 2026 11:47:12 +0100 Subject: [PATCH 03/20] added console log --- .../autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx | 1 + 1 file changed, 1 insertion(+) diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx index 331afe085..beef41885 100644 --- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx +++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx @@ -154,6 +154,7 @@ const ConfigureEvaluator = ({ null, ) const [form] = Form.useForm() + console.log("ConfigureEvaluator: ", {form: form.getFieldsValue()}) const [submitLoading, setSubmitLoading] = useState(false) // Store form ref in atom so DebugSection can access it From ee481b7f69b15f325f71d1166aa8084ce47800c5 Mon Sep 17 00:00:00 2001 From: Kaosiso Ezealigo Date: Thu, 15 Jan 2026 12:18:59 +0100 Subject: [PATCH 04/20] fix --- web/oss/src/services/evaluators/index.ts | 31 ++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/web/oss/src/services/evaluators/index.ts b/web/oss/src/services/evaluators/index.ts index 2a9bb15de..30e0f5a00 100644 --- a/web/oss/src/services/evaluators/index.ts +++ b/web/oss/src/services/evaluators/index.ts @@ -103,6 +103,22 @@ export const fetchAllEvaluators = async (includeArchived = false) => { } // Evaluator Configs +const 
normalizeSettingsValues = (settingsValues?: Record | null) => { + if (!settingsValues) return settingsValues + const jsonSchema = settingsValues.json_schema + if (typeof jsonSchema !== "string") return settingsValues + + try { + const parsed = JSON.parse(jsonSchema) + if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) { + return settingsValues + } + return {...settingsValues, json_schema: parsed} + } catch { + return settingsValues + } +} + export const fetchAllEvaluatorConfigs = async ( appId?: string | null, projectIdOverride?: string | null, @@ -123,6 +139,7 @@ export const fetchAllEvaluatorConfigs = async ( }) const evaluatorConfigs = (response.data || []).map((item: EvaluatorConfig) => ({ ...item, + settings_values: normalizeSettingsValues(item.settings_values), icon_url: evaluatorIconsMap[item.evaluator_key as keyof typeof evaluatorIconsMap], color: tagColors[stringToNumberInRange(item.evaluator_key, 0, tagColors.length - 1)], })) as EvaluatorConfig[] @@ -137,8 +154,13 @@ export const createEvaluatorConfig = async ( const {projectId} = getProjectValues() void _appId - return axios.post(`/evaluators/configs?project_id=${projectId}`, { + const normalizedConfig = { ...config, + settings_values: normalizeSettingsValues(config.settings_values), + } + + return axios.post(`/evaluators/configs?project_id=${projectId}`, { + ...normalizedConfig, }) } @@ -148,7 +170,12 @@ export const updateEvaluatorConfig = async ( ) => { const {projectId} = getProjectValues() - return axios.put(`/evaluators/configs/${configId}?project_id=${projectId}`, config) + const normalizedConfig = { + ...config, + settings_values: normalizeSettingsValues(config.settings_values), + } + + return axios.put(`/evaluators/configs/${configId}?project_id=${projectId}`, normalizedConfig) } export const deleteEvaluatorConfig = async (configId: string) => { From e916b9e2ba7b4ece9a2cc0a9a9edfaff0f76b6ba Mon Sep 17 00:00:00 2001 From: Kaosiso Ezealigo Date: Thu, 15 Jan 2026 12:40:07 +0100 
Subject: [PATCH 05/20] revert --- .../JSONSchema/JSONSchemaEditor.tsx | 37 +++++++------------ web/oss/src/services/evaluators/index.ts | 31 +--------------- 2 files changed, 16 insertions(+), 52 deletions(-) diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx index 9e50da98f..6aff01d8c 100644 --- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx +++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx @@ -81,18 +81,6 @@ export const JSONSchemaEditor: React.FC = ({form, name, d const namePath = useMemo(() => (Array.isArray(name) ? name : [name]), [name]) - const parseSchemaObject = useCallback((value: string) => { - try { - const parsed = JSON.parse(value) - if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) { - return null - } - return parsed as Record - } catch { - return null - } - }, []) - const applyParsedConfig = useCallback((parsed: SchemaConfig) => { setResponseFormat(parsed.responseFormat) setIncludeReasoning(parsed.includeReasoning) @@ -110,14 +98,14 @@ export const JSONSchemaEditor: React.FC = ({form, name, d }, []) const syncFormValue = useCallback( - (value: string, parsedValue?: Record | null) => { - if (lastSyncedValueRef.current === value) return + (value: string) => { + const current = form.getFieldValue(namePath) + if (current === value && lastSyncedValueRef.current === value) return - const nextValue = parsedValue ?? parseSchemaObject(value) ?? 
value - form.setFieldValue(namePath, nextValue) + form.setFieldValue(namePath, value) lastSyncedValueRef.current = value }, - [form, namePath, parseSchemaObject], + [form, namePath], ) const getDefaultConfig = useCallback((): SchemaConfig => { @@ -132,10 +120,9 @@ export const JSONSchemaEditor: React.FC = ({form, name, d const applyConfigAndSync = useCallback( (config: SchemaConfig) => { applyParsedConfig(config) - const schema = generateJSONSchema(config) - const schemaString = JSON.stringify(schema, null, 2) + const schemaString = JSON.stringify(generateJSONSchema(config), null, 2) setRawSchema(schemaString) - syncFormValue(schemaString, schema) + syncFormValue(schemaString) setSupportsBasicMode(true) }, [applyParsedConfig, syncFormValue], @@ -178,7 +165,7 @@ export const JSONSchemaEditor: React.FC = ({form, name, d const schema = generateJSONSchema(config) const schemaString = JSON.stringify(schema, null, 2) - syncFormValue(schemaString, schema) + syncFormValue(schemaString) } }, [ mode, @@ -206,7 +193,7 @@ export const JSONSchemaEditor: React.FC = ({form, name, d const schema = generateJSONSchema(config) const schemaString = JSON.stringify(schema, null, 2) setRawSchema(schemaString) - syncFormValue(schemaString, schema) + syncFormValue(schemaString) setSupportsBasicMode(true) setMode("advanced") return @@ -279,7 +266,11 @@ export const JSONSchemaEditor: React.FC = ({form, name, d value ? 
isSchemaCompatibleWithBasicMode(value) : false, ) - syncFormValue(value) + if (Array.isArray(name)) { + form.setFieldValue(name, value) + } else { + form.setFieldValue([name], value) + } } }} editorProps={{ diff --git a/web/oss/src/services/evaluators/index.ts b/web/oss/src/services/evaluators/index.ts index 30e0f5a00..2a9bb15de 100644 --- a/web/oss/src/services/evaluators/index.ts +++ b/web/oss/src/services/evaluators/index.ts @@ -103,22 +103,6 @@ export const fetchAllEvaluators = async (includeArchived = false) => { } // Evaluator Configs -const normalizeSettingsValues = (settingsValues?: Record | null) => { - if (!settingsValues) return settingsValues - const jsonSchema = settingsValues.json_schema - if (typeof jsonSchema !== "string") return settingsValues - - try { - const parsed = JSON.parse(jsonSchema) - if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) { - return settingsValues - } - return {...settingsValues, json_schema: parsed} - } catch { - return settingsValues - } -} - export const fetchAllEvaluatorConfigs = async ( appId?: string | null, projectIdOverride?: string | null, @@ -139,7 +123,6 @@ export const fetchAllEvaluatorConfigs = async ( }) const evaluatorConfigs = (response.data || []).map((item: EvaluatorConfig) => ({ ...item, - settings_values: normalizeSettingsValues(item.settings_values), icon_url: evaluatorIconsMap[item.evaluator_key as keyof typeof evaluatorIconsMap], color: tagColors[stringToNumberInRange(item.evaluator_key, 0, tagColors.length - 1)], })) as EvaluatorConfig[] @@ -154,13 +137,8 @@ export const createEvaluatorConfig = async ( const {projectId} = getProjectValues() void _appId - const normalizedConfig = { - ...config, - settings_values: normalizeSettingsValues(config.settings_values), - } - return axios.post(`/evaluators/configs?project_id=${projectId}`, { - ...normalizedConfig, + ...config, }) } @@ -170,12 +148,7 @@ export const updateEvaluatorConfig = async ( ) => { const {projectId} = getProjectValues() - 
const normalizedConfig = { - ...config, - settings_values: normalizeSettingsValues(config.settings_values), - } - - return axios.put(`/evaluators/configs/${configId}?project_id=${projectId}`, normalizedConfig) + return axios.put(`/evaluators/configs/${configId}?project_id=${projectId}`, config) } export const deleteEvaluatorConfig = async (configId: string) => { From 8dc83882765203faecd2c087dced532e2813d922 Mon Sep 17 00:00:00 2001 From: Kaosiso Ezealigo Date: Thu, 15 Jan 2026 13:26:56 +0100 Subject: [PATCH 06/20] removed console log --- .../autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx | 1 - 1 file changed, 1 deletion(-) diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx index beef41885..331afe085 100644 --- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx +++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx @@ -154,7 +154,6 @@ const ConfigureEvaluator = ({ null, ) const [form] = Form.useForm() - console.log("ConfigureEvaluator: ", {form: form.getFieldsValue()}) const [submitLoading, setSubmitLoading] = useState(false) // Store form ref in atom so DebugSection can access it From 91a88ba0c7d1343f23af0b0a83fde056a7531122 Mon Sep 17 00:00:00 2001 From: bekossy <99529776+bekossy@users.noreply.github.com> Date: Thu, 15 Jan 2026 15:02:00 +0000 Subject: [PATCH 07/20] v0.77.4 --- api/pyproject.toml | 2 +- sdk/pyproject.toml | 2 +- web/ee/package.json | 2 +- web/oss/package.json | 4 ++-- web/package.json | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/api/pyproject.toml b/api/pyproject.toml index dbf772b78..8882202a4 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "api" -version = "0.77.3" +version = "0.77.4" description = 
"Agenta API" authors = [ { name = "Mahmoud Mabrouk", email = "mahmoud@agenta.ai" }, diff --git a/sdk/pyproject.toml b/sdk/pyproject.toml index a26b90916..079717a0b 100644 --- a/sdk/pyproject.toml +++ b/sdk/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "agenta" -version = "0.77.3" +version = "0.77.4" description = "The SDK for agenta is an open-source LLMOps platform." readme = "README.md" authors = [ diff --git a/web/ee/package.json b/web/ee/package.json index 78c5f6e45..d21522b18 100644 --- a/web/ee/package.json +++ b/web/ee/package.json @@ -1,6 +1,6 @@ { "name": "@agenta/ee", - "version": "0.77.3", + "version": "0.77.4", "private": true, "engines": { "node": ">=18" diff --git a/web/oss/package.json b/web/oss/package.json index d6dc78e83..30153a341 100644 --- a/web/oss/package.json +++ b/web/oss/package.json @@ -1,6 +1,6 @@ { "name": "@agenta/oss", - "version": "0.77.3", + "version": "0.77.4", "private": true, "engines": { "node": ">=18" @@ -20,6 +20,7 @@ }, "dependencies": { "@agenta/web-tests": "workspace:../tests", + "@agentaai/nextstepjs": "^2.1.3-agenta.1", "@ant-design/colors": "^7.2.1", "@ant-design/cssinjs": "^2.0.1", "@ant-design/icons": "^6.1.0", @@ -89,7 +90,6 @@ "lodash": "^4.17.21", "lucide-react": "^0.475.0", "motion": "^12.0.0", - "@agentaai/nextstepjs": "^2.1.3-agenta.1", "next": "15.5.9", "papaparse": "^5.5.3", "postcss": "^8.5.6", diff --git a/web/package.json b/web/package.json index 5560faf56..117e5dcb5 100644 --- a/web/package.json +++ b/web/package.json @@ -1,6 +1,6 @@ { "name": "agenta-web", - "version": "0.77.3", + "version": "0.77.4", "workspaces": [ "ee", "oss", From 17b70f284d3b7dc1f07c8e7d132be0e1c008a0e3 Mon Sep 17 00:00:00 2001 From: Mahmoud Mabrouk Date: Thu, 15 Jan 2026 20:30:05 +0100 Subject: [PATCH 08/20] docs: update testset docs to new SDK Switch examples from legacy /testsets APIs to ag.testsets + aevaluate, and clarify revision vs testset IDs. 
--- .../evaluations/sdk/testset-management.ipynb | 10 +- .../_evaluation-from-sdk/01-quick-start.mdx | 66 ++++---- .../02-setup-configuration.mdx | 6 +- .../03-managing-test-sets.mdx | 62 +++++-- .../04-configuring-evaluators.mdx | 53 +++--- .../05-running-evaluations.mdx | 64 +++---- .../06-viewing-results.mdx | 32 ++-- .../02-managing-testsets.mdx | 42 +++-- .../02-create-programatically.mdx | 79 +++++---- .../docs/tutorials/sdk/_evaluate-with-SDK.mdx | 158 +++++++++--------- examples/jupyter/evaluation/quick-start.ipynb | 4 +- .../evaluation/testset-management.ipynb | 4 +- 12 files changed, 303 insertions(+), 277 deletions(-) diff --git a/api/ee/tests/manual/evaluations/sdk/testset-management.ipynb b/api/ee/tests/manual/evaluations/sdk/testset-management.ipynb index 0eed3141b..49124966e 100644 --- a/api/ee/tests/manual/evaluations/sdk/testset-management.ipynb +++ b/api/ee/tests/manual/evaluations/sdk/testset-management.ipynb @@ -120,8 +120,8 @@ "print(f\" Slug: {testset.slug}\")\n", "print(f\" Description: {testset.description}\")\n", "\n", - "# Save the ID for later use\n", - "testset_id = testset.id" + "# Save the parent testset ID for later use\n", + "testset_id = testset.testset_id or testset.id" ] }, { @@ -137,8 +137,10 @@ " Description: A testset of countries and their capitals for geography evaluation\n", "```\n", "\n", - "The `create_testset` function returns a `SimpleTestset` object with the following fields:\n", - "- `id`: Unique UUID for the testset\n", + "The `acreate` function returns a `TestsetRevision` object with the following fields:\n", + "- `id`: The revision UUID\n", + "- `testset_id`: The parent testset UUID\n", + "- `version`: The revision version\n", "- `name`: The name you provided\n", "- `slug`: A shortened identifier\n", "- `description`: Your description\n", diff --git a/docs/docs/evaluation/_evaluation-from-sdk/01-quick-start.mdx b/docs/docs/evaluation/_evaluation-from-sdk/01-quick-start.mdx index c7d594aef..ccc62414b 100644 --- 
a/docs/docs/evaluation/_evaluation-from-sdk/01-quick-start.mdx +++ b/docs/docs/evaluation/_evaluation-from-sdk/01-quick-start.mdx @@ -29,41 +29,51 @@ pip install -U agenta ## Quick example ```python +import asyncio import agenta as ag -from agenta.client.api import AgentaApi +from agenta.sdk.evaluations import aevaluate -# Initialize the SDK -client = AgentaApi( - base_url="https://cloud.agenta.ai/api", - api_key="your-api-key" -) +ag.init(host="https://cloud.agenta.ai", api_key="your-api-key") -# Create a test set -test_set = client.testsets.create_testset( - request={ - "name": "my_test_set", - "csvdata": [ - {"input": "Hello", "expected": "Hi there!"}, - {"input": "How are you?", "expected": "I'm doing well!"} - ] - } +@ag.application( + slug="capital_finder", + name="Capital Finder", ) +async def capital_finder(country: str): + capitals = { + "Germany": "Berlin", + "France": "Paris", + } + return capitals.get(country, "Unknown") -# Run evaluation -evaluation = client.evaluations.create_evaluation( - app_id="your-app-id", - variant_ids=["variant-id"], - testset_id=test_set.id, - evaluators_configs=["evaluator-config-id"] +@ag.evaluator( + slug="exact_match", + name="Exact Match", ) +async def exact_match(expected: str, outputs: str): + return { + "score": 1.0 if outputs == expected else 0.0, + "success": outputs == expected, + } -# Check status -status = client.evaluations.fetch_evaluation_status(evaluation.id) -print(f"Evaluation status: {status}") - -# Get results when complete -results = client.evaluations.fetch_evaluation_results(evaluation.id) -print(results) +async def run(): + testset = await ag.testsets.acreate( + name="my_test_set", + data=[ + {"country": "Germany", "expected": "Berlin"}, + {"country": "France", "expected": "Paris"}, + ], + ) + + result = await aevaluate( + testsets=[testset.id], + applications=[capital_finder], + evaluators=[exact_match], + ) + return result + +result = asyncio.run(run()) +print(result) ``` ## Next steps diff --git 
a/docs/docs/evaluation/_evaluation-from-sdk/02-setup-configuration.mdx b/docs/docs/evaluation/_evaluation-from-sdk/02-setup-configuration.mdx index 28b858b81..1bf90a330 100644 --- a/docs/docs/evaluation/_evaluation-from-sdk/02-setup-configuration.mdx +++ b/docs/docs/evaluation/_evaluation-from-sdk/02-setup-configuration.mdx @@ -18,7 +18,7 @@ pip install -U agenta ## Initialize the SDK client ```python -from agenta.client.api import AgentaApi +import agenta as ag app_id = "667d8cfad1812781f7e375d9" @@ -29,8 +29,8 @@ api_key = "EUqJGOUu.xxxx" # Host host = "https://cloud.agenta.ai" -# Initialize the client -client = AgentaApi(base_url=host + "/api", api_key=api_key) +# Initialize the SDK +ag.init(host=host, api_key=api_key) ``` ## Configuration options diff --git a/docs/docs/evaluation/_evaluation-from-sdk/03-managing-test-sets.mdx b/docs/docs/evaluation/_evaluation-from-sdk/03-managing-test-sets.mdx index d86671094..f6c6aa515 100644 --- a/docs/docs/evaluation/_evaluation-from-sdk/03-managing-test-sets.mdx +++ b/docs/docs/evaluation/_evaluation-from-sdk/03-managing-test-sets.mdx @@ -5,35 +5,61 @@ description: "Learn how to create, load, and manage test sets using the SDK" sidebar_position: 3 --- - - -## Creating test sets +## Creating, retrieving, and updating test sets ```python -from agenta.client.types.new_testset import NewTestset +import asyncio +import agenta as ag -csvdata = [ - {"country": "france", "capital": "Paris"}, - {"country": "Germany", "capital": "Berlin"} -] +# Initialize from environment variables if set (AGENTA_HOST, AGENTA_API_KEY) +ag.init() -response = client.testsets.create_testset( - request=NewTestset(name="test set", csvdata=csvdata) -) -test_set_id = response.id -``` +async def main(): + # Create a testset (returns a TestsetRevision) + created = await ag.testsets.acreate( + name="test set", + data=[ + {"country": "France", "capital": "Paris"}, + {"country": "Germany", "capital": "Berlin"}, + ], + ) -## Loading existing test sets + 
testset_id = created.testset_id or created.id + print(f"Testset ID: {testset_id}") + print(f"Revision ID: {created.id}") - + # Retrieve the latest revision for a testset + retrieved = await ag.testsets.aretrieve(testset_id=testset_id) + if retrieved: + print(f"Retrieved testset revision: {retrieved.id}") + print(f"Version: {retrieved.version}") -## Updating test sets + # Update the testset data + await ag.testsets.aedit( + testset_id=testset_id, + name="test set v2", + data=[ + {"country": "France", "capital": "Paris"}, + {"country": "Germany", "capital": "Berlin"}, + {"country": "Spain", "capital": "Madrid"}, + ], + ) - + # Fetch the latest revision after editing + updated = await ag.testsets.aretrieve(testset_id=testset_id) + if updated: + print(f"Latest revision ID: {updated.id}") + print(f"Version: {updated.version}") + +asyncio.run(main()) +``` ## Deleting test sets - +The SDK does not currently expose delete helpers. To archive or delete testsets directly, use the API endpoints: + +- [Archive simple testset](/reference/api/archive-simple-testset) +- [Unarchive simple testset](/reference/api/unarchive-simple-testset) ## Next steps diff --git a/docs/docs/evaluation/_evaluation-from-sdk/04-configuring-evaluators.mdx b/docs/docs/evaluation/_evaluation-from-sdk/04-configuring-evaluators.mdx index 0de67a068..5eb4955b2 100644 --- a/docs/docs/evaluation/_evaluation-from-sdk/04-configuring-evaluators.mdx +++ b/docs/docs/evaluation/_evaluation-from-sdk/04-configuring-evaluators.mdx @@ -5,46 +5,41 @@ description: "Learn how to configure built-in and custom evaluators using the SD sidebar_position: 4 --- - - ## Creating evaluators -### Custom code evaluator - -Let's create a custom code evaluator that returns 1.0 if the first letter of the app output is uppercase: +### Custom evaluator ```python -code_snippet = """ -from typing import Dict - -def evaluate( - app_params: Dict[str, str], - inputs: Dict[str, str], - output: str, # output of the llm app - datapoint: 
Dict[str, str] # contains the testset row -) -> float: - if output and output[0].isupper(): - return 1.0 - else: - return 0.0 -""" - -response = client.evaluators.create_new_evaluator_config( - app_id=app_id, - name="capital_letter_evaluator", - evaluator_key="auto_custom_code_run", - settings_values={"code": code_snippet} +import agenta as ag + +@ag.evaluator( + slug="capital_letter_evaluator", + name="Capital Letter Evaluator", ) -letter_match_eval_id = response.id +async def capital_letter_evaluator(outputs: str): + is_capitalized = bool(outputs) and outputs[0].isupper() + return { + "score": 1.0 if is_capitalized else 0.0, + "success": is_capitalized, + } ``` -## Using built-in evaluators +### Built-in evaluators + +Agenta ships built-in evaluators you can configure directly: - +```python +from agenta.sdk.workflows import builtin + +exact_match = builtin.auto_exact_match( + name="Capital Exact Match", + correct_answer_key="capital", +) +``` ## Configuring evaluator settings - +Built-in evaluators accept parameters (like `correct_answer_key`) when you construct them, so you can tailor scoring to your testset schema. 
## Next steps diff --git a/docs/docs/evaluation/_evaluation-from-sdk/05-running-evaluations.mdx b/docs/docs/evaluation/_evaluation-from-sdk/05-running-evaluations.mdx index aa9a523a0..036aeb41e 100644 --- a/docs/docs/evaluation/_evaluation-from-sdk/05-running-evaluations.mdx +++ b/docs/docs/evaluation/_evaluation-from-sdk/05-running-evaluations.mdx @@ -5,55 +5,35 @@ description: "Learn how to run evaluations programmatically using the SDK" sidebar_position: 5 --- - - ## Running an evaluation -First, let's grab the first variant in the app: - ```python -response = client.apps.list_app_variants(app_id=app_id) -print(response) -myvariant_id = response[0].variant_id -``` - -Then, let's start the evaluation jobs: - -```python -from agenta.client.types.llm_run_rate_limit import LlmRunRateLimit - -rate_limit_config = LlmRunRateLimit( - batch_size=10, # number of rows to call in parallel - max_retries=3, # max number of time to retry a failed llm call - retry_delay=2, # delay before retrying a failed llm call - delay_between_batches=5, # delay between batches -) - -response = client.evaluations.create_evaluation( - app_id=app_id, - variant_ids=[myvariant_id], - testset_id=test_set_id, - evaluators_configs=[letter_match_eval_id], - rate_limit=rate_limit_config -) -print(response) +import asyncio +import agenta as ag +from agenta.sdk.evaluations import aevaluate + +# Initialize from environment variables if set (AGENTA_HOST, AGENTA_API_KEY) +ag.init() + +# Assume `testset`, `capital_finder`, and `capital_letter_evaluator` are already defined +async def main(): + result = await aevaluate( + name="My Evaluation", + # You can pass a testset revision id (recommended) + testsets=[testset.id], + applications=[capital_finder], + evaluators=[capital_letter_evaluator], + ) + + print(f"Run ID: {result['run'].id}") + return result + +result = asyncio.run(main()) ``` ## Checking evaluation status -Now we can check for the status of the job: - -```python 
-client.evaluations.fetch_evaluation_status('667d98fbd1812781f7e3761a') -``` - -## Configuring rate limits - - - -## Handling errors - - +`aevaluate()` prints progress as it runs. You can also look up the run in the UI using the run ID printed above. ## Next steps diff --git a/docs/docs/evaluation/_evaluation-from-sdk/06-viewing-results.mdx b/docs/docs/evaluation/_evaluation-from-sdk/06-viewing-results.mdx index 7e8609e71..c17e0c819 100644 --- a/docs/docs/evaluation/_evaluation-from-sdk/06-viewing-results.mdx +++ b/docs/docs/evaluation/_evaluation-from-sdk/06-viewing-results.mdx @@ -5,40 +5,28 @@ description: "Learn how to retrieve and analyze evaluation results using the SDK sidebar_position: 6 --- - - ## Fetching overall results -As soon as the evaluation is done, we can fetch the overall results: +`aevaluate()` returns the run, scenarios, and metrics in a single object. You can inspect the metrics directly: ```python -response = client.evaluations.fetch_evaluation_results('667d98fbd1812781f7e3761a') - -results = [ - (evaluator["evaluator_config"]["name"], evaluator["result"]) - for evaluator in response["results"] -] -print(results) +metrics = result["metrics"] +print(metrics) ``` ## Fetching detailed results -Get detailed results for each test case: +Use the built-in display helper to render a detailed report: ```python -detailed_results = client.evaluations.fetch_evaluation_scenarios( - evaluations_ids='667d98fbd1812781f7e3761a' -) -print(detailed_results) -``` +import asyncio +from agenta.sdk.evaluations import display -## Analyzing results +async def main(): + await display(result) - - -## Exporting results - - +asyncio.run(main()) +``` ## Next steps diff --git a/docs/docs/evaluation/evaluation-from-sdk/02-managing-testsets.mdx b/docs/docs/evaluation/evaluation-from-sdk/02-managing-testsets.mdx index db8b6239f..cc011277c 100644 --- a/docs/docs/evaluation/evaluation-from-sdk/02-managing-testsets.mdx +++ 
b/docs/docs/evaluation/evaluation-from-sdk/02-managing-testsets.mdx @@ -15,6 +15,19 @@ This guide covers how to create, list, and retrieve testsets using the Agenta SD Open in Google Colaboratory +:::tip Async examples +Agenta's SDK uses async APIs. In Jupyter/Colab you can use top-level `await`. In a regular Python script, wrap async code like this: + +```python +import asyncio + +async def main(): + ... + +asyncio.run(main()) +``` +::: + ## Creating a Testset Use `ag.testsets.acreate()` to create a new testset with data: @@ -35,7 +48,9 @@ testset = await ag.testsets.acreate( name="Country Capitals", ) -print(f"Created testset with ID: {testset.id}") +testset_id = testset.testset_id or testset.id +print(f"Testset ID: {testset_id}") +print(f"Revision ID: {testset.id}") print(f"Name: {testset.name}") print(f"Slug: {testset.slug}") ``` @@ -48,9 +63,11 @@ print(f"Slug: {testset.slug}") - `name`: The name of your testset. **Returns:** A `TestsetRevision` object containing: -- `id`: The UUID of the created testset +- `id`: The UUID of the created testset revision +- `testset_id`: The parent testset UUID (stable across revisions) - `name`: The testset name -- `slug`: The testset slug +- `slug`: The revision slug +- `version`: The revision version string (e.g. "1") - `data`: The test data (with `testcases` structure) **Sample Output:** @@ -95,7 +112,9 @@ testset = await ag.testsets.aupsert( ], ) -print(f"Upserted testset with ID: {testset.id}") +testset_id = testset.testset_id or testset.id +print(f"Testset ID: {testset_id}") +print(f"Revision ID: {testset.id}") ``` @@ -127,7 +146,8 @@ testsets = await ag.testsets.alist() print(f"Found {len(testsets)} testsets:") for testset in testsets: - print(f" - {testset.name} (ID: {testset.id})") + testset_id = testset.testset_id or testset.id + print(f" - {testset.name} (testset_id: {testset_id})") ``` @@ -135,10 +155,11 @@ for testset in testsets: **Parameters:** None required. 
-**Returns:** A list of `TestsetRevision` objects, each containing: -- `id`: The testset UUID +**Returns:** A list of `TestsetRevision` objects. For each item: +- `id`: The latest revision UUID +- `testset_id`: The parent testset UUID - `name`: The testset name -- `slug`: The testset slug +- `slug`: The revision slug - Additional metadata fields **Sample Output:** @@ -210,7 +231,7 @@ else: ``` :::info -Currently using the legacy testset API. When retrieving a testset, the function returns a `TestsetRevision` object with version "1". In the future, this will support the new versioning system where each update creates a new revision. +Testsets are versioned. Each update via `ag.testsets.aedit()` or `ag.testsets.aupsert()` creates a new `TestsetRevision`, while the parent `testset_id` stays the same. ::: ## Retrieving a Testset by Name @@ -240,7 +261,8 @@ async def get_testset_by_name(name: str): testset = await get_testset_by_name("Country Capitals") if testset: - print(f"Found testset: {testset.name} with ID: {testset.id}") + testset_id = testset.testset_id or testset.id + print(f"Found testset: {testset.name} (testset_id: {testset_id}, revision_id: {testset.id})") else: print("Testset not found") ``` diff --git a/docs/docs/evaluation/managing-test-sets/02-create-programatically.mdx b/docs/docs/evaluation/managing-test-sets/02-create-programatically.mdx index 79c271af2..966cc213b 100644 --- a/docs/docs/evaluation/managing-test-sets/02-create-programatically.mdx +++ b/docs/docs/evaluation/managing-test-sets/02-create-programatically.mdx @@ -11,14 +11,14 @@ Creating test sets programmatically allows you to automate test set generation, ## Creating via API -You can upload a test set using our API. Find the [API endpoint reference here](/reference/api/upload-file). +You can create a versioned testset using the simple testset API. Find the [API endpoint reference here](/reference/api/create-simple-testset). 
Here's an example of such a call: **HTTP Request:** ``` -POST /testsets +POST /preview/simple/testsets/ ``` @@ -26,58 +26,65 @@ POST /testsets ```json { - "name": "testsetname", - "csvdata": [ - { "column1": "row1col1", "column2": "row1col2" }, - { "column1": "row2col1", "column2": "row2col2" } - ] + "testset": { + "slug": "countries-capitals", + "name": "countries_capitals", + "data": { + "testcases": [ + {"data": {"country": "France", "capital": "Paris"}}, + {"data": {"country": "Germany", "capital": "Berlin"}} + ] + } + } } ``` ### Example with curl ```bash -curl -X POST "https://cloud.agenta.ai/api/testsets" \ +curl -X POST "https://cloud.agenta.ai/api/preview/simple/testsets/" \ -H "Content-Type: application/json" \ -H "Authorization: ApiKey YOUR_API_KEY" \ -d '{ - "name": "my_test_set", - "csvdata": [ - {"input": "Hello", "expected": "Hi there!"}, - {"input": "How are you?", "expected": "I am doing well!"} - ] + "testset": { + "slug": "my-test-set", + "name": "my_test_set", + "data": { + "testcases": [ + {"data": {"input": "Hello", "expected": "Hi there!"}}, + {"data": {"input": "How are you?", "expected": "I am doing well!"}} + ] + } + } }' ``` ## Creating via SDK ```python -from agenta.client.api import AgentaApi -from agenta.client.types.new_testset import NewTestset - -# Initialize the client -client = AgentaApi( - base_url="https://cloud.agenta.ai/api", - api_key="your-api-key" -) - -# Create test set data -csvdata = [ - {"country": "France", "capital": "Paris"}, - {"country": "Germany", "capital": "Berlin"}, - {"country": "Spain", "capital": "Madrid"} -] - -# Create the test set -response = client.testsets.create_testset( - request=NewTestset( +import asyncio +import agenta as ag + +ag.init(host="https://cloud.agenta.ai", api_key="your-api-key") + +async def main(): + # Create test set data + csvdata = [ + {"country": "France", "capital": "Paris"}, + {"country": "Germany", "capital": "Berlin"}, + {"country": "Spain", "capital": "Madrid"}, + ] + + # 
Create the testset (returns a TestsetRevision) + testset = await ag.testsets.acreate( name="countries_capitals", - csvdata=csvdata + data=csvdata, ) -) -test_set_id = response.id -print(f"Created test set with ID: {test_set_id}") + testset_revision_id = testset.id + print(f"Created testset revision with ID: {testset_revision_id}") + +asyncio.run(main()) ``` ## Next steps diff --git a/docs/docs/tutorials/sdk/_evaluate-with-SDK.mdx b/docs/docs/tutorials/sdk/_evaluate-with-SDK.mdx index defa5c1b0..a62577708 100644 --- a/docs/docs/tutorials/sdk/_evaluate-with-SDK.mdx +++ b/docs/docs/tutorials/sdk/_evaluate-with-SDK.mdx @@ -40,12 +40,7 @@ This operation is managed through TaskIQ tasks. The interactions with the LLM ap # In this example we will use the default template single_prompt which has the prompt "Determine the capital of {country}" # You can find the application ID in the URL. For example, in the URL https://cloud.agenta.ai/apps/666dde95962bbaffdb0072b5/playground?variant=app.default, the application ID is `666dde95962bbaffdb0072b5`. -from agenta.client.client import AgentaApi -# Let's list the applications -client.apps.list_apps() -``` - -```python +import agenta as ag app_id = "667d8cfad1812781f7e375d9" @@ -55,101 +50,102 @@ api_key = "EUqJGOUu.xxxx" # Host. 
host = "https://cloud.agenta.ai" -# Initialize the client - -client = AgentaApi(base_url=host + "/api", api_key=api_key) +# Initialize the SDK +ag.init(host=host, api_key=api_key) ``` -## Create a test set +## Define the application ```python -from agenta.client.types.new_testset import NewTestset +@ag.application( + slug="capital_finder", + name="Capital Finder", +) +async def capital_finder(country: str): + capitals = { + "Germany": "Berlin", + "France": "Paris", + } + return capitals.get(country, "Unknown") +``` -csvdata = [ - {"country": "france", "capital": "Paris"}, - {"country": "Germany", "capital": "paris"} - ] +## Create a test set -response = client.testsets.create_testset(request=NewTestset(name="test set", csvdata=csvdata)) -test_set_id = response.id +```python +import asyncio +import agenta as ag -# let's now update it +# Assumes `ag.init(...)` has already been called. csvdata = [ - {"country": "france", "capital": "Paris"}, - {"country": "Germany", "capital": "Berlin"} - ] - -client.testsets.update_testset(testset_id=test_set_id, request=NewTestset(name="test set", csvdata=csvdata)) + {"country": "France", "capital": "Paris"}, + {"country": "Germany", "capital": "Paris"}, +] + +async def main(): + # Create a testset (returns a TestsetRevision) + created = await ag.testsets.acreate(name="test set", data=csvdata) + testset_id = created.testset_id or created.id + + # Update the testset data + await ag.testsets.aedit( + testset_id=testset_id, + name="test set", + data=[ + {"country": "France", "capital": "Paris"}, + {"country": "Germany", "capital": "Berlin"}, + ], + ) + + # Fetch the latest revision after editing + updated = await ag.testsets.aretrieve(testset_id=testset_id) + return updated + +updated = asyncio.run(main()) +print(f"Latest revision ID: {updated.id}") ``` # Create evaluators ```python -# Create an evaluator that performs an exact match comparison on the 'capital' column -# You can find the list of evaluator keys and evaluators and their 
configurations in https://github.com/Agenta-AI/agenta/blob/main/agenta-backend/agenta_backend/resources/evaluators/evaluators.py -response = client.evaluators.create_new_evaluator_config(app_id=app_id, name="capital_evaluator", evaluator_key="auto_exact_match", settings_values={"correct_answer_key": "capital"}) -exact_match_eval_id = response.id - -code_snippet = """ -from typing import Dict - -def evaluate( - app_params: Dict[str, str], - inputs: Dict[str, str], - output: str, # output of the llm app - datapoint: Dict[str, str] # contains the testset row -) -> float: - if output and output[0].isupper(): - return 1.0 - else: - return 0.0 -""" - -response = client.evaluators.create_new_evaluator_config(app_id=app_id, name="capital_letter_evaluator", evaluator_key="auto_custom_code_run", settings_values={"code": code_snippet}) -letter_match_eval_id = response.id -``` - -```python -# get list of all evaluators -client.evaluators.get_evaluator_configs(app_id=app_id) -``` - -# Run an evaluation - -```python -response = client.apps.list_app_variants(app_id=app_id) -print(response) -myvariant_id = response[0].variant_id +@ag.evaluator( + slug="capital_exact_match", + name="Capital Exact Match", +) +async def exact_match(capital: str, outputs: str): + return { + "score": 1.0 if outputs == capital else 0.0, + "success": outputs == capital, + } + +@ag.evaluator( + slug="capital_letter_match", + name="Capital Letter Match", +) +async def letter_match(outputs: str): + is_capitalized = bool(outputs) and outputs[0].isupper() + return { + "score": 1.0 if is_capitalized else 0.0, + "success": is_capitalized, + } ``` -```python # Run an evaluation -from agenta.client.types.llm_run_rate_limit import LlmRunRateLimit -response = client.evaluations.create_evaluation(app_id=app_id, variant_ids=[myvariant_id], testset_id=test_set_id, evaluators_configs=[exact_match_eval_id, letter_match_eval_id], - rate_limit=LlmRunRateLimit( - batch_size=10, # number of rows to call in parallel - 
max_retries=3, # max number of time to retry a failed llm call - retry_delay=2, # delay before retrying a failed llm call - delay_between_batches=5, # delay between batches - ),) -print(response) -``` ```python -# check the status -client.evaluations.fetch_evaluation_status('667d98fbd1812781f7e3761a') -``` +import asyncio +from agenta.sdk.evaluations import aevaluate -```python -# fetch the overall results -response = client.evaluations.fetch_evaluation_results('667d98fbd1812781f7e3761a') +async def main(): + result = await aevaluate( + name="Capital evaluation", + testsets=[updated.id], + applications=[capital_finder], + evaluators=[exact_match, letter_match], + ) -results = [(evaluator["evaluator_config"]["name"], evaluator["result"]) for evaluator in response["results"]] -# End of Selection -``` + print(result) + return result -```python -# fetch the detailed results -client.evaluations.fetch_evaluation_scenarios(evaluations_ids='667d98fbd1812781f7e3761a') +result = asyncio.run(main()) ``` diff --git a/examples/jupyter/evaluation/quick-start.ipynb b/examples/jupyter/evaluation/quick-start.ipynb index 6467e4a2f..58d2d930a 100644 --- a/examples/jupyter/evaluation/quick-start.ipynb +++ b/examples/jupyter/evaluation/quick-start.ipynb @@ -330,7 +330,7 @@ "output_type": "stream", "text": [ "📝 Creating testset...\n", - "✅ Testset created with ID: 019a783b-7894-7c80-a5ce-25005d745f5f\n", + "✅ Testset revision created with ID: 019a783b-7894-7c80-a5ce-25005d745f5f\n", " Contains 4 test cases\n", "\n" ] @@ -349,7 +349,7 @@ "if not testset or not testset.id:\n", " print(\"❌ Failed to create testset\")\n", "else:\n", - " print(f\"✅ Testset created with ID: {testset.id}\")\n", + " print(f\"✅ Testset revision created with ID: {testset.id}\")\n", " print(f\" Contains {len(test_data)} test cases\\n\")" ] }, diff --git a/examples/jupyter/evaluation/testset-management.ipynb b/examples/jupyter/evaluation/testset-management.ipynb index 045a6c8d8..eb8e1b903 100644 --- 
a/examples/jupyter/evaluation/testset-management.ipynb +++ b/examples/jupyter/evaluation/testset-management.ipynb @@ -90,13 +90,13 @@ "id": "e2b89655", "metadata": {}, "outputs": [], - "source": "# Create a testset with simple data\ntestset = await ag.testsets.acreate(\n data=[\n {\"country\": \"Germany\", \"capital\": \"Berlin\"},\n {\"country\": \"France\", \"capital\": \"Paris\"},\n {\"country\": \"Spain\", \"capital\": \"Madrid\"},\n {\"country\": \"Italy\", \"capital\": \"Rome\"},\n {\"country\": \"Japan\", \"capital\": \"Tokyo\"},\n ],\n name=\"Country Capitals\",\n)\n\nprint(f\"✅ Created testset with ID: {testset.id}\")\nprint(f\" Name: {testset.name}\")\nprint(f\" Slug: {testset.slug}\")\n\n# Save the ID for later use\ntestset_id = testset.id" + "source": "# Create a testset with simple data\ntestset = await ag.testsets.acreate(\n data=[\n {\"country\": \"Germany\", \"capital\": \"Berlin\"},\n {\"country\": \"France\", \"capital\": \"Paris\"},\n {\"country\": \"Spain\", \"capital\": \"Madrid\"},\n {\"country\": \"Italy\", \"capital\": \"Rome\"},\n {\"country\": \"Japan\", \"capital\": \"Tokyo\"},\n ],\n name=\"Country Capitals\",\n)\n\nprint(f\"✅ Created testset with ID: {testset.id}\")\nprint(f\" Name: {testset.name}\")\nprint(f\" Slug: {testset.slug}\")\n\n# Save the parent testset ID for later use\ntestset_id = testset.testset_id or testset.id" }, { "cell_type": "markdown", "id": "852d13a8", "metadata": {}, - "source": "**Expected Output:**\n```\n✅ Created testset with ID: 01963413-3d39-7650-80ce-3ad5d688da6c\n Name: Country Capitals\n Slug: 3ad5d688da6c\n```\n\nThe `acreate` function returns a `TestsetRevision` object with the following fields:\n- `id`: Unique UUID for the testset\n- `name`: The name you provided\n- `slug`: A shortened identifier\n- `data`: The test data in a structured format" + "source": "**Expected Output:**\n```\n✅ Created testset with ID: 01963413-3d39-7650-80ce-3ad5d688da6c\n Name: Country Capitals\n Slug: 
3ad5d688da6c\n```\n\nThe `acreate` function returns a `TestsetRevision` object with the following fields:\n- `id`: The revision UUID\n- `testset_id`: The parent testset UUID\n- `version`: The revision version\n- `name`: The name you provided\n- `slug`: A shortened identifier\n- `data`: The test data in a structured format" }, { "cell_type": "markdown", From 753bdac9fba7a525dfbc368a80b4f4bd1b61dc2e Mon Sep 17 00:00:00 2001 From: Mahmoud Mabrouk Date: Thu, 15 Jan 2026 20:35:12 +0100 Subject: [PATCH 09/20] chore(docs): fix docusaurus build deps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add missing @docusaurus/theme-common dependency and update npm lockfile so > doc@0.0.0 build > docusaurus build [INFO] [en] Creating an optimized production build... [webpackbar] ℹ Compiling Client [webpackbar] ℹ Compiling Server [webpackbar] ✔ Server: Compiled successfully in 2.02s [webpackbar] ✔ Client: Compiled successfully in 2.88s [SUCCESS] Generated static files in "build". [INFO] Use `npm run serve` command to test your build locally. works in clean checkouts. 
--- docs/package-lock.json | 68 ++++++++++++++++++------------------------ docs/package.json | 1 + 2 files changed, 30 insertions(+), 39 deletions(-) diff --git a/docs/package-lock.json b/docs/package-lock.json index 205fad92b..62c7f7444 100644 --- a/docs/package-lock.json +++ b/docs/package-lock.json @@ -15,6 +15,7 @@ "@docusaurus/plugin-content-docs": "^3.9.2", "@docusaurus/plugin-ideal-image": "^3.9.2", "@docusaurus/preset-classic": "^3.9.2", + "@docusaurus/theme-common": "^3.9.2", "@docusaurus/theme-search-algolia": "^3.9.2", "@mdx-js/react": "^3.0.0", "clsx": "^2.0.0", @@ -160,6 +161,7 @@ "resolved": "https://registry.npmjs.org/@algolia/cache-browser-local-storage/-/cache-browser-local-storage-4.24.0.tgz", "integrity": "sha512-t63W9BnoXVrGy9iYHBgObNXqYXM3tYXCjDSHeNwnsc324r4o5UiVKUiAB4THQ5z9U5hTj6qUvwg/Ez43ZD85ww==", "license": "MIT", + "peer": true, "dependencies": { "@algolia/cache-common": "4.24.0" } @@ -168,13 +170,15 @@ "version": "4.24.0", "resolved": "https://registry.npmjs.org/@algolia/cache-common/-/cache-common-4.24.0.tgz", "integrity": "sha512-emi+v+DmVLpMGhp0V9q9h5CdkURsNmFC+cOS6uK9ndeJm9J4TiqSvPYVu+THUP8P/S08rxf5x2P+p3CfID0Y4g==", - "license": "MIT" + "license": "MIT", + "peer": true }, "node_modules/@algolia/cache-in-memory": { "version": "4.24.0", "resolved": "https://registry.npmjs.org/@algolia/cache-in-memory/-/cache-in-memory-4.24.0.tgz", "integrity": "sha512-gDrt2so19jW26jY3/MkFg5mEypFIPbPoXsQGQWAi6TrCPsNOSEYepBMPlucqWigsmEy/prp5ug2jy/N3PVG/8w==", "license": "MIT", + "peer": true, "dependencies": { "@algolia/cache-common": "4.24.0" } @@ -199,6 +203,7 @@ "resolved": "https://registry.npmjs.org/@algolia/client-account/-/client-account-4.24.0.tgz", "integrity": "sha512-adcvyJ3KjPZFDybxlqnf+5KgxJtBjwTPTeyG2aOyoJvx0Y8dUQAEOEVOJ/GBxX0WWNbmaSrhDURMhc+QeevDsA==", "license": "MIT", + "peer": true, "dependencies": { "@algolia/client-common": "4.24.0", "@algolia/client-search": "4.24.0", @@ -210,6 +215,7 @@ "resolved": 
"https://registry.npmjs.org/@algolia/client-common/-/client-common-4.24.0.tgz", "integrity": "sha512-bc2ROsNL6w6rqpl5jj/UywlIYC21TwSSoFHKl01lYirGMW+9Eek6r02Tocg4gZ8HAw3iBvu6XQiM3BEbmEMoiA==", "license": "MIT", + "peer": true, "dependencies": { "@algolia/requester-common": "4.24.0", "@algolia/transporter": "4.24.0" @@ -220,6 +226,7 @@ "resolved": "https://registry.npmjs.org/@algolia/client-search/-/client-search-4.24.0.tgz", "integrity": "sha512-uRW6EpNapmLAD0mW47OXqTP8eiIx5F6qN9/x/7HHO6owL3N1IXqydGwW5nhDFBrV+ldouro2W1VX3XlcUXEFCA==", "license": "MIT", + "peer": true, "dependencies": { "@algolia/client-common": "4.24.0", "@algolia/requester-common": "4.24.0", @@ -231,6 +238,7 @@ "resolved": "https://registry.npmjs.org/@algolia/client-analytics/-/client-analytics-4.24.0.tgz", "integrity": "sha512-y8jOZt1OjwWU4N2qr8G4AxXAzaa8DBvyHTWlHzX/7Me1LX8OayfgHexqrsL4vSBcoMmVw2XnVW9MhL+Y2ZDJXg==", "license": "MIT", + "peer": true, "dependencies": { "@algolia/client-common": "4.24.0", "@algolia/client-search": "4.24.0", @@ -243,6 +251,7 @@ "resolved": "https://registry.npmjs.org/@algolia/client-common/-/client-common-4.24.0.tgz", "integrity": "sha512-bc2ROsNL6w6rqpl5jj/UywlIYC21TwSSoFHKl01lYirGMW+9Eek6r02Tocg4gZ8HAw3iBvu6XQiM3BEbmEMoiA==", "license": "MIT", + "peer": true, "dependencies": { "@algolia/requester-common": "4.24.0", "@algolia/transporter": "4.24.0" @@ -253,6 +262,7 @@ "resolved": "https://registry.npmjs.org/@algolia/client-search/-/client-search-4.24.0.tgz", "integrity": "sha512-uRW6EpNapmLAD0mW47OXqTP8eiIx5F6qN9/x/7HHO6owL3N1IXqydGwW5nhDFBrV+ldouro2W1VX3XlcUXEFCA==", "license": "MIT", + "peer": true, "dependencies": { "@algolia/client-common": "4.24.0", "@algolia/requester-common": "4.24.0", @@ -288,6 +298,7 @@ "resolved": "https://registry.npmjs.org/@algolia/client-personalization/-/client-personalization-4.24.0.tgz", "integrity": "sha512-l5FRFm/yngztweU0HdUzz1rC4yoWCFo3IF+dVIVTfEPg906eZg5BOd1k0K6rZx5JzyyoP4LdmOikfkfGsKVE9w==", "license": "MIT", + "peer": true, 
"dependencies": { "@algolia/client-common": "4.24.0", "@algolia/requester-common": "4.24.0", @@ -299,6 +310,7 @@ "resolved": "https://registry.npmjs.org/@algolia/client-common/-/client-common-4.24.0.tgz", "integrity": "sha512-bc2ROsNL6w6rqpl5jj/UywlIYC21TwSSoFHKl01lYirGMW+9Eek6r02Tocg4gZ8HAw3iBvu6XQiM3BEbmEMoiA==", "license": "MIT", + "peer": true, "dependencies": { "@algolia/requester-common": "4.24.0", "@algolia/transporter": "4.24.0" @@ -324,7 +336,6 @@ "resolved": "https://registry.npmjs.org/@algolia/client-search/-/client-search-5.42.0.tgz", "integrity": "sha512-NZR7yyHj2WzK6D5X8gn+/KOxPdzYEXOqVdSaK/biU8QfYUpUuEA0sCWg/XlO05tPVEcJelF/oLrrNY3UjRbOww==", "license": "MIT", - "peer": true, "dependencies": { "@algolia/client-common": "5.42.0", "@algolia/requester-browser-xhr": "5.42.0", @@ -360,13 +371,15 @@ "version": "4.24.0", "resolved": "https://registry.npmjs.org/@algolia/logger-common/-/logger-common-4.24.0.tgz", "integrity": "sha512-LLUNjkahj9KtKYrQhFKCzMx0BY3RnNP4FEtO+sBybCjJ73E8jNdaKJ/Dd8A/VA4imVHP5tADZ8pn5B8Ga/wTMA==", - "license": "MIT" + "license": "MIT", + "peer": true }, "node_modules/@algolia/logger-console": { "version": "4.24.0", "resolved": "https://registry.npmjs.org/@algolia/logger-console/-/logger-console-4.24.0.tgz", "integrity": "sha512-X4C8IoHgHfiUROfoRCV+lzSy+LHMgkoEEU1BbKcsfnV0i0S20zyy0NLww9dwVHUWNfPPxdMU+/wKmLGYf96yTg==", "license": "MIT", + "peer": true, "dependencies": { "@algolia/logger-common": "4.24.0" } @@ -391,6 +404,7 @@ "resolved": "https://registry.npmjs.org/@algolia/recommend/-/recommend-4.24.0.tgz", "integrity": "sha512-P9kcgerfVBpfYHDfVZDvvdJv0lEoCvzNlOy2nykyt5bK8TyieYyiD0lguIJdRZZYGre03WIAFf14pgE+V+IBlw==", "license": "MIT", + "peer": true, "dependencies": { "@algolia/cache-browser-local-storage": "4.24.0", "@algolia/cache-common": "4.24.0", @@ -410,6 +424,7 @@ "resolved": "https://registry.npmjs.org/@algolia/client-common/-/client-common-4.24.0.tgz", "integrity": 
"sha512-bc2ROsNL6w6rqpl5jj/UywlIYC21TwSSoFHKl01lYirGMW+9Eek6r02Tocg4gZ8HAw3iBvu6XQiM3BEbmEMoiA==", "license": "MIT", + "peer": true, "dependencies": { "@algolia/requester-common": "4.24.0", "@algolia/transporter": "4.24.0" @@ -420,6 +435,7 @@ "resolved": "https://registry.npmjs.org/@algolia/client-search/-/client-search-4.24.0.tgz", "integrity": "sha512-uRW6EpNapmLAD0mW47OXqTP8eiIx5F6qN9/x/7HHO6owL3N1IXqydGwW5nhDFBrV+ldouro2W1VX3XlcUXEFCA==", "license": "MIT", + "peer": true, "dependencies": { "@algolia/client-common": "4.24.0", "@algolia/requester-common": "4.24.0", @@ -431,6 +447,7 @@ "resolved": "https://registry.npmjs.org/@algolia/requester-browser-xhr/-/requester-browser-xhr-4.24.0.tgz", "integrity": "sha512-Z2NxZMb6+nVXSjF13YpjYTdvV3032YTBSGm2vnYvYPA6mMxzM3v5rsCiSspndn9rzIW4Qp1lPHBvuoKJV6jnAA==", "license": "MIT", + "peer": true, "dependencies": { "@algolia/requester-common": "4.24.0" } @@ -440,6 +457,7 @@ "resolved": "https://registry.npmjs.org/@algolia/requester-node-http/-/requester-node-http-4.24.0.tgz", "integrity": "sha512-JF18yTjNOVYvU/L3UosRcvbPMGT9B+/GQWNWnenIImglzNVGpyzChkXLnrSf6uxwVNO6ESGu6oN8MqcGQcjQJw==", "license": "MIT", + "peer": true, "dependencies": { "@algolia/requester-common": "4.24.0" } @@ -460,7 +478,8 @@ "version": "4.24.0", "resolved": "https://registry.npmjs.org/@algolia/requester-common/-/requester-common-4.24.0.tgz", "integrity": "sha512-k3CXJ2OVnvgE3HMwcojpvY6d9kgKMPRxs/kVohrwF5WMr2fnqojnycZkxPoEg+bXm8fi5BBfFmOqgYztRtHsQA==", - "license": "MIT" + "license": "MIT", + "peer": true }, "node_modules/@algolia/requester-fetch": { "version": "5.42.0", @@ -491,6 +510,7 @@ "resolved": "https://registry.npmjs.org/@algolia/transporter/-/transporter-4.24.0.tgz", "integrity": "sha512-86nI7w6NzWxd1Zp9q3413dRshDqAzSbsQjhcDhPIatEFiZrL1/TjnHL8S7jVKFePlIMzDsZWXAXwXzcok9c5oA==", "license": "MIT", + "peer": true, "dependencies": { "@algolia/cache-common": "4.24.0", "@algolia/logger-common": "4.24.0", @@ -568,7 +588,6 @@ "resolved": 
"https://registry.npmjs.org/@babel/core/-/core-7.27.1.tgz", "integrity": "sha512-IaaGWsQqfsQWVLqMn9OB92MNN7zukfVA4s7KKAI0KfrrDsZ0yhi5uV4baBuLuN7n3vsZpwP8asPPcVwApxvjBQ==", "license": "MIT", - "peer": true, "dependencies": { "@ampproject/remapping": "^2.2.0", "@babel/code-frame": "^7.27.1", @@ -2342,7 +2361,6 @@ } ], "license": "MIT", - "peer": true, "engines": { "node": ">=18" }, @@ -2365,7 +2383,6 @@ } ], "license": "MIT", - "peer": true, "engines": { "node": ">=18" } @@ -2475,7 +2492,6 @@ "resolved": "https://registry.npmjs.org/postcss-selector-parser/-/postcss-selector-parser-7.1.0.tgz", "integrity": "sha512-8sLjZwK0R+JlxlYcTuVnyT2v+htpdrjDOKuMcOVdYjt52Lh8hWRYpxBPoKx/Zg+bcjc3wx6fmQevMmUztS/ccA==", "license": "MIT", - "peer": true, "dependencies": { "cssesc": "^3.0.0", "util-deprecate": "^1.0.2" @@ -2897,7 +2913,6 @@ "resolved": "https://registry.npmjs.org/postcss-selector-parser/-/postcss-selector-parser-7.1.0.tgz", "integrity": "sha512-8sLjZwK0R+JlxlYcTuVnyT2v+htpdrjDOKuMcOVdYjt52Lh8hWRYpxBPoKx/Zg+bcjc3wx6fmQevMmUztS/ccA==", "license": "MIT", - "peer": true, "dependencies": { "cssesc": "^3.0.0", "util-deprecate": "^1.0.2" @@ -4011,7 +4026,6 @@ "resolved": "https://registry.npmjs.org/@docusaurus/plugin-content-docs/-/plugin-content-docs-3.9.2.tgz", "integrity": "sha512-C5wZsGuKTY8jEYsqdxhhFOe1ZDjH0uIYJ9T/jebHwkyxqnr4wW0jTkB72OMqNjsoQRcb0JN3PcSeTwFlVgzCZg==", "license": "MIT", - "peer": true, "dependencies": { "@docusaurus/core": "3.9.2", "@docusaurus/logger": "3.9.2", @@ -4294,7 +4308,6 @@ "resolved": "https://registry.npmjs.org/@docusaurus/theme-classic/-/theme-classic-3.9.2.tgz", "integrity": "sha512-IGUsArG5hhekXd7RDb11v94ycpJpFdJPkLnt10fFQWOVxAtq5/D7hT6lzc2fhyQKaaCE62qVajOMKL7OiAFAIA==", "license": "MIT", - "peer": true, "dependencies": { "@docusaurus/core": "3.9.2", "@docusaurus/logger": "3.9.2", @@ -4335,7 +4348,6 @@ "resolved": "https://registry.npmjs.org/@docusaurus/theme-common/-/theme-common-3.9.2.tgz", "integrity": 
"sha512-6c4DAbR6n6nPbnZhY2V3tzpnKnGL+6aOsLvFL26VRqhlczli9eWG0VDUNoCQEPnGwDMhPS42UhSAnz5pThm5Ag==", "license": "MIT", - "peer": true, "dependencies": { "@docusaurus/mdx-loader": "3.9.2", "@docusaurus/module-type-aliases": "3.9.2", @@ -4507,7 +4519,6 @@ "resolved": "https://registry.npmjs.org/@docusaurus/utils/-/utils-3.9.2.tgz", "integrity": "sha512-lBSBiRruFurFKXr5Hbsl2thmGweAPmddhF3jb99U4EMDA5L+e5Y1rAkOS07Nvrup7HUMBDrCV45meaxZnt28nQ==", "license": "MIT", - "peer": true, "dependencies": { "@docusaurus/logger": "3.9.2", "@docusaurus/types": "3.9.2", @@ -4553,7 +4564,6 @@ "resolved": "https://registry.npmjs.org/@docusaurus/utils-validation/-/utils-validation-3.9.2.tgz", "integrity": "sha512-l7yk3X5VnNmATbwijJkexdhulNsQaNDwoagiwujXoxFbWLcxHQqNQ+c/IAlzrfMMOfa/8xSBZ7KEKDesE/2J7A==", "license": "MIT", - "peer": true, "dependencies": { "@docusaurus/logger": "3.9.2", "@docusaurus/utils": "3.9.2", @@ -4905,7 +4915,6 @@ "resolved": "https://registry.npmjs.org/@mdx-js/react/-/react-3.1.0.tgz", "integrity": "sha512-QjHtSaoameoalGnKDT3FoIl4+9RwyTmo9ZJGBdLOks/YOiWHoRDI3PUwEzOE7kEmGcV3AFcp9K6dYu9rEuKLAQ==", "license": "MIT", - "peer": true, "dependencies": { "@types/mdx": "^2.0.0" }, @@ -5597,7 +5606,6 @@ "resolved": "https://registry.npmjs.org/@svgr/core/-/core-8.1.0.tgz", "integrity": "sha512-8QqtOQT5ACVlmsvKOJNEaWmRPmcojMOzCz4Hs2BGG/toAp/K38LcsMRyLp349glq5AzJbCEeimEoxaX6v/fLrA==", "license": "MIT", - "peer": true, "dependencies": { "@babel/core": "^7.21.3", "@svgr/babel-preset": "8.1.0", @@ -6003,7 +6011,6 @@ "resolved": "https://registry.npmjs.org/@types/react/-/react-19.1.3.tgz", "integrity": "sha512-dLWQ+Z0CkIvK1J8+wrDPwGxEYFA4RAyHoZPxHVGspYmFVnwGSNT24cGIhFJrtfRnWVuW8X7NO52gCXmhkVUWGQ==", "license": "MIT", - "peer": true, "dependencies": { "csstype": "^3.0.2" } @@ -6336,7 +6343,6 @@ "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.14.1.tgz", "integrity": "sha512-OvQ/2pUDKmgfCg++xsTX1wGxfTaszcHVcTctW4UJB4hibJx2HXxxO5UmVgyjMa+ZDsiaf5wWLXYpRWMmBI0QHg==", "license": 
"MIT", - "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -6419,7 +6425,6 @@ "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.11.0.tgz", "integrity": "sha512-wGgprdCvMalC0BztXvitD2hC04YffAvtsUn93JbGXYLAtCUO4xd17mCCZQxUOItiBwZvJScWo8NIvQMQ71rdpg==", "license": "MIT", - "peer": true, "dependencies": { "fast-deep-equal": "^3.1.1", "json-schema-traverse": "^1.0.0", @@ -6515,6 +6520,7 @@ "resolved": "https://registry.npmjs.org/@algolia/client-common/-/client-common-4.24.0.tgz", "integrity": "sha512-bc2ROsNL6w6rqpl5jj/UywlIYC21TwSSoFHKl01lYirGMW+9Eek6r02Tocg4gZ8HAw3iBvu6XQiM3BEbmEMoiA==", "license": "MIT", + "peer": true, "dependencies": { "@algolia/requester-common": "4.24.0", "@algolia/transporter": "4.24.0" @@ -6525,6 +6531,7 @@ "resolved": "https://registry.npmjs.org/@algolia/client-search/-/client-search-4.24.0.tgz", "integrity": "sha512-uRW6EpNapmLAD0mW47OXqTP8eiIx5F6qN9/x/7HHO6owL3N1IXqydGwW5nhDFBrV+ldouro2W1VX3XlcUXEFCA==", "license": "MIT", + "peer": true, "dependencies": { "@algolia/client-common": "4.24.0", "@algolia/requester-common": "4.24.0", @@ -6536,6 +6543,7 @@ "resolved": "https://registry.npmjs.org/@algolia/requester-browser-xhr/-/requester-browser-xhr-4.24.0.tgz", "integrity": "sha512-Z2NxZMb6+nVXSjF13YpjYTdvV3032YTBSGm2vnYvYPA6mMxzM3v5rsCiSspndn9rzIW4Qp1lPHBvuoKJV6jnAA==", "license": "MIT", + "peer": true, "dependencies": { "@algolia/requester-common": "4.24.0" } @@ -6545,6 +6553,7 @@ "resolved": "https://registry.npmjs.org/@algolia/requester-node-http/-/requester-node-http-4.24.0.tgz", "integrity": "sha512-JF18yTjNOVYvU/L3UosRcvbPMGT9B+/GQWNWnenIImglzNVGpyzChkXLnrSf6uxwVNO6ESGu6oN8MqcGQcjQJw==", "license": "MIT", + "peer": true, "dependencies": { "@algolia/requester-common": "4.24.0" } @@ -7197,7 +7206,6 @@ } ], "license": "MIT", - "peer": true, "dependencies": { "baseline-browser-mapping": "^2.8.19", "caniuse-lite": "^1.0.30001751", @@ -8319,7 +8327,6 @@ "resolved": 
"https://registry.npmjs.org/postcss-selector-parser/-/postcss-selector-parser-7.1.0.tgz", "integrity": "sha512-8sLjZwK0R+JlxlYcTuVnyT2v+htpdrjDOKuMcOVdYjt52Lh8hWRYpxBPoKx/Zg+bcjc3wx6fmQevMmUztS/ccA==", "license": "MIT", - "peer": true, "dependencies": { "cssesc": "^3.0.0", "util-deprecate": "^1.0.2" @@ -10900,7 +10907,6 @@ "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", "license": "MIT", - "peer": true, "dependencies": { "fast-deep-equal": "^3.1.1", "fast-json-stable-stringify": "^2.0.0", @@ -15879,7 +15885,6 @@ "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", "license": "MIT", - "peer": true, "dependencies": { "fast-deep-equal": "^3.1.1", "fast-json-stable-stringify": "^2.0.0", @@ -16660,7 +16665,6 @@ } ], "license": "MIT", - "peer": true, "dependencies": { "nanoid": "^3.3.11", "picocolors": "^1.1.1", @@ -17677,7 +17681,6 @@ "resolved": "https://registry.npmjs.org/postcss-selector-parser/-/postcss-selector-parser-7.1.0.tgz", "integrity": "sha512-8sLjZwK0R+JlxlYcTuVnyT2v+htpdrjDOKuMcOVdYjt52Lh8hWRYpxBPoKx/Zg+bcjc3wx6fmQevMmUztS/ccA==", "license": "MIT", - "peer": true, "dependencies": { "cssesc": "^3.0.0", "util-deprecate": "^1.0.2" @@ -18682,7 +18685,6 @@ "resolved": "https://registry.npmjs.org/react/-/react-18.3.1.tgz", "integrity": "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==", "license": "MIT", - "peer": true, "dependencies": { "loose-envify": "^1.1.0" }, @@ -18695,7 +18697,6 @@ "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-18.3.1.tgz", "integrity": "sha512-5m4nQKp+rZRb09LNH59GM4BxTh9251/ylbKIbpe7TpGxfJ+9kv6BLkLBXIjjspbgbnIBNqlI23tRnTWT0snUIw==", "license": "MIT", - "peer": true, "dependencies": { "loose-envify": "^1.1.0", "scheduler": "^0.23.2" 
@@ -18732,7 +18733,6 @@ "resolved": "https://registry.npmjs.org/react-hook-form/-/react-hook-form-7.56.2.tgz", "integrity": "sha512-vpfuHuQMF/L6GpuQ4c3ZDo+pRYxIi40gQqsCmmfUBwm+oqvBhKhwghCuj2o00YCgSfU6bR9KC/xnQGWm3Gr08A==", "license": "MIT", - "peer": true, "engines": { "node": ">=18.0.0" }, @@ -18793,7 +18793,6 @@ "resolved": "https://registry.npmjs.org/@docusaurus/react-loadable/-/react-loadable-6.0.0.tgz", "integrity": "sha512-YMMxTUQV/QFSnbgrP3tjDzLHRg7vsbMn8e9HAa8o/1iXoiomo48b7sk/kkmWEuWNDPJVlKSJRB6Y2fHqdJk+SQ==", "license": "MIT", - "peer": true, "dependencies": { "@types/react": "*" }, @@ -19516,7 +19515,6 @@ "resolved": "https://registry.npmjs.org/react-redux/-/react-redux-7.2.9.tgz", "integrity": "sha512-Gx4L3uM182jEEayZfRbI/G11ZpYdNAnBs70lFVMNdHJI76XYtR+7m0MN+eAs7UHBPhWXcnFPaS+9owSCJQHNpQ==", "license": "MIT", - "peer": true, "dependencies": { "@babel/runtime": "^7.15.4", "@types/react-redux": "^7.1.20", @@ -19548,7 +19546,6 @@ "resolved": "https://registry.npmjs.org/react-router/-/react-router-5.3.4.tgz", "integrity": "sha512-Ys9K+ppnJah3QuaRiLxk+jDWOR1MekYQrlytiXxC1RyfbdsZkS5pvKAzCCr031xHixZwpnsYNT5xysdFHQaYsA==", "license": "MIT", - "peer": true, "dependencies": { "@babel/runtime": "^7.12.13", "history": "^4.9.0", @@ -19697,7 +19694,6 @@ "resolved": "https://registry.npmjs.org/redux/-/redux-4.2.1.tgz", "integrity": "sha512-LAUYz4lc+Do8/g7aeRa8JkyDErK6ekstQaqWQrNRW//MY1TvCEpMtpTWvlQ+FPbWCx+Xixu/6SHt5N0HR+SB4w==", "license": "MIT", - "peer": true, "dependencies": { "@babel/runtime": "^7.9.2" } @@ -20309,7 +20305,6 @@ "resolved": "https://registry.npmjs.org/sass/-/sass-1.87.0.tgz", "integrity": "sha512-d0NoFH4v6SjEK7BoX810Jsrhj7IQSYHAHLi/iSpgqKc7LaIDshFRlSg5LOymf9FqQhxEHs2W5ZQXlvy0KD45Uw==", "license": "MIT", - "peer": true, "dependencies": { "chokidar": "^4.0.0", "immutable": "^5.0.2", @@ -21994,8 +21989,7 @@ "version": "2.8.1", "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", "integrity": 
"sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", - "license": "0BSD", - "peer": true + "license": "0BSD" }, "node_modules/tunnel-agent": { "version": "0.6.0", @@ -22049,7 +22043,6 @@ "integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==", "devOptional": true, "license": "Apache-2.0", - "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -22420,7 +22413,6 @@ "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", "license": "MIT", - "peer": true, "dependencies": { "fast-deep-equal": "^3.1.1", "fast-json-stable-stringify": "^2.0.0", @@ -22710,7 +22702,6 @@ "resolved": "https://registry.npmjs.org/webpack/-/webpack-5.99.8.tgz", "integrity": "sha512-lQ3CPiSTpfOnrEGeXDwoq5hIGzSjmwD72GdfVzF7CQAI7t47rJG9eDWvcEkEn3CUQymAElVvDg3YNTlCYj+qUQ==", "license": "MIT", - "peer": true, "dependencies": { "@types/eslint-scope": "^3.7.7", "@types/estree": "^1.0.6", @@ -23319,7 +23310,6 @@ "resolved": "https://registry.npmjs.org/zod/-/zod-4.1.12.tgz", "integrity": "sha512-JInaHOamG8pt5+Ey8kGmdcAcg3OL9reK8ltczgHTAwNhMys/6ThXHityHxVV2p3fkw/c+MAvBHFVYHFZDmjMCQ==", "license": "MIT", - "peer": true, "funding": { "url": "https://github.com/sponsors/colinhacks" } diff --git a/docs/package.json b/docs/package.json index 95d5319b1..a94b4645e 100644 --- a/docs/package.json +++ b/docs/package.json @@ -28,6 +28,7 @@ "@docusaurus/plugin-ideal-image": "^3.9.2", "@docusaurus/preset-classic": "^3.9.2", "@docusaurus/theme-search-algolia": "^3.9.2", + "@docusaurus/theme-common": "^3.9.2", "@mdx-js/react": "^3.0.0", "clsx": "^2.0.0", "docusaurus-plugin-image-zoom": "^2.0.0", From 3b6b69668303e7ff7cdfb74c77e29198702e17ec Mon Sep 17 00:00:00 2001 From: Mahmoud Mabrouk Date: Thu, 15 Jan 2026 20:39:51 +0100 Subject: [PATCH 10/20] [3452] fix(frontend): update testset API 
modal to preview endpoints Switch CreateTestsetFromApi modal + code snippets from legacy /testsets endpoints to /preview/simple/testsets, matching current backend + docs payloads. --- .../testsets/create_with_json/curl.ts | 17 +++++++++----- .../testsets/create_with_json/python.ts | 22 ++++++++++++------- .../testsets/create_with_json/typescript.ts | 16 +++++++++----- .../testsets/create_with_upload/curl.ts | 9 ++++---- .../testsets/create_with_upload/python.ts | 5 ++++- .../testsets/create_with_upload/typescript.ts | 1 + .../testset/modals/CreateTestsetFromApi.tsx | 18 +++++++++++---- 7 files changed, 60 insertions(+), 28 deletions(-) diff --git a/web/oss/src/code_snippets/testsets/create_with_json/curl.ts b/web/oss/src/code_snippets/testsets/create_with_json/curl.ts index bdd35848c..01d105e32 100644 --- a/web/oss/src/code_snippets/testsets/create_with_json/curl.ts +++ b/web/oss/src/code_snippets/testsets/create_with_json/curl.ts @@ -3,12 +3,17 @@ import {isDemo} from "@/oss/lib/helpers/utils" export default function cURLCode(uri: string, params: string): string { return `curl -X POST ${uri} \ -H 'Content-Type: application/json' \ -${!isDemo() ? "" : "-H 'Authorization: your_api_key'"} \ + ${!isDemo() ? 
"" : "-H 'Authorization: your_api_key'"} \ -d '{ - "name": "your_testset_name", - "csvdata": [ - {"column1": "value1", "column2": "value2"}, - {"column1": "value3", "column2": "value4"} - ] + "testset": { + "slug": "your-testset-slug", + "name": "your_testset_name", + "data": { + "testcases": [ + {"data": {"column1": "value1", "column2": "value2"}}, + {"data": {"column1": "value3", "column2": "value4"}} + ] + } + } }'` } diff --git a/web/oss/src/code_snippets/testsets/create_with_json/python.ts b/web/oss/src/code_snippets/testsets/create_with_json/python.ts index a7cb8af63..c34f1f471 100644 --- a/web/oss/src/code_snippets/testsets/create_with_json/python.ts +++ b/web/oss/src/code_snippets/testsets/create_with_json/python.ts @@ -5,17 +5,23 @@ export default function pythonCode(uri: string, params: string): string { import json url = '${uri}' + data = { - "name": "your_testset_name", - "csvdata": [ - {"column1": "value1", "column2": "value2"}, - {"column1": "value3", "column2": "value4"} - ] + "testset": { + "slug": "your-testset-slug", + "name": "your_testset_name", + "data": { + "testcases": [ + {"data": {"column1": "value1", "column2": "value2"}}, + {"data": {"column1": "value3", "column2": "value4"}}, + ] + }, + } } -response = requests.post(url, data=json.dumps(data), headers={'Content-Type': 'application/json'${ - !isDemo() ? "" : ", 'Authorization': 'your_api_key'" - }}) +headers = {'Content-Type': 'application/json'${!isDemo() ? 
"" : ", 'Authorization': 'your_api_key'"}} + +response = requests.post(url, data=json.dumps(data), headers=headers) print(response.status_code) print(response.json()) diff --git a/web/oss/src/code_snippets/testsets/create_with_json/typescript.ts b/web/oss/src/code_snippets/testsets/create_with_json/typescript.ts index abe9b045e..6a04996d5 100644 --- a/web/oss/src/code_snippets/testsets/create_with_json/typescript.ts +++ b/web/oss/src/code_snippets/testsets/create_with_json/typescript.ts @@ -6,12 +6,18 @@ export default function tsCode(uri: string, params: string): string { const codeString = `import axios from 'axios'; const url = '${uri}'; + const data = { - name: 'your_testset_name', - csvdata: [ - {column1: 'value1', column2: 'value2'}, - {column1: 'value3', column2: 'value4'} - ] + testset: { + slug: 'your-testset-slug', + name: 'your_testset_name', + data: { + testcases: [ + {data: {column1: 'value1', column2: 'value2'}}, + {data: {column1: 'value3', column2: 'value4'}}, + ], + }, + }, }; axios.post(url, data${!isDemo() ? "" : ", {headers: {Authorization: 'your_api_key'}}"}) diff --git a/web/oss/src/code_snippets/testsets/create_with_upload/curl.ts b/web/oss/src/code_snippets/testsets/create_with_upload/curl.ts index d7ddfa1c9..924d51ed4 100644 --- a/web/oss/src/code_snippets/testsets/create_with_upload/curl.ts +++ b/web/oss/src/code_snippets/testsets/create_with_upload/curl.ts @@ -1,9 +1,10 @@ import {isDemo} from "@/oss/lib/helpers/utils" export default function cURLCode(uri: string): string { - return `curl -X POST ${uri} \\ --H 'Content-Type: multipart/form-data' \\ --F 'file=@/oss/path/to/your/file.csv' \\ --F 'testset_name=your_testset_name' \\ + return `curl -X POST ${uri} \ +-H 'Content-Type: multipart/form-data' \ +-F 'file=@/oss/path/to/your/file.csv' \ +-F 'file_type=csv' \ +-F 'testset_name=your_testset_name' \ ${!isDemo() ? 
"" : "-H 'Authorization: your_api_key'"}` } diff --git a/web/oss/src/code_snippets/testsets/create_with_upload/python.ts b/web/oss/src/code_snippets/testsets/create_with_upload/python.ts index 47d31c79c..38012af06 100644 --- a/web/oss/src/code_snippets/testsets/create_with_upload/python.ts +++ b/web/oss/src/code_snippets/testsets/create_with_upload/python.ts @@ -9,7 +9,10 @@ testset_name = 'your_testset_name' with open(file_path, 'rb') as file: files = {'file': file} - data = {'testset_name': testset_name} + data = { + 'testset_name': testset_name, + 'file_type': 'csv', + } response = requests.post(url, files=files, data=data${ !isDemo() ? "" : ", headers={'Authorization': 'your_api_key'}" }) diff --git a/web/oss/src/code_snippets/testsets/create_with_upload/typescript.ts b/web/oss/src/code_snippets/testsets/create_with_upload/typescript.ts index 25410d4ec..fb2791747 100644 --- a/web/oss/src/code_snippets/testsets/create_with_upload/typescript.ts +++ b/web/oss/src/code_snippets/testsets/create_with_upload/typescript.ts @@ -13,6 +13,7 @@ export default function tsCode(uri: string): string { const formData = new FormData(); formData.append('file', fs.createReadStream(filePath)); + formData.append('file_type', 'csv'); formData.append('testset_name', testsetName); const config = { diff --git a/web/oss/src/components/pages/testset/modals/CreateTestsetFromApi.tsx b/web/oss/src/components/pages/testset/modals/CreateTestsetFromApi.tsx index a206bbbb7..a387815f2 100644 --- a/web/oss/src/components/pages/testset/modals/CreateTestsetFromApi.tsx +++ b/web/oss/src/components/pages/testset/modals/CreateTestsetFromApi.tsx @@ -73,11 +73,21 @@ const CreateTestsetFromApi: React.FC = ({setCurrent, onCancel}) => { const [uploadType, setUploadType] = useState<"csv" | "json">("csv") const [selectedLang, setSelectedLang] = useState("python") - const uploadURI = `${getAgentaApiUrl()}/testsets/upload` - const jsonURI = `${getAgentaApiUrl()}/testsets` + const uploadURI = 
`${getAgentaApiUrl()}/preview/simple/testsets/upload` + const jsonURI = `${getAgentaApiUrl()}/preview/simple/testsets/` const params = `{ - "name": "testset_name",}` + "testset": { + "slug": "your-testset-slug", + "name": "your_testset_name", + "data": { + "testcases": [ + {"data": {"column1": "value1", "column2": "value2"}}, + {"data": {"column1": "value3", "column2": "value4"}} + ] + } + } +}` const jsonCodeSnippets: Record = { python: pythonCode(jsonURI, params), @@ -116,7 +126,7 @@ const CreateTestsetFromApi: React.FC = ({setCurrent, onCancel}) => { - Use this endpoint to create a new Testset for your App using JSON + Use these endpoints to create a testset via JSON or upload a file
Date: Fri, 16 Jan 2026 00:03:00 +0100 Subject: [PATCH 11/20] fix --- .../JSONSchema/JSONSchemaEditor.tsx | 25 ------------------- 1 file changed, 25 deletions(-) diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx index 6aff01d8c..dc6480ae0 100644 --- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx +++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx @@ -153,31 +153,6 @@ export const JSONSchemaEditor: React.FC = ({form, name, d } }, [supportsBasicMode, mode]) - // Update form when basic mode changes - useEffect(() => { - if (mode === "basic" && supportsBasicMode) { - const config: SchemaConfig = { - responseFormat, - includeReasoning, - continuousConfig: {minimum: minValue, maximum: maxValue}, - categoricalOptions: categories, - } - const schema = generateJSONSchema(config) - const schemaString = JSON.stringify(schema, null, 2) - - syncFormValue(schemaString) - } - }, [ - mode, - responseFormat, - includeReasoning, - minValue, - maxValue, - categories, - supportsBasicMode, - syncFormValue, - ]) - const handleModeSwitch = (newMode: "basic" | "advanced") => { if (newMode === mode) { return From 3a9e7e2782e4592ced051846eecd3d1b9bb2cfe4 Mon Sep 17 00:00:00 2001 From: Kaosiso Ezealigo Date: Fri, 16 Jan 2026 00:31:57 +0100 Subject: [PATCH 12/20] fix --- .../JSONSchema/JSONSchemaEditor.tsx | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx index 
dc6480ae0..6aff01d8c 100644 --- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx +++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx @@ -153,6 +153,31 @@ export const JSONSchemaEditor: React.FC = ({form, name, d } }, [supportsBasicMode, mode]) + // Update form when basic mode changes + useEffect(() => { + if (mode === "basic" && supportsBasicMode) { + const config: SchemaConfig = { + responseFormat, + includeReasoning, + continuousConfig: {minimum: minValue, maximum: maxValue}, + categoricalOptions: categories, + } + const schema = generateJSONSchema(config) + const schemaString = JSON.stringify(schema, null, 2) + + syncFormValue(schemaString) + } + }, [ + mode, + responseFormat, + includeReasoning, + minValue, + maxValue, + categories, + supportsBasicMode, + syncFormValue, + ]) + const handleModeSwitch = (newMode: "basic" | "advanced") => { if (newMode === mode) { return From cd5e28b97d62b238e35278ba1051a2392d99bac5 Mon Sep 17 00:00:00 2001 From: Kaosiso Ezealigo Date: Fri, 16 Jan 2026 00:43:58 +0100 Subject: [PATCH 13/20] fix: ensure evaluator configuration changes persist correctly --- .../ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx index 6aff01d8c..f24ef6a5c 100644 --- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx +++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx @@ -76,6 +76,7 @@ export const JSONSchemaEditor: React.FC = ({form, 
name, d return isSchemaCompatibleWithBasicMode(defaultValue) }) + const [isInitialized, setIsInitialized] = useState(false) const lastSyncedValueRef = useRef(undefined) @@ -133,10 +134,13 @@ export const JSONSchemaEditor: React.FC = ({form, name, d if (!defaultValue) { setSupportsBasicMode(true) setRawSchema("") + lastSyncedValueRef.current = undefined + setIsInitialized(true) return } if (lastSyncedValueRef.current === defaultValue) { + setIsInitialized(true) return } @@ -145,7 +149,9 @@ export const JSONSchemaEditor: React.FC = ({form, name, d setSupportsBasicMode(isSchemaCompatibleWithBasicMode(defaultValue)) setRawSchema(defaultValue) - }, [defaultValue, applyParsedConfig]) + syncFormValue(defaultValue) + setIsInitialized(true) + }, [defaultValue, applyParsedConfig, syncFormValue]) useEffect(() => { if (!supportsBasicMode && mode !== "advanced") { @@ -155,6 +161,7 @@ export const JSONSchemaEditor: React.FC = ({form, name, d // Update form when basic mode changes useEffect(() => { + if (!isInitialized) return if (mode === "basic" && supportsBasicMode) { const config: SchemaConfig = { responseFormat, @@ -168,6 +175,7 @@ export const JSONSchemaEditor: React.FC = ({form, name, d syncFormValue(schemaString) } }, [ + isInitialized, mode, responseFormat, includeReasoning, From c988e47eb68c03f5a53dbc021737d18e053a2ef0 Mon Sep 17 00:00:00 2001 From: Kaosiso Ezealigo Date: Fri, 16 Jan 2026 01:05:45 +0100 Subject: [PATCH 14/20] fix --- .../JSONSchema/JSONSchemaEditor.tsx | 33 ++++++++++++++++--- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx index f24ef6a5c..a3c58b098 100644 --- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx +++ 
b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx @@ -77,6 +77,7 @@ export const JSONSchemaEditor: React.FC = ({form, name, d return isSchemaCompatibleWithBasicMode(defaultValue) }) const [isInitialized, setIsInitialized] = useState(false) + const [isDirty, setIsDirty] = useState(false) const lastSyncedValueRef = useRef(undefined) @@ -96,6 +97,7 @@ export const JSONSchemaEditor: React.FC = ({form, name, d } else { setCategories(createDefaultCategories()) } + setIsDirty(false) }, []) const syncFormValue = useCallback( @@ -136,6 +138,7 @@ export const JSONSchemaEditor: React.FC = ({form, name, d setRawSchema("") lastSyncedValueRef.current = undefined setIsInitialized(true) + setIsDirty(false) return } @@ -151,6 +154,7 @@ export const JSONSchemaEditor: React.FC = ({form, name, d setRawSchema(defaultValue) syncFormValue(defaultValue) setIsInitialized(true) + setIsDirty(false) }, [defaultValue, applyParsedConfig, syncFormValue]) useEffect(() => { @@ -161,7 +165,7 @@ export const JSONSchemaEditor: React.FC = ({form, name, d // Update form when basic mode changes useEffect(() => { - if (!isInitialized) return + if (!isInitialized || !isDirty) return if (mode === "basic" && supportsBasicMode) { const config: SchemaConfig = { responseFormat, @@ -172,10 +176,12 @@ export const JSONSchemaEditor: React.FC = ({form, name, d const schema = generateJSONSchema(config) const schemaString = JSON.stringify(schema, null, 2) + setRawSchema(schemaString) syncFormValue(schemaString) } }, [ isInitialized, + isDirty, mode, responseFormat, includeReasoning, @@ -219,6 +225,7 @@ export const JSONSchemaEditor: React.FC = ({form, name, d const parsed = parseJSONSchema(rawSchema) const config = parsed ?? 
getDefaultConfig() applyConfigAndSync(config) + setIsDirty(false) setMode("basic") }, }) @@ -228,6 +235,7 @@ export const JSONSchemaEditor: React.FC = ({form, name, d const parsed = parseJSONSchema(rawSchema) const config = parsed ?? getDefaultConfig() applyConfigAndSync(config) + setIsDirty(false) setMode("basic") return } @@ -237,16 +245,19 @@ export const JSONSchemaEditor: React.FC = ({form, name, d const addCategory = () => { setCategories([...categories, {name: "", description: ""}]) + setIsDirty(true) } const removeCategory = (index: number) => { setCategories(categories.filter((_, i) => i !== index)) + setIsDirty(true) } const updateCategory = (index: number, field: "name" | "description", value: string) => { const updated = [...categories] updated[index][field] = value setCategories(updated) + setIsDirty(true) } if (mode === "advanced") { @@ -320,7 +331,10 @@ export const JSONSchemaEditor: React.FC = ({form, name, d { - setResponseFormat(value) - setIsDirty(true) - }} + onChange={(value) => setResponseFormat(value)} options={[ {label: "Boolean (True/False)", value: "boolean"}, {label: "Continuous (Numeric Range)", value: "continuous"}, @@ -391,10 +358,7 @@ export const JSONSchemaEditor: React.FC = ({form, name, d { - setMinValue(value ?? 0) - setIsDirty(true) - }} + onChange={(value) => setMinValue(value ?? 0)} />
@@ -414,10 +378,7 @@ export const JSONSchemaEditor: React.FC = ({form, name, d { - setMaxValue(value ?? 10) - setIsDirty(true) - }} + onChange={(value) => setMaxValue(value ?? 10)} />
@@ -480,10 +441,7 @@ export const JSONSchemaEditor: React.FC = ({form, name, d
{ - setIncludeReasoning(e.target.checked) - setIsDirty(true) - }} + onChange={(e) => setIncludeReasoning(e.target.checked)} > Include reasoning From 840b13ad0af402b7875fc5dce5b6ab3a690520db Mon Sep 17 00:00:00 2001 From: Kaosiso Ezealigo Date: Fri, 16 Jan 2026 10:10:32 +0100 Subject: [PATCH 17/20] fix: enhance schema normalization and ensure evaluator configuration changes persist --- .../JSONSchema/JSONSchemaEditor.tsx | 118 ++++++++++-------- 1 file changed, 66 insertions(+), 52 deletions(-) diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx index d86583516..8189ba7dc 100644 --- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx +++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx @@ -34,6 +34,14 @@ interface JSONSchemaEditorProps { defaultValue?: string } +const normalizeSchemaValue = (value: unknown): string | undefined => { + if (typeof value === "string") return value + if (value && typeof value === "object") { + return JSON.stringify(value, null, 2) + } + return undefined +} + const createDefaultCategories = (): CategoricalOption[] => [ {name: "good", description: "The response is good"}, {name: "bad", description: "The response is bad"}, @@ -69,19 +77,20 @@ export const JSONSchemaEditor: React.FC = ({form, name, d const [categories, setCategories] = useState(createDefaultCategories()) // Advanced mode state - const [rawSchema, setRawSchema] = useState(defaultValue ?? 
"") - const [supportsBasicMode, setSupportsBasicMode] = useState(() => { - if (!defaultValue) { - return true - } - - return isSchemaCompatibleWithBasicMode(defaultValue) - }) + const initialSchema = normalizeSchemaValue(defaultValue) + const [rawSchema, setRawSchema] = useState(initialSchema ?? "") + const [supportsBasicMode, setSupportsBasicMode] = useState(() => + initialSchema ? isSchemaCompatibleWithBasicMode(initialSchema) : true, + ) const [isInitialized, setIsInitialized] = useState(false) + const [isDirty, setIsDirty] = useState(false) const lastSyncedValueRef = useRef(undefined) const namePath = useMemo(() => (Array.isArray(name) ? name : [name]), [name]) + const watchedValue = Form.useWatch(namePath as any, form) + const normalizedWatchedValue = useMemo(() => normalizeSchemaValue(watchedValue), [watchedValue]) + const normalizedDefaultValue = useMemo(() => normalizeSchemaValue(defaultValue), [defaultValue]) const applyParsedConfig = useCallback((parsed: SchemaConfig) => { setResponseFormat(parsed.responseFormat) @@ -97,6 +106,7 @@ export const JSONSchemaEditor: React.FC = ({form, name, d } else { setCategories(createDefaultCategories()) } + setIsDirty(false) }, []) const syncFormValue = useCallback( @@ -110,6 +120,16 @@ export const JSONSchemaEditor: React.FC = ({form, name, d [form, namePath], ) + const buildConfig = useCallback( + (): SchemaConfig => ({ + responseFormat, + includeReasoning, + continuousConfig: {minimum: minValue, maximum: maxValue}, + categoricalOptions: categories, + }), + [categories, includeReasoning, maxValue, minValue, responseFormat], + ) + const getDefaultConfig = useCallback((): SchemaConfig => { return { responseFormat: "boolean", @@ -130,29 +150,32 @@ export const JSONSchemaEditor: React.FC = ({form, name, d [applyParsedConfig, syncFormValue], ) - // Initialize from default value + // Initialize from form value (preferred) or default fallback. 
useEffect(() => { - if (!defaultValue) { + const sourceValue = normalizedWatchedValue ?? normalizedDefaultValue + if (!sourceValue) { setSupportsBasicMode(true) setRawSchema("") lastSyncedValueRef.current = undefined setIsInitialized(true) + setIsDirty(false) return } - if (lastSyncedValueRef.current === defaultValue) { + if (lastSyncedValueRef.current === sourceValue) { setIsInitialized(true) return } - const parsed = parseJSONSchema(defaultValue) + const parsed = parseJSONSchema(sourceValue) if (parsed) applyParsedConfig(parsed) - setSupportsBasicMode(isSchemaCompatibleWithBasicMode(defaultValue)) - setRawSchema(defaultValue) - syncFormValue(defaultValue) + setSupportsBasicMode(isSchemaCompatibleWithBasicMode(sourceValue)) + setRawSchema(sourceValue) + syncFormValue(sourceValue) setIsInitialized(true) - }, [defaultValue, applyParsedConfig, syncFormValue]) + setIsDirty(false) + }, [applyParsedConfig, normalizedDefaultValue, normalizedWatchedValue, syncFormValue]) useEffect(() => { if (!supportsBasicMode && mode !== "advanced") { @@ -162,30 +185,15 @@ export const JSONSchemaEditor: React.FC = ({form, name, d // Update form when basic mode changes useEffect(() => { - if (!isInitialized) return + if (!isInitialized || !isDirty) return if (mode === "basic" && supportsBasicMode) { - const config: SchemaConfig = { - responseFormat, - includeReasoning, - continuousConfig: {minimum: minValue, maximum: maxValue}, - categoricalOptions: categories, - } - const schema = generateJSONSchema(config) + const schema = generateJSONSchema(buildConfig()) const schemaString = JSON.stringify(schema, null, 2) + setRawSchema(schemaString) syncFormValue(schemaString) } - }, [ - isInitialized, - mode, - responseFormat, - includeReasoning, - minValue, - maxValue, - categories, - supportsBasicMode, - syncFormValue, - ]) + }, [isInitialized, isDirty, mode, buildConfig, supportsBasicMode, syncFormValue]) const handleModeSwitch = (newMode: "basic" | "advanced") => { if (newMode === mode) { @@ 
-193,13 +201,7 @@ export const JSONSchemaEditor: React.FC = ({form, name, d } if (newMode === "advanced" && mode === "basic") { - const config: SchemaConfig = { - responseFormat, - includeReasoning, - continuousConfig: {minimum: minValue, maximum: maxValue}, - categoricalOptions: categories, - } - const schema = generateJSONSchema(config) + const schema = generateJSONSchema(buildConfig()) const schemaString = JSON.stringify(schema, null, 2) setRawSchema(schemaString) syncFormValue(schemaString) @@ -220,6 +222,7 @@ export const JSONSchemaEditor: React.FC = ({form, name, d const parsed = parseJSONSchema(rawSchema) const config = parsed ?? getDefaultConfig() applyConfigAndSync(config) + setIsDirty(false) setMode("basic") }, }) @@ -229,6 +232,7 @@ export const JSONSchemaEditor: React.FC = ({form, name, d const parsed = parseJSONSchema(rawSchema) const config = parsed ?? getDefaultConfig() applyConfigAndSync(config) + setIsDirty(false) setMode("basic") return } @@ -238,16 +242,19 @@ export const JSONSchemaEditor: React.FC = ({form, name, d const addCategory = () => { setCategories([...categories, {name: "", description: ""}]) + setIsDirty(true) } const removeCategory = (index: number) => { setCategories(categories.filter((_, i) => i !== index)) + setIsDirty(true) } const updateCategory = (index: number, field: "name" | "description", value: string) => { const updated = [...categories] updated[index][field] = value setCategories(updated) + setIsDirty(true) } if (mode === "advanced") { @@ -274,12 +281,7 @@ export const JSONSchemaEditor: React.FC = ({form, name, d setSupportsBasicMode( value ? isSchemaCompatibleWithBasicMode(value) : false, ) - - if (Array.isArray(name)) { - form.setFieldValue(name, value) - } else { - form.setFieldValue([name], value) - } + form.setFieldValue(namePath, value) } }} editorProps={{ @@ -321,7 +323,10 @@ export const JSONSchemaEditor: React.FC = ({form, name, d