diff --git a/.gitleaks.toml b/.gitleaks.toml index d1991dc726..eb89914190 100644 --- a/.gitleaks.toml +++ b/.gitleaks.toml @@ -11,6 +11,7 @@ paths = [ '''^core/docs/docs/reference/api/.*\.mdx''', '''^docs/docs/reference/api/.*\.mdx''', '''^docs/.docusaurus/.*''', + '''^docs/build/.*''', # -------------------------------------------------------------- WEB ARTIFACTS '''^.*/\.pnpm-store/.*''', '''^.*/public/__env\.js$''', @@ -24,6 +25,8 @@ regexes = [ '''is_completion=True''', '''YOUR_API_KEY''', '''_SECRET_KEY''', + # ------------------------------------------------------------ PUBLIC KEYS + '''phc_hmVSxIjTW1REBHXgj2aw4HW9X6CXb6FzerBgP9XenC7''', # ---------------------------------------------------------------------------- ] diff --git a/api/pyproject.toml b/api/pyproject.toml index 76fc53afd4..35e55fa50d 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "api" -version = "0.58.0" +version = "0.59.0" description = "Agenta API" authors = [ { name = "Mahmoud Mabrouk", email = "mahmoud@agenta.ai" }, diff --git a/docs/blog/entries/filtering-traces-by-annotation.mdx b/docs/blog/entries/filtering-traces-by-annotation.mdx new file mode 100644 index 0000000000..e75f09081e --- /dev/null +++ b/docs/blog/entries/filtering-traces-by-annotation.mdx @@ -0,0 +1,40 @@ +--- +title: "Filtering Traces by Annotation" +slug: filtering-traces-by-annotation +date: 2025-10-14 +tags: [v0.58.0] +--- + +import Image from "@theme/IdealImage"; + + +We rebuilt the filtering system in observability. We added a new dropdown with more options. Additionally, we added annotation filtering. You can now filter and search traces based on their annotations. This feature helps you find traces with low scores or bad feedback quickly. + +### New Filter Options + +The new dropdown is simpler and gives you more options. 
You can now filter by: +- **Span status**: Find successful or failed spans +- **Input keys**: Search for specific inputs in your spans +- **App or environment**: Filter traces from specific apps or environments +- **Any key within your span**: Search custom data in your trace structure + +Complete observability filters + +### Annotation Filtering + +Filter traces based on evaluations and feedback: +- **Evaluator results**: Find spans evaluated by a specific evaluator +- **User feedback**: Search for spans with feedback like `success=True` + +Annotation filtering interface + +This feature enables powerful workflows: + +1. **Capture user feedback** from your application using our API ([see tutorial](/tutorials/cookbooks/capture-user-feedback)) +2. **Filter traces** to find those with bad feedback or low scores +3. **Add them to test sets** to track problematic cases +4. **Improve your prompts** based on real user feedback + +The filtering system makes it easy to turn production issues into test cases. + +--- diff --git a/docs/blog/main.mdx b/docs/blog/main.mdx index 670de79f42..4b162c982f 100644 --- a/docs/blog/main.mdx +++ b/docs/blog/main.mdx @@ -10,6 +10,27 @@ import Image from "@theme/IdealImage";
+### [Filtering Traces by Annotation](/changelog/filtering-traces-by-annotation) + +_14 October 2025_ + +**v0.58.0** +Annotation filtering interface + + +You can now filter and search traces based on their annotations. This helps you find traces with low scores or bad feedback quickly. + +We rebuilt the filtering system in observability with a simpler dropdown and more options. You can now filter by span status, input keys, app or environment references, and any key within your span. + +The new annotation filtering lets you find: +- Spans evaluated by a specific evaluator +- Spans with user feedback like `success=True` + + +This enables powerful workflows: [capture user feedback](/tutorials/cookbooks/capture-user-feedback) from your app, filter to find traces with bad feedback, add them to test sets, and improve your prompts based on real user data. + +--- + ### [New Evaluation Results Dashboard](/changelog/new-evaluation-results-dashboard) _26 September 2025_ diff --git a/docs/src/data/roadmap.ts b/docs/src/data/roadmap.ts index d4bcf0854b..3c7d18d236 100644 --- a/docs/src/data/roadmap.ts +++ b/docs/src/data/roadmap.ts @@ -24,6 +24,19 @@ export const shippedFeatures: ShippedFeature[] = [ // Playground: BCFF78 // Observability: DE74FF // Evaluation: 86B7FF + { + id: "filtering-by-annotation", + title: "Filtering Traces by Annotation", + description: "Filter and search for traces based on their annotations. Find traces with low scores or feedback quickly using the rebuilt filtering system.", + changelogPath: "/changelog/filtering-traces-by-annotation", + shippedAt: "2025-10-14", + labels: [ + { + name: "Observability", + color: "DE74FF", + }, + ], + }, { id: "evaluation-results-dashboard", title: "New Evaluation Results Dashboard", @@ -173,19 +186,7 @@ export const inProgressFeatures: PlannedFeature[] = [ }, ], }, - { - id: "filtering-by-annotation", - title: "Filtering Traces by Annotation", - description: "We are adding the ability to filter traces by annotation. 
This is useful for finding traces with low scores or feedback.", - githubUrl: "https://github.com/Agenta-AI/agenta/discussions/2729", - labels: [ - { - name: "Observability", - color: "DE74FF", - }, - ], - }, { id: "date-range-filtering", title: "Date Range Filtering in Metrics Dashboard", description: "We are adding the ability to filter traces by date range in the metrics dashboard.", diff --git a/docs/static/images/changelog/changelog-annotation-filter.png b/docs/static/images/changelog/changelog-annotation-filter.png new file mode 100644 index 0000000000..172c97eac5 Binary files /dev/null and b/docs/static/images/changelog/changelog-annotation-filter.png differ diff --git a/docs/static/images/changelog/changelog-filters-observability.png b/docs/static/images/changelog/changelog-filters-observability.png new file mode 100644 index 0000000000..6a2da97c18 Binary files /dev/null and b/docs/static/images/changelog/changelog-filters-observability.png differ diff --git a/hooks/setup.sh b/hooks/setup.sh index dfa7669995..287d31bab6 100755 --- a/hooks/setup.sh +++ b/hooks/setup.sh @@ -26,8 +26,11 @@ if ! command -v gitleaks >/dev/null 2>&1; then brew install gitleaks else # fallback: go install (requires Go installed) - go install github.com/gitleaks/gitleaks/v8@latest + go install github.com/zricethezav/gitleaks/v8@latest export PATH="$PATH:$(go env GOPATH)/bin" + echo "----------------------------------------------------------------------" + echo "⚠️⚠️ PLEASE ADD $(go env GOPATH)/bin TO YOUR PATH IN YOUR .bashrc OR .zshrc ⚠️⚠️" + echo "----------------------------------------------------------------------" fi fi diff --git a/sdk/pyproject.toml b/sdk/pyproject.toml index 6fb0d58a0a..9efac11f7f 100644 --- a/sdk/pyproject.toml +++ b/sdk/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "agenta" -version = "0.58.0" +version = "0.59.0" description = "The SDK for agenta is an open-source LLMOps platform." 
readme = "README.md" authors = [ diff --git a/web/ee/package.json b/web/ee/package.json index e98d5b2bed..fc2d4e4b96 100644 --- a/web/ee/package.json +++ b/web/ee/package.json @@ -1,6 +1,6 @@ { "name": "@agenta/ee", - "version": "0.58.0", + "version": "0.59.0", "private": true, "engines": { "node": ">=18" diff --git a/web/ee/src/components/Banners/BillingPlanBanner/FreeTrialBanner.tsx b/web/ee/src/components/Banners/BillingPlanBanner/FreeTrialBanner.tsx index 3fa77e8671..7fb69da69a 100644 --- a/web/ee/src/components/Banners/BillingPlanBanner/FreeTrialBanner.tsx +++ b/web/ee/src/components/Banners/BillingPlanBanner/FreeTrialBanner.tsx @@ -1,7 +1,7 @@ import {Button, Typography} from "antd" import {useRouter} from "next/router" -import useURL from "@/oss/hooks/useURL" +import useURL from "@/oss/hooks/useURL" import {SubscriptionType} from "@/oss/services/billing/types" import SubscriptionPlanDetails from "@/agenta-oss-common/components/pages/settings/Billing/Modals/PricingModal/assets/SubscriptionPlanDetails" diff --git a/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/FocusDrawerContent/assets/RunOutput.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/FocusDrawerContent/assets/RunOutput.tsx index 9c4151f6eb..74358c0ffe 100644 --- a/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/FocusDrawerContent/assets/RunOutput.tsx +++ b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/FocusDrawerContent/assets/RunOutput.tsx @@ -1,6 +1,7 @@ +import clsx from "clsx" + import SimpleSharedEditor from "@/oss/components/EditorViews/SimpleSharedEditor" import {useInvocationResult} from "@/oss/lib/hooks/useInvocationResult" -import clsx from "clsx" const RunOutput = ({ runId, diff --git a/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/FocusDrawerContent/assets/RunTraceHeader.tsx 
b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/FocusDrawerContent/assets/RunTraceHeader.tsx index d37ba99532..4a1352a3ab 100644 --- a/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/FocusDrawerContent/assets/RunTraceHeader.tsx +++ b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/FocusDrawerContent/assets/RunTraceHeader.tsx @@ -1,3 +1,10 @@ +import {memo} from "react" + +import clsx from "clsx" +import {useAtomValue} from "jotai" +import dynamic from "next/dynamic" + +import EvalNameTag from "@/oss/components/EvalRunDetails/AutoEvalRun/assets/EvalNameTag" import {EVAL_TAG_COLOR} from "@/oss/components/EvalRunDetails/AutoEvalRun/assets/utils" import {useRunId} from "@/oss/contexts/RunIdContext" import { @@ -5,11 +12,6 @@ import { evaluationRunStateFamily, } from "@/oss/lib/hooks/useEvaluationRunData/assets/atoms" import {useInvocationResult} from "@/oss/lib/hooks/useInvocationResult" -import clsx from "clsx" -import {useAtomValue} from "jotai" -import {memo} from "react" -import dynamic from "next/dynamic" -import EvalNameTag from "@/oss/components/EvalRunDetails/AutoEvalRun/assets/EvalNameTag" const GenerationResultUtils = dynamic( () => diff --git a/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunPromptConfigViewer/index.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunPromptConfigViewer/index.tsx index 866d83eb3a..1e300d7589 100644 --- a/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunPromptConfigViewer/index.tsx +++ b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunPromptConfigViewer/index.tsx @@ -7,15 +7,15 @@ import {atomFamily} from "jotai/utils" import {useRunId} from "@/oss/contexts/RunIdContext" import {evaluationRunStateFamily} from "@/oss/lib/hooks/useEvaluationRunData/assets/atoms" +import {projectIdAtom} from "@/oss/state/project/selectors/project" import { 
clearProjectVariantReferencesAtom, prefetchProjectVariantConfigs, setProjectVariantReferencesAtom, } from "@/oss/state/projectVariantConfig" -import {projectIdAtom} from "@/oss/state/project/selectors/project" -import {urlStateAtom} from "../../../state/urlState" import {collectProjectVariantReferences} from "../../../../../lib/hooks/usePreviewEvaluations/projectVariantConfigs" +import {urlStateAtom} from "../../../state/urlState" import PromptConfigCard from "./assets/PromptConfigCard" diff --git a/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunScoreTable/index.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunScoreTable/index.tsx index 74490dd563..c0d4b6a16d 100644 --- a/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunScoreTable/index.tsx +++ b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunScoreTable/index.tsx @@ -61,7 +61,7 @@ const DURATION_METRIC_KEY = INVOCATION_METRIC_KEYS[1] const TOKEN_METRIC_KEY = INVOCATION_METRIC_KEYS[2] const ERRORS_METRIC_KEY = INVOCATION_METRIC_KEYS[3] -const INVOCATION_METRIC_COLUMNS: Array<{key: string; label: string}> = [ +const INVOCATION_METRIC_COLUMNS: {key: string; label: string}[] = [ {key: COST_METRIC_KEY, label: "Cost (Total)"}, {key: DURATION_METRIC_KEY, label: "Duration (Total)"}, {key: TOKEN_METRIC_KEY, label: "Total tokens"}, diff --git a/web/ee/src/components/EvalRunDetails/UrlSync.tsx b/web/ee/src/components/EvalRunDetails/UrlSync.tsx index 7abf792a2f..30b59e68ab 100644 --- a/web/ee/src/components/EvalRunDetails/UrlSync.tsx +++ b/web/ee/src/components/EvalRunDetails/UrlSync.tsx @@ -3,10 +3,11 @@ import {useEffect} from "react" import {useAtom, useAtomValue, useSetAtom} from "jotai" import {useRouter} from "next/router" +import {useRunId} from "@/oss/contexts/RunIdContext" + import {evalAtomStore} from "../../lib/hooks/useEvaluationRunData/assets/atoms/store" import {EvalRunUrlState, runViewTypeAtom, urlStateAtom} from "./state/urlState" 
-import {useRunId} from "@/oss/contexts/RunIdContext" const UrlSync = ({evalType}: {evalType: "auto" | "human"}) => { const router = useRouter() diff --git a/web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/ComparisonScenarioTable.tsx b/web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/ComparisonScenarioTable.tsx index 1d777a6713..d60ac8835e 100644 --- a/web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/ComparisonScenarioTable.tsx +++ b/web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/ComparisonScenarioTable.tsx @@ -1,9 +1,9 @@ import {RefObject, useEffect, useMemo} from "react" -import dynamic from "next/dynamic" import {DownOutlined} from "@ant-design/icons" import clsx from "clsx" import {atom, useAtom, useAtomValue} from "jotai" +import dynamic from "next/dynamic" import {useResizeObserver} from "usehooks-ts" import {useRunId} from "@/oss/contexts/RunIdContext" diff --git a/web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/dataSourceBuilder.ts b/web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/dataSourceBuilder.ts index ecb9ea6949..a88821b936 100644 --- a/web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/dataSourceBuilder.ts +++ b/web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/dataSourceBuilder.ts @@ -1,7 +1,7 @@ import groupBy from "lodash/groupBy" -import {formatColumnTitle} from "@/oss/components/Filters/EditColumns/assets/helper" import {evalTypeAtom} from "@/oss/components/EvalRunDetails/state/evalType" +import {formatColumnTitle} from "@/oss/components/Filters/EditColumns/assets/helper" import { evalAtomStore, evaluationEvaluatorsFamily, diff --git a/web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/hooks/useExpandableComparisonDataSource.tsx 
b/web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/hooks/useExpandableComparisonDataSource.tsx index 2615b89538..0908385f40 100644 --- a/web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/hooks/useExpandableComparisonDataSource.tsx +++ b/web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/hooks/useExpandableComparisonDataSource.tsx @@ -4,12 +4,12 @@ import deepEqual from "fast-deep-equal" import {atom, useAtomValue} from "jotai" import {atomFamily} from "jotai/utils" +import {filterColumns} from "@/oss/components/Filters/EditColumns/assets/helper" import { evalAtomStore, evaluationRunStateFamily, runIndexFamily, } from "@/oss/lib/hooks/useEvaluationRunData/assets/atoms" -import {filterColumns} from "@/oss/components/Filters/EditColumns/assets/helper" import type {RunIndex} from "@/oss/lib/hooks/useEvaluationRunData/assets/helpers/buildRunIndex" import { @@ -17,9 +17,10 @@ import { scenarioStepsFamily, } from "../../../../../lib/hooks/useEvaluationRunData/assets/atoms/runScopedScenarios" import {buildScenarioTableData} from "../assets/dataSourceBuilder" +import type {TableColumn} from "../assets/types" import {buildAntdColumns} from "../assets/utils" import {expendedRowAtom} from "../ComparisonScenarioTable" -import type {TableColumn} from "../assets/types" + import {editColumnsFamily} from "./useTableDataSource" export interface GroupedScenario { diff --git a/web/ee/src/components/EvalRunDetails/index.tsx b/web/ee/src/components/EvalRunDetails/index.tsx index d4957b4747..accc7a632e 100644 --- a/web/ee/src/components/EvalRunDetails/index.tsx +++ b/web/ee/src/components/EvalRunDetails/index.tsx @@ -7,8 +7,8 @@ import {createStore, getDefaultStore, Provider, useAtomValue, useSetAtom} from " import {selectAtom} from "jotai/utils" import {useRouter} from "next/router" -import EvalRunDetails from "@/oss/components/EvalRunDetails/HumanEvalRun" import ErrorState from "@/oss/components/ErrorState" +import 
EvalRunDetails from "@/oss/components/EvalRunDetails/HumanEvalRun" import SingleModelEvaluationTable from "@/oss/components/EvaluationTable/SingleModelEvaluationTable" import {RunIdProvider} from "@/oss/contexts/RunIdContext" import {useAppId} from "@/oss/hooks/useAppId" diff --git a/web/ee/src/components/Evaluators/assets/cells/EvaluatorTagsCell.tsx b/web/ee/src/components/Evaluators/assets/cells/EvaluatorTagsCell.tsx new file mode 100644 index 0000000000..b2f0dddcae --- /dev/null +++ b/web/ee/src/components/Evaluators/assets/cells/EvaluatorTagsCell.tsx @@ -0,0 +1,19 @@ +import {memo} from "react" + +import {Tag} from "antd" + +const EvaluatorTagsCell = memo(({tags}: {tags: string[]}) => { + if (!tags?.length) return null + + return ( +
+ {tags.map((tag) => ( + + {tag} + + ))} +
+ ) +}) + +export default EvaluatorTagsCell diff --git a/web/ee/src/components/Evaluators/assets/cells/EvaluatorTypePill.tsx b/web/ee/src/components/Evaluators/assets/cells/EvaluatorTypePill.tsx new file mode 100644 index 0000000000..727ca05926 --- /dev/null +++ b/web/ee/src/components/Evaluators/assets/cells/EvaluatorTypePill.tsx @@ -0,0 +1,57 @@ +import {memo, useMemo, type CSSProperties} from "react" + +import {Tag} from "antd" + +import {EvaluatorTypeBadge} from "../types" + +/** + * Converts a 3- or 6-digit hex colour (leading "#" optional) into RGB channels. + * Returns null when the value is missing or is not a valid hex colour. + */ +const hexToRgb = (hex?: string) => { + if (!hex) return null + let sanitized = hex.replace("#", "") + if (sanitized.length === 3) { + sanitized = sanitized + .split("") + .map((char) => char + char) + .join("") + } + // parseInt stops at the first non-hex character, so validate every digit up front + if (!/^[0-9a-f]{6}$/i.test(sanitized)) return null + + const intVal = Number.parseInt(sanitized, 16) + + return { + r: (intVal >> 16) & 255, + g: (intVal >> 8) & 255, + b: intVal & 255, + } +} + +const EvaluatorTypePill = memo(({badge}: {badge: EvaluatorTypeBadge}) => { + const baseHex = badge.colorHex + const computedStyle = useMemo(() => { + const rgb = hexToRgb(baseHex) + if (!rgb) return undefined + + return { + backgroundColor: `rgba(${rgb.r}, ${rgb.g}, ${rgb.b}, 0.12)`, + borderColor: `rgba(${rgb.r}, ${rgb.g}, ${rgb.b}, 0.32)`, + color: `rgba(${rgb.r}, ${rgb.g}, ${rgb.b}, 0.88)`, + } satisfies CSSProperties + }, [baseHex]) + + return ( + + {badge.label} + + ) +}) + +export default EvaluatorTypePill diff --git a/web/ee/src/components/Evaluators/assets/cells/TableDropdownMenu/index.tsx b/web/ee/src/components/Evaluators/assets/cells/TableDropdownMenu/index.tsx new file mode 100644 index 0000000000..767e44bd5e --- /dev/null +++ b/web/ee/src/components/Evaluators/assets/cells/TableDropdownMenu/index.tsx @@ -0,0 +1,58 @@ +import {memo, useMemo} from "react" + +import {MoreOutlined} from "@ant-design/icons" +import {GearSix, PencilSimpleLine, Trash} from "@phosphor-icons/react" +import {Button, Dropdown, MenuProps} from "antd" + +import 
{TableDropdownMenuProps} from "./types" + +const TableDropdownMenu = ({ + record, + category, + onEdit, + onConfigure, + onDelete, +}: TableDropdownMenuProps) => { + const items = useMemo(() => { + const menuItems: MenuProps["items"] = [] + + menuItems.push({ + key: "edit", + label: "Edit evaluator", + icon: , + onClick: (event) => { + event.domEvent.stopPropagation() + category === "human" ? onEdit?.(record) : onConfigure?.(record) + }, + }) + + if (menuItems.length) { + menuItems.push({type: "divider"}) + } + + menuItems.push({ + key: "delete", + label: "Delete", + icon: , + danger: true, + onClick: (event) => { + event.domEvent.stopPropagation() + onDelete(record) + }, + }) + + return menuItems + }, [category, record]) + + return ( + + + } + /> + ) + } + + if (existingConfig && (!editEvalEditValues || !editMode)) { + return + } + + const setCurrent: React.Dispatch> = () => { + navigateBack() + } + + const handleOnCancel = () => { + navigateBack() + } + + return ( + + ) +} + +export default ConfigureEvaluatorPage diff --git a/web/ee/src/components/Evaluators/components/DeleteEvaluatorsModal/assets/DeleteEvaluatorsModalContent/index.tsx b/web/ee/src/components/Evaluators/components/DeleteEvaluatorsModal/assets/DeleteEvaluatorsModalContent/index.tsx new file mode 100644 index 0000000000..7243d815d8 --- /dev/null +++ b/web/ee/src/components/Evaluators/components/DeleteEvaluatorsModal/assets/DeleteEvaluatorsModalContent/index.tsx @@ -0,0 +1,37 @@ +import {memo, useMemo} from "react" + +import {Typography} from "antd" + +interface DeleteEvaluatorsModalContentProps { + selectedCount: number + selectedNames: string[] +} + +const DeleteEvaluatorsModalContent = ({ + selectedCount, + selectedNames, +}: DeleteEvaluatorsModalContentProps) => { + const previewNames = useMemo(() => selectedNames.slice(0, 3), [selectedNames]) + const remaining = Math.max(selectedCount - previewNames.length, 0) + + return ( +
+ + {selectedCount === 1 + ? "Are you sure you want to delete this evaluator?" + : "Are you sure you want to delete the selected evaluators?"} + + + {previewNames.length > 0 && ( +
    + {previewNames.map((name) => ( +
  • {name}
  • + ))} + {remaining > 0 &&
  • and {remaining} more…
  • } +
+ )} +
+ ) +} + +export default memo(DeleteEvaluatorsModalContent) diff --git a/web/ee/src/components/Evaluators/components/DeleteEvaluatorsModal/index.tsx b/web/ee/src/components/Evaluators/components/DeleteEvaluatorsModal/index.tsx new file mode 100644 index 0000000000..97c33bf066 --- /dev/null +++ b/web/ee/src/components/Evaluators/components/DeleteEvaluatorsModal/index.tsx @@ -0,0 +1,49 @@ +import {memo} from "react" + +import dynamic from "next/dynamic" + +import EnhancedModal from "@/oss/components/EnhancedUIs/Modal" + +import {DeleteEvaluatorsModalProps} from "./types" + +const DeleteEvaluatorsModalContent = dynamic( + () => import("./assets/DeleteEvaluatorsModalContent"), + {ssr: false}, +) + +const DeleteEvaluatorsModal = ({ + selectedCount, + selectedNames, + confirmLoading = false, + onConfirm, + open, + onCancel, + okButtonProps, + ...modalProps +}: DeleteEvaluatorsModalProps) => { + return ( + + + + ) +} + +export default memo(DeleteEvaluatorsModal) diff --git a/web/ee/src/components/Evaluators/components/DeleteEvaluatorsModal/types.ts b/web/ee/src/components/Evaluators/components/DeleteEvaluatorsModal/types.ts new file mode 100644 index 0000000000..3a2c1205f3 --- /dev/null +++ b/web/ee/src/components/Evaluators/components/DeleteEvaluatorsModal/types.ts @@ -0,0 +1,8 @@ +import {EnhancedModalProps} from "@/oss/components/EnhancedUIs/Modal/types" + +export interface DeleteEvaluatorsModalProps extends Omit { + selectedCount: number + selectedNames: string[] + confirmLoading?: boolean + onConfirm: () => void +} diff --git a/web/ee/src/components/Evaluators/components/SelectEvaluatorModal/assets/SelectEvaluatorModalContent/index.tsx b/web/ee/src/components/Evaluators/components/SelectEvaluatorModal/assets/SelectEvaluatorModalContent/index.tsx new file mode 100644 index 0000000000..9c819a6e21 --- /dev/null +++ b/web/ee/src/components/Evaluators/components/SelectEvaluatorModal/assets/SelectEvaluatorModalContent/index.tsx @@ -0,0 +1,186 @@ +import {memo, useCallback, 
useMemo, useState} from "react" + +import {ArrowRight} from "@phosphor-icons/react" +import type {TabsProps} from "antd" +import {Empty, Skeleton, Tabs, Tag, Typography, message} from "antd" +import clsx from "clsx" +import {useRouter} from "next/router" + +import type {EvaluatorPreview} from "@/oss/components/Evaluators/assets/types" +import useURL from "@/oss/hooks/useURL" +import {getEvaluatorTags} from "@/oss/lib/helpers/evaluate" +import {capitalize} from "@/oss/lib/helpers/utils" +import useFetchEvaluatorsData from "@/oss/lib/hooks/useFetchEvaluatorsData" +import type {Evaluator} from "@/oss/lib/Types" + +const DEFAULT_TAB_KEY = "all" + +const TAG_CLASSNAME_MAP: Record = { + rag: "bg-sky-100 text-sky-700", + classifiers: "bg-orange-100 text-orange-700", + similarity: "bg-blue-100 text-blue-700", + ai_llm: "bg-violet-100 text-violet-700", + functional: "bg-amber-100 text-amber-700", +} + +const getEvaluatorTagValues = (item: EvaluatorPreview | Evaluator) => { + const registry = new Set() + // Prefer explicit evaluator tags when available and fall back to metadata tags + const primaryTags = Array.isArray((item as Evaluator).tags) ? (item as Evaluator).tags : [] + + primaryTags.filter(Boolean).forEach((tag) => { + registry.add(String(tag).toLowerCase()) + }) + + const rawTags = [ + ...(Array.isArray((item.flags as any)?.tags) ? (item.flags as any).tags : []), + ...(Array.isArray((item.meta as any)?.tags) ? 
(item.meta as any).tags : []), + ].filter(Boolean) + + rawTags.forEach((tag) => registry.add(String(tag).toLowerCase())) + + return Array.from(registry) +} + +const SelectEvaluatorModalContent = () => { + const {projectURL} = useURL() + const router = useRouter() + const {evaluatorsSwr, isLoadingEvaluators} = useFetchEvaluatorsData() + const [activeTab, setActiveTab] = useState(DEFAULT_TAB_KEY) + const evaluators = evaluatorsSwr.data || [] + const baseTags = useMemo(() => getEvaluatorTags(), []) + + const availableTags = useMemo(() => { + const normalized = new Map() + baseTags.forEach((tag) => { + normalized.set(tag.value, tag.label) + }) + + evaluators.forEach((item) => { + getEvaluatorTagValues(item).forEach((tag) => { + if (!normalized.has(tag)) { + normalized.set(tag, capitalize(tag.replace(/[_-]+/g, " "))) + } + }) + }) + + return normalized + }, [baseTags, evaluators]) + + const tabItems = useMemo(() => { + const items: TabsProps["items"] = [{key: DEFAULT_TAB_KEY, label: "All templates"}] + + availableTags.forEach((label, value) => { + items!.push({key: value, label}) + }) + + return items + }, [availableTags]) + + const filteredEvaluators = useMemo(() => { + if (activeTab === DEFAULT_TAB_KEY) { + return evaluators + } + + return evaluators.filter((item) => { + const tags = getEvaluatorTagValues(item) + return tags.includes(activeTab) + }) + }, [activeTab, evaluators]) + + const handleTabChange = useCallback((key: string) => { + setActiveTab(key) + }, []) + + const handleTemplateSelect = useCallback( + async (template: EvaluatorPreview | Evaluator) => { + const evaluatorId = (template as any)?.key + if (!evaluatorId) { + message.error("Unable to open evaluator template") + return + } + + await router.push(`${projectURL}/evaluators/configure/${evaluatorId}`) + }, + [router, projectURL], + ) + + const renderContent = () => { + if (isLoadingEvaluators) { + return ( +
+ {Array.from({length: 5}).map((_, index) => ( + + ))} +
+ ) + } + + if (!filteredEvaluators.length) { + return ( +
+ +
+ ) + } + + return ( +
+ {filteredEvaluators.map((item) => { + const primaryTag = getEvaluatorTagValues(item)[0] + const tagClassnames = primaryTag + ? TAG_CLASSNAME_MAP[primaryTag] || "bg-slate-100 text-slate-700" + : "bg-slate-100 text-slate-700" + + return ( +
handleTemplateSelect(item)} + className={clsx( + "border-0 border-b border-solid border-gray-200 min-h-[72px] flex flex-col justify-center gap-3 py-3 px-4 cursor-pointer group", + )} + > +
+ + {item.name} + + +
+ {item.description} +
+ ) + })} +
+ ) + } + + return ( +
+
+ + Select evaluator type + + + Choose base template for your evaluator + +
+ + +
{renderContent()}
+
+ ) +} + +export default memo(SelectEvaluatorModalContent) diff --git a/web/ee/src/components/Evaluators/components/SelectEvaluatorModal/index.tsx b/web/ee/src/components/Evaluators/components/SelectEvaluatorModal/index.tsx new file mode 100644 index 0000000000..858de0e46e --- /dev/null +++ b/web/ee/src/components/Evaluators/components/SelectEvaluatorModal/index.tsx @@ -0,0 +1,28 @@ +import {memo} from "react" + +import dynamic from "next/dynamic" + +import EnhancedModal from "@/oss/components/EnhancedUIs/Modal" + +import {SelectEvaluatorModalProps} from "./types" +const SelectEvaluatorModalContent = dynamic(() => import("./assets/SelectEvaluatorModalContent"), { + ssr: false, +}) + +const SelectEvaluatorModal = ({open, onCancel, ...modalProps}: SelectEvaluatorModalProps) => { + return ( + + + + ) +} + +export default memo(SelectEvaluatorModal) diff --git a/web/ee/src/components/Evaluators/components/SelectEvaluatorModal/types.ts b/web/ee/src/components/Evaluators/components/SelectEvaluatorModal/types.ts new file mode 100644 index 0000000000..29b0bbbc32 --- /dev/null +++ b/web/ee/src/components/Evaluators/components/SelectEvaluatorModal/types.ts @@ -0,0 +1,3 @@ +import {EnhancedModalProps} from "@/oss/components/EnhancedUIs/Modal/types" + +export interface SelectEvaluatorModalProps extends EnhancedModalProps {} diff --git a/web/ee/src/components/Evaluators/hooks/useEvaluatorsRegistryData.ts b/web/ee/src/components/Evaluators/hooks/useEvaluatorsRegistryData.ts new file mode 100644 index 0000000000..3aa171dc76 --- /dev/null +++ b/web/ee/src/components/Evaluators/hooks/useEvaluatorsRegistryData.ts @@ -0,0 +1,69 @@ +import {useCallback, useMemo} from "react" + +import useEvaluators from "@/oss/lib/hooks/useEvaluators" +import useFetchEvaluatorsData from "@/oss/lib/hooks/useFetchEvaluatorsData" +import {Evaluator, EvaluatorConfig} from "@/oss/lib/Types" + +import {EvaluatorCategory, EvaluatorPreview, EvaluatorRegistryRow} from "../assets/types" +import { + 
sortEvaluatorRowsByCreatedAtDesc, + transformEvaluatorConfigsToRows, + transformEvaluatorsToRows, +} from "../assets/utils" + +const HUMAN_QUERY = Object.freeze({is_human: true}) + +/** + * Supplies the evaluators registry table with its rows, a loading flag and a refetch helper. + * The "human" category reads the preview evaluators query (is_human: true); every other + * category is built from the evaluator configs together with the base evaluators list. + */ +const useEvaluatorsRegistryData = (category: EvaluatorCategory) => { + const { + evaluatorsSwr: baseEvaluatorsSwr, + evaluatorConfigsSwr, + isLoadingEvaluators, + refetchAll: refetchEvaluatorResources, + } = useFetchEvaluatorsData() + + const humanEvaluatorsSwr = useEvaluators({ + preview: true, + queries: HUMAN_QUERY, + }) + + const rows = useMemo(() => { + let unsortedRows: EvaluatorRegistryRow[] + + if (category === "human") { + const humanEvaluators = (humanEvaluatorsSwr.data || []) as EvaluatorPreview[] + unsortedRows = transformEvaluatorsToRows(humanEvaluators, "human") + } else { + const evaluatorConfigs = (evaluatorConfigsSwr.data || []) as EvaluatorConfig[] + const baseEvaluators = (baseEvaluatorsSwr.data || []) as Evaluator[] + + unsortedRows = transformEvaluatorConfigsToRows( + evaluatorConfigs, + category, + baseEvaluators, + ) + } + + return sortEvaluatorRowsByCreatedAtDesc(unsortedRows) + }, [category, baseEvaluatorsSwr.data, evaluatorConfigsSwr.data, humanEvaluatorsSwr.data]) + + // Computed inline: the memoized version left category out of its deps and could go stale + const isLoading = + category === "human" + ? 
humanEvaluatorsSwr.isLoading + : evaluatorConfigsSwr.isLoading || isLoadingEvaluators + + const refetchAll = useCallback(async () => { + await Promise.all( + [refetchEvaluatorResources(), humanEvaluatorsSwr.mutate?.()].filter(Boolean), + ) + }, [refetchEvaluatorResources, humanEvaluatorsSwr.mutate]) + + return {rows, isLoading, refetchAll} +} + +export default useEvaluatorsRegistryData diff --git a/web/ee/src/components/Evaluators/index.tsx b/web/ee/src/components/Evaluators/index.tsx new file mode 100644 index 0000000000..3c39bba016 --- /dev/null +++ b/web/ee/src/components/Evaluators/index.tsx @@ -0,0 +1,374 @@ +import {memo, useCallback, useEffect, useMemo, useState, type ChangeEvent, type Key} from "react" + +import {DeleteOutlined, PlusOutlined} from "@ant-design/icons" +import {Button, Input, Tabs, Typography} from "antd" +import {message} from "@/oss/components/AppMessageContext" +import dynamic from "next/dynamic" +import {useRouter} from "next/router" +import {useLocalStorage} from "usehooks-ts" + +import EnhancedTable from "@/oss/components/EnhancedUIs/Table" +import {AnnotateDrawerSteps} from "@/oss/components/pages/observability/drawer/AnnotateDrawer/assets/enum" +import useURL from "@/oss/hooks/useURL" +import {checkIfResourceValidForDeletion} from "@/oss/lib/helpers/evaluate" +import {useBreadcrumbsEffect} from "@/oss/lib/hooks/useBreadcrumbs" +import {deleteEvaluatorConfig} from "@/oss/services/evaluations/api" +import {deleteHumanEvaluator} from "@/oss/services/evaluators" +import {useProjectData} from "@/oss/state/project/hooks" + +import { + DEFAULT_EVALUATOR_TAB, + EVALUATOR_TABLE_STORAGE_PREFIX, + EVALUATOR_TABS, +} from "./assets/constants" +import getColumns from "./assets/getColumns" +import {EvaluatorCategory, EvaluatorRegistryRow} from "./assets/types" +import DeleteEvaluatorsModal from "./components/DeleteEvaluatorsModal" +import SelectEvaluatorModal 
from "./components/SelectEvaluatorModal" +import useEvaluatorsRegistryData from "./hooks/useEvaluatorsRegistryData" +import {useQueryParam} from "@/oss/hooks/useQuery" + +const AnnotateDrawer = dynamic( + () => import("@/oss/components/pages/observability/drawer/AnnotateDrawer"), + {ssr: false}, +) + +const isValidEvaluatorTab = (value: string): value is EvaluatorCategory => { + return EVALUATOR_TABS.some(({key}) => key === value) +} + +const EvaluatorsRegistry = ({scope = "project"}: {scope?: "project" | "app"}) => { + const {projectId} = useProjectData() + const router = useRouter() + const {projectURL} = useURL() + const storageKey = useMemo( + () => `${EVALUATOR_TABLE_STORAGE_PREFIX}-${scope}-${projectId || "global"}-tab`, + [projectId, scope], + ) + const [activeTab, setActiveTab] = useLocalStorage( + storageKey, + DEFAULT_EVALUATOR_TAB, + ) + const [tabState, setTabState] = useQueryParam("tab", activeTab) + + useEffect(() => { + if (isValidEvaluatorTab(tabState)) { + if (tabState !== activeTab) { + setActiveTab(tabState) + } + return + } + + const fallbackTab = isValidEvaluatorTab(activeTab) ? 
activeTab : DEFAULT_EVALUATOR_TAB + + if (activeTab !== fallbackTab) { + setActiveTab(fallbackTab) + } + + if (tabState !== fallbackTab) { + setTabState(fallbackTab) + } + }, [tabState, activeTab]) + + // states + const [searchTerm, setSearchTerm] = useState("") + const [selectedRowKeys, setSelectedRowKeys] = useState([]) + const [isDeleteModalOpen, setIsDeleteModalOpen] = useState(false) + const [isDeleting, setIsDeleting] = useState(false) + const [isSelectEvaluatorModalOpen, setIsSelectEvaluatorModalOpen] = useState(false) + const [isAnnotateDrawerOpen, setIsAnnotateDrawerOpen] = useState(false) + const [drawerMode, setDrawerMode] = useState<"create" | "edit">("create") + const [evaluatorToEdit, setEvaluatorToEdit] = useState(null) + + const {rows, isLoading, refetchAll} = useEvaluatorsRegistryData(activeTab) + + const rowsById = useMemo(() => { + return new Map(rows.map((row) => [row.id, row])) + }, [rows]) + + useEffect(() => { + setSelectedRowKeys((prev) => { + const validKeys = prev.filter((key) => rowsById.has(String(key))) + return validKeys.length === prev.length ? 
prev : validKeys + }) + }, [rowsById]) + + const filteredRows = useMemo(() => { + if (!searchTerm) return rows + + return rows.filter((row) => { + const haystack = [ + row.name, + row.slug, + row.typeBadge.label, + row.versionLabel, + row.modifiedBy, + ...row.tags, + ] + .filter(Boolean) + .join(" ") + .toLowerCase() + + return haystack.includes(searchTerm.trim().toLowerCase()) + }) + }, [rows, searchTerm]) + + const selectedRows = useMemo( + () => selectedRowKeys.map((key) => rowsById.get(String(key))).filter(Boolean), + [selectedRowKeys, rowsById], + ) as EvaluatorRegistryRow[] + + const selectedNames = useMemo(() => selectedRows.map((row) => row.name), [selectedRows]) + + const onTabChange = useCallback( + (value: EvaluatorCategory) => { + setActiveTab(value) + setSelectedRowKeys([]) + setTabState(value) + }, + [setActiveTab, setTabState], + ) + + const onSearch = useCallback((event: ChangeEvent) => { + setSearchTerm(event.target.value) + }, []) + + const handleOpenHumanDrawer = useCallback(() => { + setDrawerMode("create") + setEvaluatorToEdit(null) + setIsAnnotateDrawerOpen(true) + }, []) + + const handleOpenCreateModal = useCallback(() => { + if (activeTab === "human") { + handleOpenHumanDrawer() + return + } + setIsSelectEvaluatorModalOpen(true) + }, [activeTab, handleOpenHumanDrawer]) + + const handleCloseSelectModal = useCallback(() => { + setIsSelectEvaluatorModalOpen(false) + }, []) + + const closeAnnotateDrawer = useCallback(() => { + setIsAnnotateDrawerOpen(false) + setEvaluatorToEdit(null) + setDrawerMode("create") + }, []) + + const onSavedEvaluator = useCallback( + async (_slug?: string) => { + try { + await refetchAll() + } catch (error) { + console.error(error) + } finally { + closeAnnotateDrawer() + } + }, + [refetchAll, closeAnnotateDrawer], + ) + + const openDrawerForRecord = useCallback( + (record: EvaluatorRegistryRow) => { + if (activeTab !== "human") return + setDrawerMode("edit") + setEvaluatorToEdit(record.raw) + 
setIsAnnotateDrawerOpen(true) + }, + [activeTab], + ) + + const handleNavigateToConfigure = useCallback( + async (record: EvaluatorRegistryRow) => { + const raw = record.raw as EvaluatorRegistryRow["raw"] & {kind?: string} + const isConfig = raw && raw.kind === "config" + const targetId = isConfig ? record.id : (record.slug as string) + + await router.push(`${projectURL}/evaluators/configure/${encodeURIComponent(targetId)}`) + }, + [projectURL, router], + ) + + const createEvaluatorDrawerProps = useMemo( + () => ({ + mode: drawerMode, + evaluator: drawerMode === "edit" ? evaluatorToEdit || undefined : undefined, + onSuccess: onSavedEvaluator, + skipPostCreateStepChange: drawerMode === "create", + }), + [drawerMode, evaluatorToEdit, onSavedEvaluator], + ) + + const handleConfirmDelete = useCallback(async () => { + if (!selectedRows.length) return + const ids = selectedRows.map((row) => row.id).filter(Boolean) as string[] + if (!ids.length) return + + try { + setIsDeleting(true) + + if (activeTab === "human") { + await Promise.all(ids.map((id) => deleteHumanEvaluator(id))) + } else { + const canDelete = await checkIfResourceValidForDeletion({ + resourceType: "evaluator_config", + resourceIds: ids, + }) + if (!canDelete) return + + await Promise.all(ids.map((id) => deleteEvaluatorConfig(id))) + } + + message.success( + ids.length === 1 ? 
"Evaluator deleted" : `${ids.length} evaluators deleted`, + ) + + setSelectedRowKeys([]) + await refetchAll() + } catch (error) { + console.error(error) + message.error("Failed to delete evaluators") + } finally { + setIsDeleting(false) + setIsDeleteModalOpen(false) + } + }, [selectedRows, refetchAll, activeTab]) + + const handleRowDelete = useCallback( + (record: EvaluatorRegistryRow) => { + if (!record?.id) return + setSelectedRowKeys([record.id]) + setIsDeleteModalOpen(true) + }, + [setIsDeleteModalOpen, setSelectedRowKeys], + ) + + const tableColumns = useMemo( + () => + getColumns({ + category: activeTab, + onEdit: openDrawerForRecord, + onConfigure: handleNavigateToConfigure, + onDelete: handleRowDelete, + }), + [activeTab], + ) + + const activeTabLabel = useMemo(() => { + return EVALUATOR_TABS.find((tab) => tab.key === activeTab)?.label || "Evaluators" + }, [activeTab]) + + useEffect(() => { + setSelectedRowKeys([]) + }, [activeTab]) + + const breadcrumbKey = scope === "project" ? "projectPage" : "appPage" + + useBreadcrumbsEffect( + { + breadcrumbs: {[breadcrumbKey]: {label: activeTabLabel}}, + type: "append", + condition: true, + }, + [breadcrumbKey, activeTabLabel], + ) + + const isDeleteDisabled = selectedRowKeys.length === 0 + + return ( +
+ Evaluators + +
+ onTabChange(key as EvaluatorCategory)} + /> +
+
+ + + +
+ +
+ record.id} + rowSelection={{ + type: "checkbox", + columnWidth: 48, + fixed: "left", + selectedRowKeys, + onChange: (keys) => setSelectedRowKeys(keys as Key[]), + }} + tableLayout="fixed" + virtualized + className="flex-1" + onRow={(record) => ({ + className: "cursor-pointer", + onClick: () => { + activeTab === "human" + ? openDrawerForRecord(record) + : handleNavigateToConfigure(record) + }, + })} + /> +
+ + { + setIsDeleteModalOpen(false) + setSelectedRowKeys([]) + }} + onConfirm={handleConfirmDelete} + confirmLoading={isDeleting} + selectedCount={selectedRowKeys.length} + selectedNames={selectedNames} + /> + + + + +
+ ) +} + +export default memo(EvaluatorsRegistry) diff --git a/web/ee/src/components/HumanEvaluations/SingleModelEvaluation.tsx b/web/ee/src/components/HumanEvaluations/SingleModelEvaluation.tsx index cdc664f9b2..a2eaad5e6b 100644 --- a/web/ee/src/components/HumanEvaluations/SingleModelEvaluation.tsx +++ b/web/ee/src/components/HumanEvaluations/SingleModelEvaluation.tsx @@ -13,16 +13,17 @@ import useEvaluations from "@/oss/lib/hooks/useEvaluations" import useRunMetricsMap from "@/oss/lib/hooks/useRunMetricsMap" import {useAppsData} from "@/oss/state/app" -import SingleModelEvaluationHeader from "./assets/SingleModelEvaluationHeader" -import {useStyles} from "./assets/styles" -import {getColumns} from "./assets/utils" -import {EvaluationRow} from "./types" import { buildAppScopedUrl, buildEvaluationNavigationUrl, extractEvaluationAppId, } from "../pages/evaluations/utils" +import SingleModelEvaluationHeader from "./assets/SingleModelEvaluationHeader" +import {useStyles} from "./assets/styles" +import {getColumns} from "./assets/utils" +import {EvaluationRow} from "./types" + interface SingleModelEvaluationProps { viewType: "evaluation" | "overview" scope?: "app" | "project" @@ -64,7 +65,7 @@ const SingleModelEvaluation = ({viewType, scope = "app"}: SingleModelEvaluationP const knownAppIds = useMemo(() => { return new Set( - (availableApps as Array<{app_id?: string}>) + (availableApps as {app_id?: string}[]) .map((app) => app?.app_id) .filter(Boolean) as string[], ) diff --git a/web/ee/src/components/HumanEvaluations/assets/TableDropdownMenu/index.tsx b/web/ee/src/components/HumanEvaluations/assets/TableDropdownMenu/index.tsx index c7210088fe..b9c48831a3 100644 --- a/web/ee/src/components/HumanEvaluations/assets/TableDropdownMenu/index.tsx +++ b/web/ee/src/components/HumanEvaluations/assets/TableDropdownMenu/index.tsx @@ -6,6 +6,7 @@ import {Dropdown, Button, MenuProps} from "antd" import {useRouter} from "next/router" import {EvaluationStatus} from "@/oss/lib/Types" 
+ import { buildAppScopedUrl, buildEvaluationNavigationUrl, diff --git a/web/ee/src/components/pages/evaluations/EvaluationsView.tsx b/web/ee/src/components/pages/evaluations/EvaluationsView.tsx index 69694f0205..94462f85b6 100644 --- a/web/ee/src/components/pages/evaluations/EvaluationsView.tsx +++ b/web/ee/src/components/pages/evaluations/EvaluationsView.tsx @@ -46,7 +46,7 @@ interface EvaluationsViewProps { scope?: EvaluationScope } -const allowedOptionsByScope: Record> = { +const allowedOptionsByScope: Record = { app: [ {value: "auto_evaluation", label: "Automatic"}, {value: "human_annotation", label: "Human annotation"}, @@ -138,7 +138,7 @@ const EvaluationsView = ({scope = "app"}: EvaluationsViewProps) => { return (
- Evaluations + Evaluations import("./AdvancedSettings"), { ssr: false, }) +const NoResultsFound = dynamic(() => import("@/oss/components/NoResultsFound/NoResultsFound"), { + ssr: false, +}) + const NewEvaluationModalContent: FC = ({ onSuccess, handlePanelChange, @@ -59,7 +64,13 @@ const NewEvaluationModalContent: FC = ({ }) => { const classes = useStyles() const {inputRef} = useFocusInput({isOpen: props.isOpen || false}) + const {redirectUrl} = useURL() const appSelectionComplete = Boolean(selectedAppId) + const hasAppOptions = appOptions.length > 0 + + const handleCreateApp = useCallback(() => { + redirectUrl() + }, [redirectUrl]) const selectedTestset = useMemo( () => testSets.find((ts) => ts._id === selectedTestsetId) || null, @@ -103,17 +114,29 @@ const NewEvaluationModalContent: FC = ({ ), children: (
- - {!appSelectionComplete && !appSelectionDisabled ? ( - - Please select an application to continue configuring the evaluation. - - ) : null} + {hasAppOptions ? ( + <> + + {!appSelectionComplete && !appSelectionDisabled ? ( + + Please select an application to continue configuring the + evaluation. + + ) : null} + + ) : ( + + )}
), }, @@ -260,6 +283,8 @@ const NewEvaluationModalContent: FC = ({ selectedAppId, onSelectApp, appSelectionDisabled, + hasAppOptions, + handleCreateApp, ]) return ( diff --git a/web/ee/src/components/pages/evaluations/NewEvaluation/Components/SelectAppSection.tsx b/web/ee/src/components/pages/evaluations/NewEvaluation/Components/SelectAppSection.tsx index 890a9fa6a3..2275dc4380 100644 --- a/web/ee/src/components/pages/evaluations/NewEvaluation/Components/SelectAppSection.tsx +++ b/web/ee/src/components/pages/evaluations/NewEvaluation/Components/SelectAppSection.tsx @@ -1,4 +1,4 @@ -import {HTMLProps, useMemo} from "react" +import {HTMLProps, ReactNode, useMemo} from "react" import {Table, Tag, Typography} from "antd" import type {ColumnsType} from "antd/es/table" @@ -18,6 +18,7 @@ interface SelectAppSectionProps extends HTMLProps { selectedAppId: string onSelectApp: (value: string) => void disabled?: boolean + emptyText?: ReactNode } const SelectAppSection = ({ @@ -26,6 +27,7 @@ const SelectAppSection = ({ onSelectApp, disabled, className, + emptyText, }: SelectAppSectionProps) => { const columns: ColumnsType = useMemo(() => { return [ @@ -106,9 +108,11 @@ const SelectAppSection = ({ getCheckboxProps: () => ({disabled}), }} locale={{ - emptyText: disabled - ? "Application selection is locked in app scope" - : "No applications available", + emptyText: + emptyText ?? + (disabled + ? "Application selection is locked in app scope" + : "No applications available"), }} />
diff --git a/web/ee/src/components/pages/evaluations/NewEvaluation/Components/SelectEvaluatorSection/SelectEvaluatorSection.tsx b/web/ee/src/components/pages/evaluations/NewEvaluation/Components/SelectEvaluatorSection/SelectEvaluatorSection.tsx index b87ad02813..54fe8f0314 100644 --- a/web/ee/src/components/pages/evaluations/NewEvaluation/Components/SelectEvaluatorSection/SelectEvaluatorSection.tsx +++ b/web/ee/src/components/pages/evaluations/NewEvaluation/Components/SelectEvaluatorSection/SelectEvaluatorSection.tsx @@ -6,24 +6,15 @@ import {ColumnsType} from "antd/es/table" import clsx from "clsx" import dynamic from "next/dynamic" -import EnhancedDrawer from "@/oss/components/EnhancedUIs/Drawer" -import AnnotateDrawerTitle from "@/oss/components/pages/observability/drawer/AnnotateDrawer/assets/AnnotateDrawerTitle" -import CreateEvaluator from "@/oss/components/pages/observability/drawer/AnnotateDrawer/assets/CreateEvaluator" -import {AnnotateDrawerSteps} from "@/oss/components/pages/observability/drawer/AnnotateDrawer/assets/enum" import {getMetricsFromEvaluator} from "@/oss/components/pages/observability/drawer/AnnotateDrawer/assets/transforms" import {EvaluatorDto} from "@/oss/lib/hooks/useEvaluators/types" import useFetchEvaluatorsData from "@/oss/lib/hooks/useFetchEvaluatorsData" import {Evaluator, EvaluatorConfig} from "@/oss/lib/Types" import type {SelectEvaluatorSectionProps} from "../../types" +import router from "next/router" +import useURL from "@/oss/hooks/useURL" -const EvaluatorsModal = dynamic( - () => import("../../../autoEvaluation/EvaluatorsModal/EvaluatorsModal"), - { - ssr: false, - loading: () => null, // Prevent flash by not rendering until loaded - }, -) const NoResultsFound = dynamic(() => import("@/oss/components/NoResultsFound/NoResultsFound"), { ssr: false, }) @@ -55,11 +46,16 @@ const SelectEvaluatorSection = ({ selectedAppId, ...props }: SelectEvaluatorSectionProps & {preview?: Preview}) => { + const {projectURL} = useURL() const 
fetchData = useFetchEvaluatorsData({ preview: preview as boolean, queries: {is_human: preview}, appId: selectedAppId || "", }) + const evaluatorsRegistryUrl = useMemo( + () => `${projectURL}/evaluators?tab=${preview ? "human" : "automatic"}`, + [projectURL, preview], + ) const evaluationData = useMemo(() => { if (preview) { @@ -87,8 +83,7 @@ const SelectEvaluatorSection = ({ evaluationData const [searchTerm, setSearchTerm] = useState("") - const [isEvaluatorsModalOpen, setIsEvaluatorsModalOpen] = useState(false) - const [current, setCurrent] = useState(0) + const prevSelectedAppIdRef = useRef() const {refetchEvaluatorConfigs} = fetchData @@ -237,10 +232,7 @@ const SelectEvaluatorSection = ({ @@ -253,10 +245,7 @@ const SelectEvaluatorSection = ({ title="No evaluators yet" description="Evaluators help you measure and analyze your model's responses." primaryActionLabel="Create your first evaluator" - onPrimaryAction={() => { - setCurrent(1) - setIsEvaluatorsModalOpen(true) - }} + onPrimaryAction={() => router.push(evaluatorsRegistryUrl)} /> ) : preview ? ( > @@ -320,39 +309,6 @@ const SelectEvaluatorSection = ({ /> )}
- - {preview ? ( - setIsEvaluatorsModalOpen(false)} - onClose={() => setIsEvaluatorsModalOpen(false)} - /> - } - closeIcon={null} - width={400} - onClose={() => setIsEvaluatorsModalOpen(false)} - classNames={{body: "!p-0", header: "!p-4"}} - > - { - setSelectedEvalConfigs(updater) - setIsEvaluatorsModalOpen(false) - }} - /> - - ) : ( - setIsEvaluatorsModalOpen(false)} - current={current} - setCurrent={setCurrent} - appId={selectedAppId || null} - openedFromNewEvaluation={true} - /> - )} ) } diff --git a/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DebugSection.tsx b/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DebugSection.tsx index 7666042f88..e72dd938e3 100644 --- a/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DebugSection.tsx +++ b/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DebugSection.tsx @@ -1,5 +1,5 @@ // @ts-nocheck -import {Dispatch, SetStateAction, useMemo, useRef, useState} from "react" +import {Dispatch, SetStateAction, useEffect, useMemo, useRef, useState} from "react" import { CheckCircleOutlined, @@ -39,6 +39,7 @@ import { safeJson5Parse, } from "@/oss/lib/helpers/utils" import {getAllVariantParameters} from "@/oss/lib/helpers/variantHelper" +import useAppVariantRevisions from "@/oss/lib/hooks/useAppVariantRevisions" import {getAllMetadata} from "@/oss/lib/hooks/useStatelessVariants/state" import {extractInputKeysFromSchema} from "@/oss/lib/shared/variant/inputHelpers" import {getRequestSchema} from "@/oss/lib/shared/variant/openapiUtils" @@ -65,6 +66,7 @@ import { createEvaluatorRunExecution, } from "@/oss/services/evaluations/api_ee" import {AgentaNodeDTO} from "@/oss/services/observability/types" +import {useAppsData} from "@/oss/state/app/hooks" import {customPropertiesByRevisionAtomFamily} from "@/oss/state/newPlayground/core/customProperties" import { 
stablePromptVariablesAtomFamily, @@ -75,6 +77,7 @@ import {appSchemaAtom, appUriInfoAtom} from "@/oss/state/variant/atoms/fetcher" import EvaluatorTestcaseModal from "./EvaluatorTestcaseModal" import EvaluatorVariantModal from "./EvaluatorVariantModal" +import {buildVariantFromRevision} from "./variantUtils" interface DebugSectionProps { selectedTestcase: { testcase: Record | null @@ -119,6 +122,7 @@ const useStyles = createUseStyles((theme: JSSTheme) => ({ border: `1px solid ${theme.colorBorder}`, borderRadius: theme.borderRadius, overflow: "hidden", + minHeight: "180px", }, variantTab: { flex: 1, @@ -161,6 +165,7 @@ const DebugSection = ({ const {appTheme} = useAppTheme() const uriObject = useAtomValue(appUriInfoAtom) const appSchema = useAtomValue(appSchemaAtom) + const {apps: availableApps = []} = useAppsData() const [baseResponseData, setBaseResponseData] = useState(null) const [outputResult, setOutputResult] = useState("") const [isLoadingResult, setIsLoadingResult] = useState(false) @@ -179,19 +184,52 @@ const DebugSection = ({ }) const {secrets} = useVaultSecret() + const defaultAppId = useMemo(() => { + if (_selectedVariant?.appId) return _selectedVariant.appId + if (appId) return appId + const firstApp = availableApps?.[0] + return firstApp?.app_id ?? 
"" + }, [_selectedVariant?.appId, appId, availableApps]) + + const {revisionMap: defaultRevisionMap} = useAppVariantRevisions(defaultAppId || null) + const selectedVariant = useMemo(() => { const revs = _selectedVariant?.revisions || [] // find the most recent revision by looking at the updatedAtTimestamp - const variant = revs.sort((a, b) => b.updatedAtTimestamp - a.updatedAtTimestamp)[0] + const variant = revs?.sort((a, b) => b.updatedAtTimestamp - a.updatedAtTimestamp)[0] return variant }, [_selectedVariant]) + const fallbackVariant = useMemo(() => { + if (_selectedVariant || !defaultAppId) return null + const revisionLists = Object.values(defaultRevisionMap || {}) + if (!revisionLists.length) return null + const revisions = revisionLists[0] + if (!revisions || revisions.length === 0) return null + const baseVariant = buildVariantFromRevision(revisions[0], defaultAppId) + baseVariant.revisions = [...revisions] + return baseVariant + }, [_selectedVariant, defaultAppId, defaultRevisionMap]) + + const derivedVariants = useMemo(() => { + if (variants && variants.length > 0) return variants + if (fallbackVariant) return [fallbackVariant] + return [] + }, [variants, fallbackVariant]) + + useEffect(() => { + if (_selectedVariant) return + if (derivedVariants.length > 0) { + setSelectedVariant(derivedVariants[0]) + return + } + }, [_selectedVariant, derivedVariants, setSelectedVariant]) // Variant flags (custom/chat) from global atoms for the selected revision const flags = useAtomValue( useMemo( () => (selectedVariant?.id - ? variantFlagsAtomFamily({revisionId: selectedVariant.id}) + ? variantFlagsAtomFamily({revisionId: selectedVariant?.id}) : (atom(null) as any)) as any, [selectedVariant?.id], ), @@ -202,7 +240,7 @@ const DebugSection = ({ useMemo( () => (selectedVariant?.id - ? (stablePromptVariablesAtomFamily(selectedVariant.id) as any) + ? 
(stablePromptVariablesAtomFamily(selectedVariant?.id) as any) : (atom([]) as any)) as any, [selectedVariant?.id], ), @@ -214,7 +252,7 @@ const DebugSection = ({ () => (selectedVariant?.id ? (transformedPromptsAtomFamily({ - revisionId: selectedVariant.id, + revisionId: selectedVariant?.id, useStableParams: true, }) as any) : (atom(null) as any)) as any, @@ -227,7 +265,7 @@ const DebugSection = ({ useMemo( () => (selectedVariant?.id - ? (customPropertiesByRevisionAtomFamily(selectedVariant.id) as any) + ? (customPropertiesByRevisionAtomFamily(selectedVariant?.id) as any) : (atom({}) as any)) as any, [selectedVariant?.id], ), @@ -237,6 +275,9 @@ const DebugSection = ({ return testsets?.find((item) => item._id === selectedTestset) }, [selectedTestset, testsets]) + const isPlainObject = (value: unknown): value is Record => + Boolean(value) && typeof value === "object" && !Array.isArray(value) + const fetchEvalMapper = async () => { if (!baseResponseData || !selectedTestcase.testcase) return @@ -339,6 +380,10 @@ const DebugSection = ({ } const handleRunVariant = async () => { + if (availableApps.length === 0 && derivedVariants.length === 0) { + message.info("Create an app first to run a variant.") + return + } if (!selectedTestcase.testcase || !selectedVariant) return const controller = new AbortController() abortControllersRef.current = controller @@ -355,7 +400,7 @@ const DebugSection = ({ messages: ChatMessage[] } - if (selectedVariant.parameters) { + if (selectedVariant?.parameters) { const routePath = uriObject?.routePath || "" const spec = appSchema as any const req = spec ? 
(getRequestSchema as any)(spec, {routePath}) : undefined @@ -396,24 +441,52 @@ const DebugSection = ({ params.inputs = (effectiveKeys || []).map((name) => ({name, input: false})) // Optional parameters/body extras: prefer stable transform snapshot - params.parameters = - stableTransformedParams || - transformToRequestBody({ - variant: selectedVariant, - allMetadata: getAllMetadata(), - prompts: - spec && selectedVariant - ? derivePromptsFromSpec( - selectedVariant as any, - spec as any, - routePath, - ) || [] - : [], - // Keep request shape aligned with OpenAPI schema - isChat: hasMessagesProp, - isCustom, - customProperties: isCustom ? customProps : undefined, - }) + const baseParameters = isPlainObject(stableTransformedParams) + ? {...stableTransformedParams} + : transformToRequestBody({ + variant: selectedVariant, + allMetadata: getAllMetadata(), + prompts: + spec && selectedVariant + ? derivePromptsFromSpec( + selectedVariant as any, + spec as any, + routePath, + ) || [] + : [], + // Keep request shape aligned with OpenAPI schema + isChat: hasMessagesProp, + isCustom, + customProperties: isCustom ? customProps : undefined, + }) + + const variantParameters = isPlainObject(selectedVariant?.parameters) + ? (selectedVariant?.parameters as Record) + : undefined + + if (isPlainObject(baseParameters)) { + const hasAgConfig = + isPlainObject(baseParameters.ag_config) && + Object.keys(baseParameters.ag_config).length > 0 + + if (!hasAgConfig && variantParameters) { + const variantAgConfig = isPlainObject(variantParameters.ag_config) + ? variantParameters.ag_config + : Object.keys(variantParameters).length > 0 + ? 
variantParameters + : undefined + + if (variantAgConfig) { + baseParameters.ag_config = variantAgConfig + } + } + + params.parameters = baseParameters + } else if (!baseParameters && variantParameters) { + params.parameters = {...variantParameters} + } else { + params.parameters = baseParameters + } params.isChatVariant = hasMessagesProp params.messages = hasMessagesProp ? extractChatMessages(selectedTestcase.testcase) @@ -428,7 +501,7 @@ const DebugSection = ({ params.messages = hasMessagesInput ? extractChatMessages(selectedTestcase.testcase) : [] - params.isCustom = selectedVariant.isCustom + params.isCustom = selectedVariant?.isCustom } // Filter testcase down to allowed keys only (exclude chat) @@ -445,14 +518,14 @@ const DebugSection = ({ params.inputs || [], params.parameters || [], appId, - selectedVariant.baseId, + selectedVariant?.baseId, params.messages, controller.signal, true, - selectedVariant.parameters && !!selectedVariant._parentVariant, + selectedVariant?.parameters && !!selectedVariant?._parentVariant, params.isCustom, uriObject, - selectedVariant.variantId, + selectedVariant?.variantId, ) if (typeof result === "string") { @@ -545,289 +618,272 @@ const DebugSection = ({ } return ( - <> - {debugEvaluator && ( - <> - - -
- - - Test evaluator - - - Test your evaluator by generating a test data +
+
+ + Test evaluator + + Test your evaluator by generating a test data + + + +
+
+ + + Testcase - -
-
- - - Testcase + {activeTestset && selectedTestcase.testcase && ( + <> + + + loaded from {activeTestset.name} + + )} + + + + + +
- {activeTestset && selectedTestcase.testcase && ( - <> - - - loaded from {activeTestset.name} - - - )} - - - + { + try { + if (value) { + const parsedValue = JSON.parse(value) + setSelectedTestcase(parsedValue) + } + } catch (error) { + console.error("Failed to parse test case JSON", error) + } + }} + options={{ + wordWrap: "on", + minimap: {enabled: false}, + lineNumbers: "off", + scrollBeyondLastLine: false, + }} + /> +
+
+ +
+
+ + + Application + + {variantStatus.success && ( + <> + + Success + + )} + {variantStatus.error && ( + + )} + + + {isRunningVariant ? ( + + ) : ( + } + menu={{ + items: [ + { + key: "change_variant", + icon: , + label: "Change Variant", + onClick: () => setOpenVariantModal(true), + }, + ], + }} + > +
- - -
- -
- { - try { - if (value) { - const parsedValue = JSON.parse(value) - setSelectedTestcase(parsedValue) - } - } catch (error) { - console.error("Failed to parse test case JSON", error) - } - }} - options={{ - wordWrap: "on", - minimap: {enabled: false}, - lineNumbers: "off", - scrollBeyondLastLine: false, - }} - /> -
-
- -
-
- - - Application - - {variantStatus.success && ( - <> - - - Success - - - )} - {variantStatus.error && ( - - )} - - - {isRunningVariant ? ( - - ) : ( - } - menu={{ - items: [ - { - key: "change_variant", - icon: , - label: "Change Variant", - onClick: () => setOpenVariantModal(true), - }, - ], - }} - > -
+ {/* Adding key above ensures React re-renders this label when variant changes */} + Run {selectedVariant?.variantName || "variant"} +
+
+ )} +
+ + + { + if (value) { + setVariantResult(value) + } + }} + /> +
+ ), + }, + { + key: "trace", + label: "Trace", + children: ( +
+ - - {/* Adding key above ensures React re-renders this label when variant changes */} - Run {selectedVariant?.variantName || "variant"} -
- - )} -
- - - { - if (value) { - setVariantResult(value) - } - }} - /> -
- ), - }, - { - key: "trace", - label: "Trace", - children: ( -
- { + try { + if (value) { + const parsedValue = JSON.parse(value) + setTraceTree(parsedValue) } - options={{ - wordWrap: "on", - minimap: {enabled: false}, - lineNumbers: "off", - scrollBeyondLastLine: false, - }} - onChange={(value) => { - try { - if (value) { - const parsedValue = - JSON.parse(value) - setTraceTree(parsedValue) - } - } catch (error) { - console.error( - "Failed to parse trace tree JSON", - error, - ) - } - }} - /> -
- ), - }, - ]} - /> -
- -
- - - - Evaluator Output - - {evalOutputStatus.success && ( - <> - - - Successful - - - )} - {evalOutputStatus.error && ( - - )} - - - - - - -
- -
-
+ } catch (error) { + console.error( + "Failed to parse trace tree JSON", + error, + ) + } + }} + /> +
+ ), + }, + ]} + /> + + +
+ + + + Evaluator Output + + {evalOutputStatus.success && ( + <> + + Successful + + )} + {evalOutputStatus.error && ( + + )} + + + + + + +
+
- - )} +
+ setOpenVariantModal(false)} setSelectedVariant={setSelectedVariant} @@ -844,7 +900,7 @@ const DebugSection = ({ setSelectedTestset={setSelectedTestset} /> )} - +
) } diff --git a/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/EvaluatorVariantModal.tsx b/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/EvaluatorVariantModal.tsx index 71d67c7e1e..823b8fe33a 100644 --- a/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/EvaluatorVariantModal.tsx +++ b/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/EvaluatorVariantModal.tsx @@ -1,26 +1,34 @@ import { useCallback, + useEffect, useMemo, useState, - useEffect, type ComponentProps, type Dispatch, type SetStateAction, - type Key, } from "react" -import {CloseOutlined} from "@ant-design/icons" +import {CloseCircleOutlined, CloseOutlined} from "@ant-design/icons" import {Play} from "@phosphor-icons/react" -import {Button, Input, Modal, Typography} from "antd" -import {useAtomValue} from "jotai" +import {Button, Input, Modal, Tabs, Tag, Typography} from "antd" +import clsx from "clsx" +import dynamic from "next/dynamic" import {createUseStyles} from "react-jss" import VariantsTable from "@/oss/components/VariantsComponents/Table" +import {useAppId} from "@/oss/hooks/useAppId" +import useURL from "@/oss/hooks/useURL" +import useAppVariantRevisions from "@/oss/lib/hooks/useAppVariantRevisions" import type {EnhancedVariant} from "@/oss/lib/shared/variant/transformer/types" -import {JSSTheme, Variant as BaseVariant} from "@/oss/lib/Types" -import {revisionMapAtom} from "@/oss/state/variant/atoms/fetcher" +import type {JSSTheme, ListAppsItem, Variant} from "@/oss/lib/Types" +import {useAppsData} from "@/oss/state/app/hooks" + +import TabLabel from "../../../NewEvaluation/assets/TabLabel" +import SelectAppSection from "../../../NewEvaluation/Components/SelectAppSection" +import type {NewEvaluationAppOption} from "../../../NewEvaluation/types" + +import {buildVariantFromRevision} from "./variantUtils" -type Variant = BaseVariant 
& {id?: string} type EvaluatorVariantModalProps = { variants: Variant[] | null setSelectedVariant: Dispatch> @@ -38,65 +46,411 @@ const useStyles = createUseStyles((theme: JSSTheme) => ({ height: 600, }, }, - table: { - "& .ant-table-thead > tr > th": { - height: 32, - padding: "0 16px", + tabs: { + height: "100%", + display: "flex", + "& .ant-tabs-nav": { + minWidth: 220, }, - "& .ant-table-tbody > tr > td": { - height: 48, - padding: "0 16px", + "& .ant-tabs-nav-list": { + width: "100%", + }, + "& .ant-tabs-content-holder": { + flex: 1, + paddingLeft: theme.padding, + overflow: "auto", + }, + }, + searchRow: { + display: "flex", + justifyContent: "space-between", + }, + tabsContainer: { + height: "100%", + display: "flex", + "& .ant-tabs-content-holder": { + paddingLeft: theme.padding, + flex: 1, + overflow: "auto", + }, + "& .ant-tabs-tab": { + color: theme.colorTextSecondary, + "&:hover": { + backgroundColor: theme.colorInfoBg, + }, + }, + "& .ant-tabs-ink-bar": { + display: "none", + }, + "& .ant-tabs-tab-active": { + backgroundColor: theme.controlItemBgActive, + borderRight: `2px solid ${theme.colorPrimary}`, + color: theme.colorPrimary, + fontWeight: `${theme.fontWeightMedium} !important`, }, }, })) +const NoResultsFound = dynamic(() => import("@/oss/components/NoResultsFound/NoResultsFound"), { + ssr: false, +}) + const EvaluatorVariantModal = ({ - variants, + variants: _variants, setSelectedVariant, selectedVariant, ...props }: EvaluatorVariantModalProps) => { const classes = useStyles() + const appIdFromRoute = useAppId() + const isAppScoped = Boolean(appIdFromRoute) + const {apps: availableApps = []} = useAppsData() + const {redirectUrl} = useURL() + + const [activePanel, setActivePanel] = useState( + isAppScoped ? 
"variantPanel" : "appPanel", + ) const [searchTerm, setSearchTerm] = useState("") - const [selectedRowKeys, setSelectedRowKeys] = useState([]) - - // Build a list of latest revisions (EnhancedVariant) for each base variant - const revisionMap = useAtomValue(revisionMapAtom) - const latestRevisions: EnhancedVariant[] = useMemo(() => { - const list: EnhancedVariant[] = [] - ;(variants || []).forEach((v) => { - const arr = revisionMap[v.variantId] || [] - if (arr && arr.length > 0) list.push(arr[0]) + const [appSearchTerm, setAppSearchTerm] = useState("") + const [selectedAppId, setSelectedAppId] = useState("") + const [selectedRowKeys, setSelectedRowKeys] = useState([]) + + const appOptions: NewEvaluationAppOption[] = useMemo(() => { + const options = + (availableApps as ListAppsItem[]).map((app) => ({ + label: app.app_name, + value: app.app_id, + type: app.app_type ?? null, + createdAt: app.created_at ?? null, + updatedAt: app.updated_at ?? null, + })) ?? [] + + if (selectedAppId && !options.some((option) => option.value === selectedAppId)) { + options.push({ + label: selectedAppId, + value: selectedAppId, + type: null, + createdAt: null, + updatedAt: null, + }) + } + + return options + }, [availableApps, selectedAppId]) + + const handleCreateApp = useCallback(() => { + redirectUrl() + }, [redirectUrl]) + + const filteredAppOptions = useMemo(() => { + if (!appSearchTerm) return appOptions + return appOptions.filter((option) => + option.label.toLowerCase().includes(appSearchTerm.toLowerCase()), + ) + }, [appOptions, appSearchTerm]) + + const {variants: appVariantRevisions, isLoading: variantsLoading} = useAppVariantRevisions( + selectedAppId || null, + ) + + const {latestRevisions, revisionToVariantMap, revisionById, variantById} = useMemo(() => { + if (!appVariantRevisions?.length) { + return { + latestRevisions: [] as EnhancedVariant[], + revisionToVariantMap: new Map(), + revisionById: new Map(), + variantById: new Map(), + } + } + + const grouped = new Map() + 
const revisionLookup = new Map() + appVariantRevisions.forEach((rev) => { + if (!rev?.variantId) return + const key = rev.variantId + const existing = grouped.get(key) ?? [] + existing.push(rev) + grouped.set(key, existing) + if (rev.id) { + revisionLookup.set(String(rev.id), rev) + } + }) + + const revisionToVariant = new Map() + const variantMap = new Map() + const latest: EnhancedVariant[] = [] + + grouped.forEach((revisions, variantId) => { + const sorted = [...revisions].sort( + (a, b) => (b.updatedAtTimestamp ?? 0) - (a.updatedAtTimestamp ?? 0), + ) + const baseRevision = sorted[0] ?? revisions[0] + if (!baseRevision) return + + const baseVariant = buildVariantFromRevision(baseRevision, selectedAppId) + baseVariant.revisions = sorted + + variantMap.set(variantId, baseVariant) + sorted.forEach((rev) => { + if (rev.id) { + revisionToVariant.set(String(rev.id), baseVariant) + } + }) + + latest.push(baseRevision) }) - return list - }, [variants, revisionMap]) - // Clear selection when modal is opened + latest.sort((a, b) => (b.updatedAtTimestamp ?? 0) - (a.updatedAtTimestamp ?? 0)) + + return { + latestRevisions: latest, + revisionToVariantMap: revisionToVariant, + revisionById: revisionLookup, + variantById: variantMap, + } + }, [appVariantRevisions, selectedAppId]) + useEffect(() => { - if (props.open) { - // Preselect currently selected variant's latest revision id - const rev = latestRevisions.find((r) => r.variantId === selectedVariant?.variantId) - setSelectedRowKeys(rev?.id ? [rev.id] : []) + if (!selectedRowKeys.length) return + const filteredKeys = selectedRowKeys.filter((key) => revisionToVariantMap.has(String(key))) + if (filteredKeys.length !== selectedRowKeys.length) { + setSelectedRowKeys(filteredKeys) + } + }, [revisionToVariantMap, selectedRowKeys]) + + useEffect(() => { + if (!props.open) { + setSearchTerm("") + setAppSearchTerm("") + setSelectedRowKeys([]) + setActivePanel(isAppScoped ? 
"variantPanel" : "appPanel") + if (!isAppScoped) setSelectedAppId("") + return + } + + const derivedAppId = (isAppScoped ? appIdFromRoute : selectedVariant?.appId) ?? "" + if (derivedAppId) { + setSelectedAppId(derivedAppId) + setActivePanel("variantPanel") + } else { + setActivePanel("appPanel") + } + }, [props.open, isAppScoped, appIdFromRoute, selectedVariant?.appId]) + + useEffect(() => { + if (!props.open) return + if (!selectedVariant?.variantId) return + const variant = variantById.get(selectedVariant.variantId) + if (!variant?.revisions?.length) return + const latestRevisionId = variant.revisions[0]?.id + if (!latestRevisionId) return + setSelectedRowKeys((prev) => (prev.length ? prev : [String(latestRevisionId)])) + }, [props.open, variantById, selectedVariant?.variantId]) + + const loadVariant = useCallback(() => { + const [selectedRevisionId] = selectedRowKeys + if (!selectedRevisionId) return + + const baseVariant = revisionToVariantMap.get(String(selectedRevisionId)) + if (!baseVariant) return + + const variantToSet = { + ...baseVariant, + revisions: baseVariant.revisions ? 
[...baseVariant.revisions] : [], } - }, [props.open, selectedVariant?.variantId, latestRevisions]) - const filtered = useMemo(() => { - const src = latestRevisions - if (!searchTerm) return src - return (src || []).filter((item) => + setSelectedVariant(variantToSet) + props.onCancel?.({} as any) + }, [selectedRowKeys, revisionToVariantMap, setSelectedVariant, props]) + + const handlePanelChange = useCallback((key: string) => { + setActivePanel(key) + }, []) + + const handleAppSelection = useCallback( + (value: string) => { + if (value === selectedAppId) return + setSelectedAppId(value) + setSelectedRowKeys([]) + setSearchTerm("") + setActivePanel("variantPanel") + }, + [selectedAppId], + ) + + const appSelectionComplete = Boolean(selectedAppId) + + const filteredRevisions = useMemo(() => { + if (!searchTerm) return latestRevisions + return latestRevisions.filter((item) => (item.variantName || "").toLowerCase().includes(searchTerm.toLowerCase()), ) }, [searchTerm, latestRevisions]) - const loadVariant = useCallback(() => { - const selectedRevision = filtered?.find((rev) => rev.id === selectedRowKeys[0]) - if (selectedRevision) { - // Find the base variant matching this revision and pass it back - const base = (variants || []).find((v) => v.variantId === selectedRevision.variantId) - if (base) setSelectedVariant(base) - props.onCancel?.({} as any) - } - }, [filtered, selectedRowKeys, setSelectedVariant, props, variants]) + const selectedRevisionTags = useMemo(() => { + if (!selectedRowKeys.length) return [] + return selectedRowKeys + .map((key) => { + const revision = revisionById.get(String(key)) + if (!revision) return null + return { + revisionId: String(key), + label: `${revision.variantName} - v${revision.revision}`, + } + }) + .filter(Boolean) as {revisionId: string; label: string}[] + }, [selectedRowKeys, revisionById]) + + const variantTabContent = ( +
+
+ setSearchTerm(e.target.value)} + placeholder="Search variants" + allowClear + className="w-[240px]" + disabled={!appSelectionComplete} + /> +
+ + {appSelectionComplete ? ( + { + const normalized = value.map((id) => id.toString()) + setSelectedRowKeys(normalized) + }, + type: "radio", + }} + isLoading={variantsLoading} + onRowClick={() => {}} + rowKey={"id"} + showStableName + showActionsDropdown={false} + onRow={(record) => { + const revision = record as EnhancedVariant + return { + style: {cursor: "pointer"}, + onClick: () => { + if (revision.id) { + setSelectedRowKeys([String(revision.id)]) + } + }, + } + }} + /> + ) : ( + + Select an application first to load this section. + + )} +
+ ) + + const tabs = useMemo(() => { + const showAppEmptyState = appOptions.length === 0 + const noSearchResults = filteredAppOptions.length === 0 && !showAppEmptyState + + return [ + { + key: "appPanel", + label: ( + + {appSelectionComplete && ( + : null} + onClose={() => { + if (!isAppScoped) { + setSelectedAppId("") + setSelectedRowKeys([]) + setActivePanel("appPanel") + } + }} + > + {appOptions.find((opt) => opt.value === selectedAppId)?.label ?? + selectedAppId} + + )} + + ), + children: ( +
+ {showAppEmptyState ? ( + + ) : ( + <> +
+ setAppSearchTerm(e.target.value)} + placeholder="Search applications" + allowClear + className="w-[240px]" + disabled={isAppScoped} + /> +
+ + {!appSelectionComplete && !isAppScoped ? ( + + Please select an application to continue. + + ) : null} + + )} +
+ ), + }, + { + key: "variantPanel", + label: ( + 0}> + {selectedRevisionTags.map(({revisionId, label}) => ( + } + onClose={() => setSelectedRowKeys([])} + > + {label} + + ))} + + ), + children: variantTabContent, + }, + ] + }, [ + appOptions, + appSelectionComplete, + handleAppSelection, + isAppScoped, + selectedAppId, + selectedRevisionTags, + selectedRowKeys, + setActivePanel, + setSelectedAppId, + setSelectedRowKeys, + filteredAppOptions, + variantTabContent, + handleCreateApp, + ]) return ( , iconPosition: "end", disabled: !selectedRowKeys.length, + loading: variantsLoading, onClick: loadVariant, }} title={ @@ -125,32 +480,17 @@ const EvaluatorVariantModal = ({ centered {...props} > -
- setSearchTerm(e.target.value)} - placeholder="Search" - allowClear - className="w-[240px]" - /> - - setSelectedRowKeys(value), - type: "radio", - }} - isLoading={false} - onRowClick={() => {}} - // Use revision id for table and selection, so the cell renderers resolve correctly - rowKey={"id"} - // Use stable name display to avoid showing Draft tag in selection UI - showStableName - className={classes.table} - showActionsDropdown={false} - /> -
+
) } diff --git a/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/Messages.tsx b/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/Messages.tsx index 273bcf6f4d..8d74181adb 100644 --- a/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/Messages.tsx +++ b/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/Messages.tsx @@ -5,8 +5,8 @@ import {MinusCircle} from "@phosphor-icons/react" import {Button, Form, Input} from "antd" import isEqual from "lodash/isEqual" -import MessageEditor from "@/oss/components/Playground/Components/ChatCommon/MessageEditor" import EnhancedButton from "@/oss/components/Playground/assets/EnhancedButton" +import MessageEditor from "@/oss/components/Playground/Components/ChatCommon/MessageEditor" interface Message { role: string diff --git a/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx b/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx index 55eaeabc18..1ec855d3ca 100644 --- a/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx +++ b/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx @@ -1,19 +1,18 @@ import {useEffect, useMemo, useState} from "react" -import {CloseOutlined} from "@ant-design/icons" -import {ArrowLeft, CaretDoubleRight} from "@phosphor-icons/react" -import {Button, Flex, Form, Input, message, Space, Tooltip, Typography} from "antd" +import {ArrowLeft} from "@phosphor-icons/react" +import {Button, Flex, Form, Input, message, Space, Typography, Splitter, Divider} from "antd" import dynamic from "next/dynamic" import {createUseStyles} from "react-jss" import {useAppId} from "@/oss/hooks/useAppId" -import {isDemo} from "@/oss/lib/helpers/utils" import {Evaluator, 
EvaluatorConfig, JSSTheme, testset, Variant} from "@/oss/lib/Types" import { CreateEvaluationConfigData, createEvaluatorConfig, updateEvaluatorConfig, } from "@/oss/services/evaluations/api" +import {useAppList} from "@/oss/state/app" import AdvancedSettings from "./AdvancedSettings" import {DynamicFormField} from "./DynamicFormField" @@ -23,6 +22,7 @@ const DebugSection: any = dynamic( import( "@/oss/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DebugSection" ), + {ssr: false}, ) interface ConfigureEvaluatorProps { @@ -48,8 +48,6 @@ interface ConfigureEvaluatorProps { testcase: Record | null }> > - setDebugEvaluator: React.Dispatch> - debugEvaluator: boolean setSelectedTestset: React.Dispatch> selectedTestset: string appId?: string | null @@ -74,13 +72,8 @@ const useStyles = createUseStyles((theme: JSSTheme) => ({ gap: theme.padding, height: "100%", width: "100%", - maxWidth: "100%", - overflow: "hidden", "& .ant-form-item": { - marginBottom: 0, - }, - "& .ant-form-item-label": { - paddingBottom: theme.paddingXXS, + marginBottom: 10, }, }, formTitleText: { @@ -107,14 +100,13 @@ const ConfigureEvaluator = ({ cloneConfig, setCloneConfig, setSelectedTestcase, - debugEvaluator, - setDebugEvaluator, selectedTestset, setSelectedTestset, appId: appIdOverride, }: ConfigureEvaluatorProps) => { const routeAppId = useAppId() - const appId = appIdOverride ?? routeAppId + const apps = useAppList() + const appId = appIdOverride ?? routeAppId ?? apps?.[0].app_id const classes = useStyles() const [form] = Form.useForm() const [submitLoading, setSubmitLoading] = useState(false) @@ -150,6 +142,7 @@ const ConfigureEvaluator = ({ evaluator_key: selectedEvaluator.key, settings_values: settingsValues, } + ;(editMode ? updateEvaluatorConfig(editEvalEditValues?.id!, data) : createEvaluatorConfig(appId, data) @@ -174,85 +167,49 @@ const ConfigureEvaluator = ({ }, [editMode, cloneConfig]) return ( -
-
- - {editMode ? ( - <> -
- + +
- -
+
+
{selectedEvaluator.name} - - - - {selectedEvaluator.description} -
+
- -
- - - -
-
+
+ + + +
{basicSettingsFields.length ? ( - +
Parameters @@ -293,36 +247,31 @@ const ConfigureEvaluator = ({ name={["settings_values", field.key]} /> ))} - +
) : ( "" )} {advancedSettingsFields.length > 0 && ( - +
+ +
)}
- - - - -
+ + - -
+
+ ) } diff --git a/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/variantUtils.ts b/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/variantUtils.ts new file mode 100644 index 0000000000..69d9ae3a46 --- /dev/null +++ b/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/variantUtils.ts @@ -0,0 +1,35 @@ +import type {EnhancedVariant} from "@/oss/lib/shared/variant/transformer/types" +import type {Variant} from "@/oss/lib/Types" + +export const buildVariantFromRevision = ( + revision: EnhancedVariant, + fallbackAppId?: string, +): Variant => { + return { + id: revision.variantId, + name: revision.variantName, + variantName: revision.variantName, + templateVariantName: (revision as any)?.templateVariantName ?? null, + persistent: true, + previousVariantName: (revision as any)?.previousVariantName ?? null, + variantId: revision.variantId, + appId: revision.appId ?? fallbackAppId ?? "", + appName: revision.appName ?? "", + baseId: revision.baseId ?? "", + baseName: revision.baseName ?? "", + configName: revision.configName ?? "", + uri: revision.uri ?? "", + parameters: revision.parameters ?? {}, + modifiedBy: revision.modifiedBy ?? revision.createdBy ?? "", + modifiedById: revision.modifiedById ?? "", + createdAt: revision.createdAt ?? "", + createdAtTimestamp: revision.createdAtTimestamp ?? 0, + updatedAt: revision.updatedAt ?? revision.createdAt ?? "", + updatedAtTimestamp: revision.updatedAtTimestamp ?? revision.createdAtTimestamp ?? 0, + isLatestRevision: revision.isLatestRevision ?? false, + commitMessage: revision.commitMessage ?? null, + deployedIn: revision.deployedIn ?? [], + projectId: (revision as any)?.projectId ?? 
"", + revisions: [], + } as Variant +} diff --git a/web/ee/src/components/pages/evaluations/autoEvaluation/assets/AutoEvaluationHeader.tsx b/web/ee/src/components/pages/evaluations/autoEvaluation/assets/AutoEvaluationHeader.tsx index 52ce792956..e52e9227fb 100644 --- a/web/ee/src/components/pages/evaluations/autoEvaluation/assets/AutoEvaluationHeader.tsx +++ b/web/ee/src/components/pages/evaluations/autoEvaluation/assets/AutoEvaluationHeader.tsx @@ -12,7 +12,6 @@ import EditColumns from "@/oss/components/Filters/EditColumns" import {formatColumnTitle} from "@/oss/components/Filters/EditColumns/assets/helper" import {formatMetricValue} from "@/oss/components/HumanEvaluations/assets/MetricDetailsPopover/assets/utils" import {EvaluationRow} from "@/oss/components/HumanEvaluations/types" -import {useQueryParam} from "@/oss/hooks/useQuery" import useURL from "@/oss/hooks/useURL" import {snakeToCamelCaseKeys} from "@/oss/lib/helpers/casing" import {formatDate24, formatDay} from "@/oss/lib/helpers/dateTimeHelper" @@ -27,10 +26,9 @@ import {EvaluationStatus} from "@/oss/lib/Types" import {getAppValues} from "@/oss/state/app" import {statusMapper} from "../../../evaluations/cellRenderers/cellRenderers" +import {buildEvaluationNavigationUrl} from "../../utils" import {useStyles} from "../assets/styles" -import EvaluatorsModal from "../EvaluatorsModal/EvaluatorsModal" -import {buildAppScopedUrl, buildEvaluationNavigationUrl} from "../../utils" import {AutoEvaluationHeaderProps} from "./types" const isLegacyEvaluation = (evaluation: any): boolean => "aggregated_results" in evaluation @@ -78,11 +76,6 @@ const AutoEvaluationHeader = ({ // local states const [searchTerm, setSearchTerm] = useState("") const [newEvalModalOpen, setNewEvalModalOpen] = useState(false) - const [current, setCurrent] = useState(0) - const [isConfigEvaluatorModalOpen, setIsConfigEvaluatorModalOpen] = useQueryParam( - "configureEvaluatorModal", - "", - ) const onExport = useCallback(() => { try { @@ 
-534,32 +527,14 @@ const AutoEvaluationHeader = ({ icon={} className={classes.button} onClick={() => { - setIsConfigEvaluatorModalOpen("open") - setCurrent(0) + router.push(`${projectURL}/evaluators?tab=automatic`) }} > - Configure evaluators + Configure evaluator ) : null} - {/*
- setPagination({page: p, size: s})} - className="flex items-center xl:hidden shrink-0 [&_.ant-pagination-options]:hidden lg:[&_.ant-pagination-options]:block [&_.ant-pagination-options]:!ml-2" - /> - setPagination({page: p, size: s})} - className="hidden xl:flex xl:items-center" - /> -
*/}
@@ -620,13 +595,6 @@ const AutoEvaluationHeader = ({ />
- - setIsConfigEvaluatorModalOpen("")} - current={current} - setCurrent={setCurrent} - /> )} diff --git a/web/ee/src/components/pages/settings/Billing/index.tsx b/web/ee/src/components/pages/settings/Billing/index.tsx index 5240c8c53c..c97e905ace 100644 --- a/web/ee/src/components/pages/settings/Billing/index.tsx +++ b/web/ee/src/components/pages/settings/Billing/index.tsx @@ -4,6 +4,7 @@ import {Button, message, Spin, Typography} from "antd" import dayjs from "dayjs" import {useRouter} from "next/router" +import useURL from "@/oss/hooks/useURL" import {Plan} from "@/oss/lib/Types" import {editSubscriptionInfo, useSubscriptionData, useUsageData} from "@/oss/services/billing" @@ -11,7 +12,6 @@ import UsageProgressBar from "./assets/UsageProgressBar" import AutoRenewalCancelModal from "./Modals/AutoRenewalCancelModal" import PricingModal from "./Modals/PricingModal" import SubscriptionPlanDetails from "./Modals/PricingModal/assets/SubscriptionPlanDetails" -import useURL from "@/oss/hooks/useURL" const {Link} = Typography diff --git a/web/ee/src/lib/helpers/serviceValidations.ts b/web/ee/src/lib/helpers/serviceValidations.ts new file mode 100644 index 0000000000..87849c1098 --- /dev/null +++ b/web/ee/src/lib/helpers/serviceValidations.ts @@ -0,0 +1,17 @@ +const UUID_V4_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i +const MONGO_OBJECT_ID_RE = /^[0-9a-f]{24}$/i // keep if backend allows ObjectIds + +export function isValidId(id: unknown): id is string { + if (typeof id !== "string") return false + const s = id.trim() + if (!s) return false + if (s.includes("/") || s.includes("\\") || s.includes("..")) return false + return UUID_V4_RE.test(s) || MONGO_OBJECT_ID_RE.test(s) +} + +export function assertValidId(id: unknown, label = "id"): string { + if (!isValidId(id)) { + throw new TypeError(`Invalid ${label}: must be a UUID v4 or 24-hex ObjectId`) + } + return (id as string).trim() +} diff --git 
a/web/ee/src/lib/hooks/useEvaluationRunData/index.ts b/web/ee/src/lib/hooks/useEvaluationRunData/index.ts index dd333d153c..7df0a127e1 100644 --- a/web/ee/src/lib/hooks/useEvaluationRunData/index.ts +++ b/web/ee/src/lib/hooks/useEvaluationRunData/index.ts @@ -29,9 +29,10 @@ import { setProjectVariantReferencesAtom, } from "@/oss/state/projectVariantConfig" +import {collectProjectVariantReferences} from "../usePreviewEvaluations/projectVariantConfigs" + import {evalAtomStore, evaluationRunStateFamily, loadingStateAtom} from "./assets/atoms" import {buildRunIndex} from "./assets/helpers/buildRunIndex" -import {collectProjectVariantReferences} from "../usePreviewEvaluations/projectVariantConfigs" const fetchLegacyScenariosData = async ( evaluationId: string, diff --git a/web/ee/src/pages/w/[workspace_id]/p/[project_id]/evaluators/configure/[evaluator_id].tsx b/web/ee/src/pages/w/[workspace_id]/p/[project_id]/evaluators/configure/[evaluator_id].tsx new file mode 100644 index 0000000000..df1b8461be --- /dev/null +++ b/web/ee/src/pages/w/[workspace_id]/p/[project_id]/evaluators/configure/[evaluator_id].tsx @@ -0,0 +1,20 @@ +import {useMemo} from "react" + +import {useRouter} from "next/router" + +import ConfigureEvaluatorPage from "@/oss/components/Evaluators/components/ConfigureEvaluator" + +const EvaluatorConfigureRoute = () => { + const router = useRouter() + const evaluatorId = useMemo(() => { + const id = router.query.evaluator_id + if (Array.isArray(id)) { + return id[0] + } + return id ?? 
null + }, [router.query.evaluator_id]) + + return +} + +export default EvaluatorConfigureRoute diff --git a/web/ee/src/pages/w/[workspace_id]/p/[project_id]/evaluators/index.tsx b/web/ee/src/pages/w/[workspace_id]/p/[project_id]/evaluators/index.tsx new file mode 100644 index 0000000000..7996228a65 --- /dev/null +++ b/web/ee/src/pages/w/[workspace_id]/p/[project_id]/evaluators/index.tsx @@ -0,0 +1,7 @@ +import EvaluatorsRegistry from "@/oss/components/Evaluators" + +const ProjectEvaluatorsPage = () => { + return +} + +export default ProjectEvaluatorsPage diff --git a/web/ee/src/services/observability/api/helper.ts b/web/ee/src/services/observability/api/helper.ts index 11b6616843..37f5e2353a 100644 --- a/web/ee/src/services/observability/api/helper.ts +++ b/web/ee/src/services/observability/api/helper.ts @@ -1,6 +1,7 @@ -import {GenerationDashboardData, TracingDashboardData} from "@/oss/lib/types_ee" import dayjs from "dayjs" +import {GenerationDashboardData, TracingDashboardData} from "@/oss/lib/types_ee" + export const normalizeDurationSeconds = (d = 0) => d / 1_000 export const formatTick = (ts: number | string, range: string) => diff --git a/web/oss/package.json b/web/oss/package.json index bf2e603365..253f1db73c 100644 --- a/web/oss/package.json +++ b/web/oss/package.json @@ -1,6 +1,6 @@ { "name": "@agenta/oss", - "version": "0.58.0", + "version": "0.59.0", "private": true, "engines": { "node": ">=18" diff --git a/web/oss/src/components/Layout/Layout.tsx b/web/oss/src/components/Layout/Layout.tsx index e1ecc2126d..7f9f160de5 100644 --- a/web/oss/src/components/Layout/Layout.tsx +++ b/web/oss/src/components/Layout/Layout.tsx @@ -47,12 +47,14 @@ const AppWithVariants = memo( classes, isPlayground, isHumanEval, + isEvaluator, appTheme, ...props }: { children: ReactNode isAppRoute: boolean isHumanEval: boolean + isEvaluator: boolean classes: StyleClasses appTheme: string isPlayground?: boolean @@ -80,7 +82,7 @@ const AppWithVariants = memo( } return ( -
+
{project?.is_demo && (
You are in a view-only demo workspace. To go back to your @@ -97,7 +99,11 @@ const AppWithVariants = memo( /> -
+
@@ -130,8 +137,8 @@ const AppWithVariants = memo( @@ -149,6 +156,7 @@ const AppWithVariants = memo( )}
+
@@ -216,7 +224,7 @@ const App: React.FC = ({children}) => { } }, [appTheme]) - const {isHumanEval, isPlayground, isAppRoute, isAuthRoute} = useMemo(() => { + const {isHumanEval, isPlayground, isAppRoute, isAuthRoute, isEvaluator} = useMemo(() => { const pathname = appState.pathname const asPath = appState.asPath const selectedEvaluation = Array.isArray(query.selectedEvaluation) @@ -230,6 +238,7 @@ const App: React.FC = ({children}) => { isAppRoute: baseAppURL ? asPath.startsWith(baseAppURL) : false, isPlayground: pathname.includes("/playground") || pathname.includes("/evaluations/results"), + isEvaluator: pathname.includes("/evaluators/configure"), isHumanEval: pathname.includes("/evaluations/single_model_test") || selectedEvaluation === "human_annotation", @@ -253,6 +262,7 @@ const App: React.FC = ({children}) => { appTheme={appTheme} isPlayground={isPlayground} isHumanEval={isHumanEval} + isEvaluator={isEvaluator} > {children} {contextHolder} diff --git a/web/oss/src/components/Playground/Components/PlaygroundGenerations/assets/GenerationCompletionRow/SingleView.tsx b/web/oss/src/components/Playground/Components/PlaygroundGenerations/assets/GenerationCompletionRow/SingleView.tsx index ffabc9eac0..e750dd22dc 100644 --- a/web/oss/src/components/Playground/Components/PlaygroundGenerations/assets/GenerationCompletionRow/SingleView.tsx +++ b/web/oss/src/components/Playground/Components/PlaygroundGenerations/assets/GenerationCompletionRow/SingleView.tsx @@ -87,7 +87,7 @@ const SingleView = ({ // disabled={disableForCustom} // placeholder={ // disableForCustom - // ? "Insert a {{ variable }} in your template to create an input." + // ? "Insert a {{variable}} in your template to create an input." 
// : "Enter value" // } editorProps={{enableTokens: false}} diff --git a/web/oss/src/components/Playground/Components/PlaygroundVariantConfigPrompt/assets/PlaygroundVariantConfigPromptCollapseContent.tsx b/web/oss/src/components/Playground/Components/PlaygroundVariantConfigPrompt/assets/PlaygroundVariantConfigPromptCollapseContent.tsx index e64c3b7a0d..7b85e8a1c2 100644 --- a/web/oss/src/components/Playground/Components/PlaygroundVariantConfigPrompt/assets/PlaygroundVariantConfigPromptCollapseContent.tsx +++ b/web/oss/src/components/Playground/Components/PlaygroundVariantConfigPrompt/assets/PlaygroundVariantConfigPromptCollapseContent.tsx @@ -72,7 +72,7 @@ const PlaygroundVariantConfigPromptCollapseContent: React.FC - Insert a {"{{ variable }}"} in + Insert a {"{{variable}}"} in your template to create an input. } diff --git a/web/oss/src/components/Playground/adapters/VariableControlAdapter.tsx b/web/oss/src/components/Playground/adapters/VariableControlAdapter.tsx index bd1a7176c1..86cbfd7c30 100644 --- a/web/oss/src/components/Playground/adapters/VariableControlAdapter.tsx +++ b/web/oss/src/components/Playground/adapters/VariableControlAdapter.tsx @@ -86,7 +86,7 @@ const VariableControlAdapter: React.FC = ({ const viewType = isComparisonView ? "comparison" : "single" const effectivePlaceholder = disableForCustom - ? "Insert a {{ variable }} in your template to create an input." + ? "Insert a {{variable}} in your template to create an input." 
: placeholder || "Enter a value" return ( diff --git a/web/oss/src/components/Sidebar/hooks/useSidebarConfig/index.tsx b/web/oss/src/components/Sidebar/hooks/useSidebarConfig/index.tsx index c5cb6a261d..5e6138bc75 100644 --- a/web/oss/src/components/Sidebar/hooks/useSidebarConfig/index.tsx +++ b/web/oss/src/components/Sidebar/hooks/useSidebarConfig/index.tsx @@ -14,6 +14,7 @@ import { Rocket, CloudArrowUp, ChatCircle, + Gauge, } from "@phosphor-icons/react" import {useAppId} from "@/oss/hooks/useAppId" @@ -52,6 +53,12 @@ export const useSidebarConfig = () => { link: `${projectURL}/observability`, icon: , }, + { + key: "project-evaluators-link", + title: "Evaluators", + link: `${projectURL}/evaluators`, + icon: , + }, { key: "project-evaluations-link", title: "Evaluations", diff --git a/web/oss/src/components/VariantsComponents/index.tsx b/web/oss/src/components/VariantsComponents/index.tsx index 36dd61017b..a23d1dde5e 100644 --- a/web/oss/src/components/VariantsComponents/index.tsx +++ b/web/oss/src/components/VariantsComponents/index.tsx @@ -135,7 +135,7 @@ const VariantsDashboard = () => { return ( <>
- Variants + Variants
diff --git a/web/oss/src/components/pages/observability/drawer/AnnotateDrawer/assets/AnnotateDrawerTitle/index.tsx b/web/oss/src/components/pages/observability/drawer/AnnotateDrawer/assets/AnnotateDrawerTitle/index.tsx index cd0eded2df..4f2b589ec0 100644 --- a/web/oss/src/components/pages/observability/drawer/AnnotateDrawer/assets/AnnotateDrawerTitle/index.tsx +++ b/web/oss/src/components/pages/observability/drawer/AnnotateDrawer/assets/AnnotateDrawerTitle/index.tsx @@ -42,16 +42,25 @@ const AnnotateDrawerTitle = ({ const onClickPrev = useCallback( (step: AnnotateDrawerStepsType) => { - if (step === AnnotateDrawerSteps.ANNOTATE) { + const hasSingleView = Boolean( + showOnly?.annotateUi || showOnly?.selectEvaluatorsUi || showOnly?.createEvaluatorUi, + ) + + if ( + hasSingleView || + step === AnnotateDrawerSteps.ANNOTATE || + !Object.values(AnnotateDrawerSteps).includes(step) + ) { onClose() - } else { - setSteps((prev) => { - const prevIndex = Object.values(AnnotateDrawerSteps).indexOf(prev) - return Object.values(AnnotateDrawerSteps)[prevIndex - 1] - }) + return } + + setSteps((prev) => { + const prevIndex = Object.values(AnnotateDrawerSteps).indexOf(prev) + return Object.values(AnnotateDrawerSteps)[prevIndex - 1] + }) }, - [onClose, setSteps], + [onClose, setSteps, showOnly], ) const onClickNext = useCallback( diff --git a/web/oss/src/components/pages/observability/drawer/AnnotateDrawer/assets/CreateEvaluator/assets/CreateNewMetric/index.tsx b/web/oss/src/components/pages/observability/drawer/AnnotateDrawer/assets/CreateEvaluator/assets/CreateNewMetric/index.tsx index 6c2d0873ac..a0b1b92010 100644 --- a/web/oss/src/components/pages/observability/drawer/AnnotateDrawer/assets/CreateEvaluator/assets/CreateNewMetric/index.tsx +++ b/web/oss/src/components/pages/observability/drawer/AnnotateDrawer/assets/CreateEvaluator/assets/CreateNewMetric/index.tsx @@ -35,10 +35,13 @@ const CreateNewMetric = ({ // it will add a empty field for enum when user select label option 
useEffect(() => { - if (metricType === "label" || metricType === "class") { + if (metricType !== "label" && metricType !== "class") return + + const existingEnum = form.getFieldValue(["metrics", field.name, "enum"]) + if (!Array.isArray(existingEnum) || existingEnum.length === 0) { form.setFieldValue(["metrics", field.name, "enum"], [""]) } - }, [metricType]) + }, [metricType, field.name, form]) const getCurrentEnumValues = useCallback( (currentIndex: number) => { diff --git a/web/oss/src/components/pages/observability/drawer/AnnotateDrawer/assets/CreateEvaluator/index.tsx b/web/oss/src/components/pages/observability/drawer/AnnotateDrawer/assets/CreateEvaluator/index.tsx index fdd3618029..c0d3490fa1 100644 --- a/web/oss/src/components/pages/observability/drawer/AnnotateDrawer/assets/CreateEvaluator/index.tsx +++ b/web/oss/src/components/pages/observability/drawer/AnnotateDrawer/assets/CreateEvaluator/index.tsx @@ -1,12 +1,14 @@ -import {useCallback, useState, useEffect} from "react" +import {useCallback, useEffect, useMemo, useState} from "react" import {Plus} from "@phosphor-icons/react" -import {Alert, Button, Form, Input, message, Typography} from "antd" +import {Alert, Button, Form, Input, Typography} from "antd" +import {message} from "@/oss/components/AppMessageContext" import {useDebounceValue} from "usehooks-ts" import {isAppNameInputValid} from "@/oss/lib/helpers/utils" import useEvaluators from "@/oss/lib/hooks/useEvaluators" -import {createEvaluator} from "@/oss/services/evaluators" +import {EvaluatorPreviewDto} from "@/oss/lib/hooks/useEvaluators/types" +import {createEvaluator, updateEvaluator} from "@/oss/services/evaluators" import {AnnotateDrawerSteps} from "../enum" import {generateNewEvaluatorPayloadData} from "../transforms" @@ -16,24 +18,143 @@ import CreateNewMetric from "./assets/CreateNewMetric" import {slugify} from "./assets/helper" import {MetricFormData} from "./assets/types" -const CreateEvaluator = ({setSteps, setSelectedEvaluators}: 
CreateEvaluatorProps) => { +type EvaluatorWithMeta = EvaluatorPreviewDto & { + id?: string + flags?: Record + meta?: Record + tags?: Record +} + +const defaultMetric = {name: "", optional: false} + +const CreateEvaluator = ({ + setSteps, + setSelectedEvaluators, + mode = "create", + evaluator, + onSuccess, + skipPostCreateStepChange = false, +}: CreateEvaluatorProps) => { const [isSubmitting, setIsSubmitting] = useState(false) const [errorMessage, setErrorMessage] = useState([]) const [slugTouched, setSlugTouched] = useState(false) const [form] = Form.useForm() const name = Form.useWatch("evaluatorName", form) + const slugValue = Form.useWatch("evaluatorSlug", form) const [debouncedName] = useDebounceValue(name, 500) const {mutate} = useEvaluators({ preview: true, queries: {is_human: true}, }) + const isEditMode = mode === "edit" && Boolean(evaluator?.id) + + const metricsFromEvaluator = useMemo(() => { + if (!isEditMode || !evaluator) return [] + + const outputs = + evaluator.data?.service?.format?.properties?.outputs || + (evaluator as EvaluatorWithMeta)?.data?.service?.format?.properties?.outputs + + if (!outputs || typeof outputs !== "object") return [] + + const required = Array.isArray(outputs.required) ? outputs.required : [] + const properties = outputs.properties ?? {} + + return Object.entries(properties).map(([metricName, schema]) => { + const metricSchema = schema as Record + const optional = !required.includes(metricName) + + if (Array.isArray(metricSchema.anyOf) && metricSchema.anyOf.length > 0) { + const first = metricSchema.anyOf[0] || {} + const enums = Array.isArray(first.enum) + ? first.enum.filter((value: any) => value !== null && value !== undefined) + : [] + return { + name: metricName, + type: "class", + enum: enums.map(String).filter(Boolean), + optional, + } + } + + if (metricSchema.type === "array") { + const items = metricSchema.items || {} + const enums = Array.isArray(items.enum) + ? 
items.enum.filter((value: any) => value !== null && value !== undefined) + : [] + return { + name: metricName, + type: "label", + enum: enums.map(String).filter(Boolean), + optional, + } + } + + const metric: Record = { + name: metricName, + type: metricSchema.type, + optional, + } + + if (metricSchema.minimum !== undefined) { + metric.minimum = metricSchema.minimum + } + + if (metricSchema.maximum !== undefined) { + metric.maximum = metricSchema.maximum + } + + if (Array.isArray(metricSchema.enum)) { + metric.enum = metricSchema.enum.filter( + (value: any) => value !== null && value !== undefined, + ) + } + + return metric + }) + }, [evaluator, isEditMode]) + + const initialFormValues = useMemo(() => { + const metrics = + metricsFromEvaluator.length > 0 + ? metricsFromEvaluator.map((metric) => ({...metric})) + : [{...defaultMetric}] + + if (!isEditMode) { + return { + evaluatorName: "", + evaluatorSlug: "", + evaluatorDescription: "", + metrics, + } + } + + return { + evaluatorName: evaluator?.name || "", + evaluatorSlug: evaluator?.slug || "", + evaluatorDescription: evaluator?.description || "", + metrics, + } + }, [evaluator, isEditMode, metricsFromEvaluator]) + useEffect(() => { - if (!slugTouched) { - form.setFieldValue("evaluatorSlug", slugify(debouncedName || "")) + form.setFieldsValue({metrics: []}) + form.setFieldsValue(initialFormValues) + setErrorMessage([]) + setSlugTouched(isEditMode) + }, [form, initialFormValues, isEditMode]) + + useEffect(() => { + if (isEditMode) return + if (slugTouched) return + + const nextSlug = slugify(debouncedName || "") + if (slugValue !== nextSlug) { + form.setFieldValue("evaluatorSlug", nextSlug) } - }, [debouncedName, slugTouched, form]) + }, [debouncedName, slugTouched, form, slugValue, isEditMode]) const onScrollTo = useCallback((direction: "top" | "bottom") => { setTimeout(() => { @@ -62,36 +183,74 @@ const CreateEvaluator = ({setSteps, setSelectedEvaluators}: CreateEvaluatorProps if (!payloadData.evaluator) return + 
if (isEditMode && evaluator?.id) { + const evaluatorWithMeta = evaluator as EvaluatorWithMeta + const payload = { + evaluator: { + ...payloadData.evaluator, + id: evaluator.id, + flags: { + ...(evaluatorWithMeta.flags || {}), + is_human: true, + is_custom: false, + }, + meta: evaluatorWithMeta.meta || {}, + ...(evaluatorWithMeta.tags ? {tags: evaluatorWithMeta.tags} : {}), + }, + } + + await updateEvaluator(evaluator.id, payload) + await mutate() + message.success("Evaluator updated successfully") + await onSuccess?.(payload.evaluator.slug) + return + } + await createEvaluator(payloadData) await mutate() message.success("Evaluator created successfully") - setSteps?.(AnnotateDrawerSteps.SELECT_EVALUATORS) - setSelectedEvaluators?.((prev) => [ - ...new Set([...prev, payloadData.evaluator.slug]), - ]) + if (!skipPostCreateStepChange) { + setSteps?.(AnnotateDrawerSteps.SELECT_EVALUATORS) + setSelectedEvaluators?.((prev) => [ + ...new Set([...prev, payloadData.evaluator.slug]), + ]) + } + await onSuccess?.(payloadData.evaluator.slug) } catch (error: any) { - if (error.status === 409) { + if (error?.response?.status === 409) { setErrorMessage(["Evaluator with this slug already exists"]) message.error("Evaluator with this slug already exists") onScrollTo("top") } else { - const errorMessages = Array.isArray(error.response?.data?.detail) + const errorMessages = Array.isArray(error?.response?.data?.detail) ? error.response?.data?.detail ?.map((item: any) => item?.msg) .filter(Boolean) - : [error.response?.data?.detail] + : [error?.response?.data?.detail] onScrollTo("top") - setErrorMessage(errorMessages) + setErrorMessage((errorMessages || []).filter(Boolean)) } } finally { setIsSubmitting(false) } }, - [mutate, setErrorMessage, onScrollTo, setSteps], + [ + mutate, + setErrorMessage, + onScrollTo, + setSteps, + setSelectedEvaluators, + isEditMode, + evaluator, + onSuccess, + skipPostCreateStepChange, + ], ) + const submitLabel = isEditMode ? "Update" : "Create" + return (
{errorMessage?.map((msg, idx) => ( - Save + {submitLabel}
diff --git a/web/oss/src/components/pages/observability/drawer/AnnotateDrawer/assets/types.d.ts b/web/oss/src/components/pages/observability/drawer/AnnotateDrawer/assets/types.d.ts index ec51d4a5a6..b60b9675ec 100644 --- a/web/oss/src/components/pages/observability/drawer/AnnotateDrawer/assets/types.d.ts +++ b/web/oss/src/components/pages/observability/drawer/AnnotateDrawer/assets/types.d.ts @@ -2,6 +2,7 @@ import {DrawerProps} from "antd" import {TooltipButtonProps} from "@/oss/components/Playground/assets/EnhancedButton" import {AnnotationDto} from "@/oss/lib/hooks/useAnnotations/types" +import {EvaluatorPreviewDto} from "@/oss/lib/hooks/useEvaluators/types" import {AnnotateDrawerSteps} from "./enum" @@ -27,6 +28,9 @@ export interface AnnotateDrawerProps extends DrawerProps { traceSpanIds?: AnnotateDrawerIdsType showOnly?: ShowOnlyType evalSlugs?: string[] + initialStep?: AnnotateDrawerStepsType + createEvaluatorProps?: Partial + closeOnLayoutClick?: boolean } export interface AnnotateDrawerTitleProps { @@ -71,4 +75,13 @@ export interface SelectEvaluatorsProps { export interface CreateEvaluatorProps { setSteps?: React.Dispatch> setSelectedEvaluators?: React.Dispatch> + mode?: "create" | "edit" + evaluator?: EvaluatorPreviewDto & { + id?: string + flags?: Record + meta?: Record + tags?: Record + } + onSuccess?: (slug: string) => void | Promise + skipPostCreateStepChange?: boolean } diff --git a/web/oss/src/components/pages/observability/drawer/AnnotateDrawer/index.tsx b/web/oss/src/components/pages/observability/drawer/AnnotateDrawer/index.tsx index 8903399161..799c8474f3 100644 --- a/web/oss/src/components/pages/observability/drawer/AnnotateDrawer/index.tsx +++ b/web/oss/src/components/pages/observability/drawer/AnnotateDrawer/index.tsx @@ -22,6 +22,8 @@ const AnnotateDrawer = ({ traceSpanIds, showOnly, evalSlugs, + initialStep = AnnotateDrawerSteps.ANNOTATE, + createEvaluatorProps, ...props }: AnnotateDrawerProps) => { const {projectId} = getProjectValues() @@ 
-32,7 +34,7 @@ const AnnotateDrawer = ({ const evalLSKey = `${projectId}-evaluator` const [annotations, setAnnotations] = useState([]) - const [steps, setSteps] = useState(AnnotateDrawerSteps.ANNOTATE) + const [steps, setSteps] = useState(initialStep) const [updatedMetrics, setUpdatedMetrics] = useState({}) const [selectedEvaluators, setSelectedEvaluators] = useLocalStorage(evalLSKey, []) const [errorMessage, setErrorMessage] = useState([]) @@ -94,6 +96,12 @@ const AnnotateDrawer = ({ } }, [data, props.open]) + useEffect(() => { + if (props.open) { + setSteps(initialStep) + } + }, [props.open, initialStep]) + const annEvalSlugs = useMemo(() => { return ( (annotations @@ -127,14 +135,14 @@ const AnnotateDrawer = ({ const onAfterClose = useCallback( (open: boolean) => { if (!open) { - setSteps(AnnotateDrawerSteps.ANNOTATE) + setSteps(initialStep) setTempSelectedEvaluators([]) setAnnotations([]) setErrorMessage([]) setUpdatedMetrics({}) } }, - [props.afterOpenChange], + [props.afterOpenChange, initialStep], ) const onCaptureError = useCallback( @@ -149,6 +157,12 @@ const AnnotateDrawer = ({ ) const renderContent = useMemo(() => { + const { + setSteps: _ignoredSetSteps, + setSelectedEvaluators: _ignoredSetSelectedEvaluators, + ...restCreateEvaluatorProps + } = createEvaluatorProps || {} + switch (steps) { case AnnotateDrawerSteps.ANNOTATE: return ( @@ -176,6 +190,7 @@ const AnnotateDrawer = ({ ) default: @@ -188,6 +203,7 @@ const AnnotateDrawer = ({ _selectedEvaluators, tempSelectedEvaluators, errorMessage, + createEvaluatorProps, ]) return ( diff --git a/web/oss/src/lib/hooks/useEvaluatorConfigs/index.ts b/web/oss/src/lib/hooks/useEvaluatorConfigs/index.ts index 6ebc39c643..00c1ec5f83 100644 --- a/web/oss/src/lib/hooks/useEvaluatorConfigs/index.ts +++ b/web/oss/src/lib/hooks/useEvaluatorConfigs/index.ts @@ -1,13 +1,10 @@ -import {useMemo, useCallback} from "react" +import {useEffect, useMemo} from "react" import {useAtomValue} from "jotai" -import useSWR, 
{SWRResponse} from "swr" import {SWRConfiguration} from "swr" import {useAppId} from "@/oss/hooks/useAppId" -import {fetchAllEvaluatorConfigs} from "@/oss/services/evaluators" -import {userAtom} from "@/oss/state/profile" -import {projectIdAtom} from "@/oss/state/project" +import {evaluatorConfigsQueryAtomFamily} from "@/oss/state/evaluators" import {EvaluatorConfig} from "../../Types" @@ -15,36 +12,71 @@ type EvaluatorConfigResult = Preview extends true ? undefined : EvaluatorConfig[] -const useEvaluatorConfigs = ({ - preview, - appId: appIdOverride, - ...options -}: {preview?: Preview; appId?: string | null} & SWRConfiguration) => { - const projectId = useAtomValue(projectIdAtom) - const user = useAtomValue(userAtom) +type EvaluatorConfigsOptions = { + preview?: Preview + appId?: string | null +} & Pick + +export type UseEvaluatorConfigsReturn = { + data: EvaluatorConfigResult | undefined + error: unknown + isLoading: boolean + isPending: boolean + isError: boolean + isSuccess: boolean + refetch: () => Promise + mutate: () => Promise +} + +const useEvaluatorConfigs = ( + { + preview, + appId: appIdOverride, + onSuccess, + onError, + }: EvaluatorConfigsOptions = {} as EvaluatorConfigsOptions, +): UseEvaluatorConfigsReturn => { const routeAppId = useAppId() const appId = appIdOverride ?? routeAppId - const fetcher = useCallback(async (): Promise => { - if (!projectId) { - return [] - } - const data = await fetchAllEvaluatorConfigs(appId, projectId) - return data - }, [projectId, appId]) - - const swrKey = useMemo(() => { - if (!user || preview || !projectId) return null - return ["evaluator-configs", projectId, appId ?? null] as const - }, [user, preview, projectId, appId]) - - const response = useSWR(swrKey, fetcher, { - revalidateOnFocus: false, - shouldRetryOnError: false, - ...options, - }) as SWRResponse, any> - - return response + const atomParams = useMemo( + () => ({ + appId: appId ?? 
null, + preview: Boolean(preview), + }), + [appId, preview], + ) + + const queryAtom = useMemo(() => evaluatorConfigsQueryAtomFamily(atomParams), [atomParams]) + + const queryResult = useAtomValue(queryAtom) + + useEffect(() => { + if (!onSuccess || preview) return + if (!queryResult.isSuccess) return + onSuccess( + queryResult.data as EvaluatorConfigResult, + queryResult.queryKey ?? [], + undefined, + ) + }, [onSuccess, preview, queryResult.data, queryResult.isSuccess, queryResult.queryKey]) + + useEffect(() => { + if (!onError || preview) return + if (!queryResult.isError) return + onError(queryResult.error) + }, [onError, preview, queryResult.error, queryResult.isError]) + + return { + data: queryResult.data as EvaluatorConfigResult | undefined, + error: queryResult.error, + isLoading: queryResult.isPending, + isPending: queryResult.isPending, + isError: queryResult.isError, + isSuccess: queryResult.isSuccess, + refetch: queryResult.refetch, + mutate: queryResult.refetch, + } } export default useEvaluatorConfigs diff --git a/web/oss/src/lib/hooks/useEvaluators/index.ts b/web/oss/src/lib/hooks/useEvaluators/index.ts index 0ad2af8b5d..d46495e147 100644 --- a/web/oss/src/lib/hooks/useEvaluators/index.ts +++ b/web/oss/src/lib/hooks/useEvaluators/index.ts @@ -1,87 +1,84 @@ -import {useCallback} from "react" +import {useEffect, useMemo} from "react" import {useAtomValue} from "jotai" -import useSWR, {SWRResponse} from "swr" +import {SWRConfiguration} from "swr" -import {getMetricsFromEvaluator} from "@/oss/components/pages/observability/drawer/AnnotateDrawer/assets/transforms" -import {fetchAllEvaluators} from "@/oss/services/evaluators" -import {useOrgData} from "@/oss/state/org" -import {userAtom} from "@/oss/state/profile" -import {getProjectValues} from "@/oss/state/project" - -import axios from "../../api/assets/axiosConfig" +import {evaluatorsQueryAtomFamily} from "@/oss/state/evaluators" import {Evaluator} from "../../Types" -import {transformApiData} from 
"../useAnnotations/assets/transformer" -import { - EvaluatorDto, - EvaluatorsResponseDto, - EvaluatorPreviewDto, - UseEvaluatorsOptions, -} from "./types" +import {EvaluatorPreviewDto, UseEvaluatorsOptions} from "./types" -type UseEvaluatorsReturn = SWRResponse< - Preview extends true ? EvaluatorPreviewDto[] : Evaluator[], - any -> +export type UseEvaluatorsReturn = { + data: (Preview extends true ? EvaluatorPreviewDto[] : Evaluator[]) | undefined + error: unknown + isLoading: boolean + isPending: boolean + isError: boolean + isSuccess: boolean + refetch: () => Promise + mutate: () => Promise +} const useEvaluators = ({ preview, queries, - ...options + onSuccess, + onError, + projectId, + ..._rest }: UseEvaluatorsOptions & { preview?: Preview queries?: {is_human: boolean} + onSuccess?: ( + data: (Preview extends true ? EvaluatorPreviewDto[] : Evaluator[]) | undefined, + key: readonly unknown[], + config: SWRConfiguration | undefined, + ) => void + onError?: (error: unknown) => void }): UseEvaluatorsReturn => { - const {selectedOrg} = useOrgData() - const user = useAtomValue(userAtom) - const projectId = options?.projectId || getProjectValues()?.projectId || "" - const workspace = selectedOrg?.default_workspace - const members = workspace?.members || [] + const queriesKey = useMemo(() => JSON.stringify(queries ?? null), [queries]) + + const atomParams = useMemo( + () => ({ + projectId: projectId ?? null, + preview: Boolean(preview), + queriesKey, + }), + [projectId, preview, queriesKey], + ) - type Data = Preview extends true ? EvaluatorPreviewDto[] : Evaluator[] + const queryAtom = useMemo(() => evaluatorsQueryAtomFamily(atomParams), [atomParams]) - const fetcher = useCallback(async (): Promise => { - if (preview) { - const response = await axios.post( - `/preview/simple/evaluators/query?project_id=${projectId}`, - queries - ? 
{ - evaluator: { - flags: queries, - }, - } - : {}, - ) - const data = - (response?.data?.evaluators || []).map((evaluator) => - transformApiData({data: evaluator, members}), - ) || [] - const withMetrics = data.map((d) => ({ - ...d, - metrics: getMetricsFromEvaluator(d as EvaluatorDto), - })) - return withMetrics as unknown as Data - } else { - // Non-preview mode returns full Evaluator objects - const data = await fetchAllEvaluators() - return data as Data - } - }, [projectId, preview, queries]) + const queryResult = useAtomValue(queryAtom) - const data = useSWR( - user?.id && projectId - ? `/api${preview ? "/preview" : ""}/evaluators/?project_id=${projectId}&queries=${JSON.stringify(queries)}` - : null, - fetcher, - { - revalidateOnFocus: false, - shouldRetryOnError: false, - ...options, - }, - ) + useEffect(() => { + if (!onSuccess || !queryResult.isSuccess) return + onSuccess( + queryResult.data as + | (Preview extends true ? EvaluatorPreviewDto[] : Evaluator[]) + | undefined, + queryResult.queryKey ?? [], + undefined, + ) + }, [onSuccess, queryResult.data, queryResult.isSuccess, queryResult.queryKey]) + + useEffect(() => { + if (!onError || !queryResult.isError) return + onError(queryResult.error) + }, [onError, queryResult.error, queryResult.isError]) - return data + return { + data: queryResult.data as + | (Preview extends true ? 
EvaluatorPreviewDto[] : Evaluator[]) + | undefined, + error: queryResult.error, + isLoading: queryResult.isPending, + isPending: queryResult.isPending, + isError: queryResult.isError, + isSuccess: queryResult.isSuccess, + refetch: queryResult.refetch, + mutate: queryResult.refetch, + } } export default useEvaluators diff --git a/web/oss/src/lib/hooks/useFetchEvaluatorsData/index.tsx b/web/oss/src/lib/hooks/useFetchEvaluatorsData/index.tsx index 4a51520e21..497dbd8994 100644 --- a/web/oss/src/lib/hooks/useFetchEvaluatorsData/index.tsx +++ b/web/oss/src/lib/hooks/useFetchEvaluatorsData/index.tsx @@ -1,12 +1,11 @@ import {useCallback} from "react" import {useSetAtom} from "jotai" -import {SWRResponse} from "swr" import {evaluatorConfigsAtom, evaluatorsAtom} from "../../atoms/evaluation" import {Evaluator, EvaluatorConfig} from "../../Types" -import useEvaluatorConfigs from "../useEvaluatorConfigs" -import useEvaluators from "../useEvaluators" +import useEvaluatorConfigs, {UseEvaluatorConfigsReturn} from "../useEvaluatorConfigs" +import useEvaluators, {UseEvaluatorsReturn} from "../useEvaluators" import {EvaluatorPreviewDto} from "../useEvaluators/types" interface EvaluatorsData { @@ -15,8 +14,8 @@ interface EvaluatorsData { refetchEvaluators: () => Promise refetchEvaluatorConfigs: () => Promise refetchAll: () => Promise - evaluatorsSwr: SWRResponse - evaluatorConfigsSwr: SWRResponse + evaluatorsSwr: UseEvaluatorsReturn + evaluatorConfigsSwr: UseEvaluatorConfigsReturn } const useFetchEvaluatorsData = ( diff --git a/web/oss/src/pages/w/[workspace_id]/p/[project_id]/apps/[app_id]/deployments/index.tsx b/web/oss/src/pages/w/[workspace_id]/p/[project_id]/apps/[app_id]/deployments/index.tsx index a7c75bc458..46265127dc 100644 --- a/web/oss/src/pages/w/[workspace_id]/p/[project_id]/apps/[app_id]/deployments/index.tsx +++ b/web/oss/src/pages/w/[workspace_id]/p/[project_id]/apps/[app_id]/deployments/index.tsx @@ -78,7 +78,7 @@ const DeploymentsPage = () => { return (
- Deployment + Deployment ({ container: { @@ -66,7 +66,7 @@ const AppDetailsSection = memo(() => { return ( <> - {currentApp?.app_name || ""} + {currentApp?.app_name || ""} , +) => { + const {projectId} = getProjectValues() + + try { + const data = await axios.put( + `${getAgentaApiUrl()}/preview/simple/evaluators/${evaluatorId}?project_id=${projectId}`, + evaluatorPayload, + ) + + return data + } catch (error) { + throw error + } +} + const evaluatorIconsMap = { auto_exact_match: exactMatchImg, auto_similarity_match: similarityImg, @@ -117,3 +135,11 @@ export const deleteEvaluatorConfig = async (configId: string) => { return axios.delete(`/evaluators/configs/${configId}?project_id=${projectId}`) } + +export const deleteHumanEvaluator = async (evaluatorId: string) => { + const {projectId} = getProjectValues() + + return axios.post( + `${getAgentaApiUrl()}/preview/simple/evaluators/${evaluatorId}/archive?project_id=${projectId}`, + ) +} diff --git a/web/oss/src/state/evaluators/atoms.ts b/web/oss/src/state/evaluators/atoms.ts new file mode 100644 index 0000000000..1874aa5eb7 --- /dev/null +++ b/web/oss/src/state/evaluators/atoms.ts @@ -0,0 +1,103 @@ +import {atomFamily} from "jotai/utils" +import {atomWithQuery} from "jotai-tanstack-query" + +import axios from "@/oss/lib/api/assets/axiosConfig" +import {getMetricsFromEvaluator} from "@/oss/components/pages/observability/drawer/AnnotateDrawer/assets/transforms" +import {Evaluator, EvaluatorConfig} from "@/oss/lib/Types" +import {transformApiData} from "@/oss/lib/hooks/useAnnotations/assets/transformer" +import { + EvaluatorDto, + EvaluatorPreviewDto, + EvaluatorsResponseDto, +} from "@/oss/lib/hooks/useEvaluators/types" +import {fetchAllEvaluators, fetchAllEvaluatorConfigs} from "@/oss/services/evaluators" +import {selectedAppIdAtom} from "@/oss/state/app" +import {selectedOrgAtom} from "@/oss/state/org" +import {projectIdAtom} from "@/oss/state/project" +import {userAtom} from "@/oss/state/profile" +import 
{EvaluatorConfigsParams, EvaluatorsParams} from "./types" +import {NO_APP_KEY, NO_PROJECT_KEY} from "./constant" +import {parseQueries} from "./parse" + +export const evaluatorConfigsQueryAtomFamily = atomFamily( + ({projectId: overrideProjectId, appId: overrideAppId, preview}: EvaluatorConfigsParams = {}) => + atomWithQuery((get) => { + const projectId = overrideProjectId ?? get(projectIdAtom) + const appId = overrideAppId ?? get(selectedAppIdAtom) + const user = get(userAtom) as {id?: string} | null + + const enabled = !preview && Boolean(projectId && user?.id) + const projectKey = projectId ?? NO_PROJECT_KEY + const appKey = appId ?? NO_APP_KEY + + return { + queryKey: [ + "evaluator-configs", + preview ? "preview" : "regular", + projectKey, + appKey, + ] as const, + queryFn: async () => fetchAllEvaluatorConfigs(appId, projectId), + staleTime: 60_000, + refetchOnWindowFocus: false, + refetchOnReconnect: false, + refetchOnMount: false, + retry: false, + enabled, + } + }), +) + +export const evaluatorsQueryAtomFamily = atomFamily( + ({projectId: overrideProjectId, preview, queriesKey}: EvaluatorsParams) => + atomWithQuery((get) => { + const projectId = overrideProjectId ?? get(projectIdAtom) + const user = get(userAtom) as {id?: string} | null + const selectedOrg = get(selectedOrgAtom) + const members = selectedOrg?.default_workspace?.members ?? [] + const projectKey = projectId ?? NO_PROJECT_KEY + + const enabled = Boolean(projectId && user?.id) + const queryKey = preview + ? (["evaluators", "preview", projectKey, queriesKey] as const) + : (["evaluators", projectKey] as const) + + return { + queryKey, + queryFn: async () => { + if (preview) { + if (!projectId) return [] + const flags = parseQueries(queriesKey) + const response = await axios.post( + `/preview/simple/evaluators/query?project_id=${projectId}`, + flags + ? 
{ + evaluator: {flags}, + } + : {}, + ) + const evaluators = + response?.data?.evaluators?.map((item) => + transformApiData({ + data: item, + members, + }), + ) ?? [] + return evaluators.map((evaluator) => ({ + ...evaluator, + metrics: getMetricsFromEvaluator(evaluator as EvaluatorDto), + })) as EvaluatorPreviewDto[] + } + + const data = await fetchAllEvaluators() + return data + }, + staleTime: 60_000, + refetchOnWindowFocus: false, + refetchOnReconnect: false, + refetchOnMount: false, + retry: false, + enabled, + } + }), +) diff --git a/web/oss/src/state/evaluators/constant.ts b/web/oss/src/state/evaluators/constant.ts new file mode 100644 index 0000000000..ae583bb3cd --- /dev/null +++ b/web/oss/src/state/evaluators/constant.ts @@ -0,0 +1,2 @@ +export const NO_PROJECT_KEY = "__no-project__" +export const NO_APP_KEY = "__no-app__" diff --git a/web/oss/src/state/evaluators/index.ts b/web/oss/src/state/evaluators/index.ts new file mode 100644 index 0000000000..89eeef800a --- /dev/null +++ b/web/oss/src/state/evaluators/index.ts @@ -0,0 +1 @@ +export * from "./atoms" diff --git a/web/oss/src/state/evaluators/parse.ts b/web/oss/src/state/evaluators/parse.ts new file mode 100644 index 0000000000..4bb367bc66 --- /dev/null +++ b/web/oss/src/state/evaluators/parse.ts @@ -0,0 +1,8 @@ +export const parseQueries = (serialized: string) => { + if (!serialized || serialized === "null") return undefined + try { + return JSON.parse(serialized) as {is_human?: boolean} + } catch { + return undefined + } +} diff --git a/web/oss/src/state/evaluators/types.ts b/web/oss/src/state/evaluators/types.ts new file mode 100644 index 0000000000..1fa1bd7460 --- /dev/null +++ b/web/oss/src/state/evaluators/types.ts @@ -0,0 +1,11 @@ +export type EvaluatorConfigsParams = { + projectId?: string | null + appId?: string | null + preview?: boolean +} + +export type EvaluatorsParams = { + projectId?: string | null + preview: boolean + queriesKey: string +} diff --git a/web/package.json 
b/web/package.json index 641cbc9e9f..f8db4efb44 100644 --- a/web/package.json +++ b/web/package.json @@ -1,6 +1,6 @@ { "name": "agenta-web", - "version": "0.58.0", + "version": "0.59.0", "workspaces": [ "ee", "oss",