From 9e59779bf017a6a5ccaf551c601a00bbee42ef5e Mon Sep 17 00:00:00 2001 From: Kaosiso Ezealigo Date: Tue, 30 Dec 2025 16:01:34 +0100 Subject: [PATCH 01/31] refactor: replace icon imports with specific icon components in EvaluationRunsCreateButton and InfiniteVirtualTableFeatureShell --- .../components/EvaluationRunsCreateButton.tsx | 122 +++++++++--------- .../InfiniteVirtualTableFeatureShell.tsx | 6 +- 2 files changed, 62 insertions(+), 66 deletions(-) diff --git a/web/oss/src/components/EvaluationRunsTablePOC/components/EvaluationRunsCreateButton.tsx b/web/oss/src/components/EvaluationRunsTablePOC/components/EvaluationRunsCreateButton.tsx index ae14c4506..f6264e0a3 100644 --- a/web/oss/src/components/EvaluationRunsTablePOC/components/EvaluationRunsCreateButton.tsx +++ b/web/oss/src/components/EvaluationRunsTablePOC/components/EvaluationRunsCreateButton.tsx @@ -1,7 +1,7 @@ import {useCallback, useEffect, useMemo} from "react" -import {CaretDown, Check, Plus} from "@phosphor-icons/react" -import {Button, Dropdown, Space, Tooltip, type MenuProps} from "antd" +import {PlusIcon} from "@phosphor-icons/react" +import {Button, Dropdown, Tooltip, type MenuProps} from "antd" import {useAtom, useAtomValue} from "jotai" import { @@ -37,13 +37,17 @@ const createTypeCopy: Record< }, } -const isSupportedCreateType = (value: string): value is SupportedCreateType => - SUPPORTED_CREATE_TYPES.includes(value as SupportedCreateType) +const isSupportedCreateType = (value: unknown): value is SupportedCreateType => { + return typeof value === "string" && (SUPPORTED_CREATE_TYPES as string[]).includes(value) +} + +const FALLBACK_CREATE_TYPE: SupportedCreateType = "auto" const EvaluationRunsCreateButton = () => { const {createEnabled, createTooltip, evaluationKind, defaultCreateType, scope} = useAtomValue( evaluationRunsTableHeaderStateAtom, ) + const isAllTab = evaluationKind === "all" const isAppScoped = scope === "app" const [createOpen, setCreateOpen] = useAtom(evaluationRunsCreateModalOpenAtom) const [selectedCreateType, setSelectedCreateType] = useAtom( @@ -52,40 +56,50 @@ const EvaluationRunsCreateButton = () => { const [createTypePreference, setCreateTypePreference] = useAtom( evaluationRunsCreateTypePreferenceAtom, ) - const isAllTab = evaluationKind === "all" + + const availableTypes = useMemo(() => { + if (!isAllTab) return [] + if (isAppScoped) return SUPPORTED_CREATE_TYPES.filter((t) => t !== "online") + return SUPPORTED_CREATE_TYPES + }, [isAllTab, isAppScoped]) + + const normalizeAllTabType = useCallback( + (value: unknown): SupportedCreateType => { + const candidate = isSupportedCreateType(value) ? value : FALLBACK_CREATE_TYPE + return availableTypes.includes(candidate) + ? candidate + : (availableTypes[0] ?? FALLBACK_CREATE_TYPE) + }, + [availableTypes], + ) useEffect(() => { - if (!createEnabled && createOpen) { - setCreateOpen(false) - } + if (!createEnabled && createOpen) setCreateOpen(false) }, [createEnabled, createOpen, setCreateOpen]) useEffect(() => { - if (!isAllTab && defaultCreateType && selectedCreateType !== defaultCreateType) { - setSelectedCreateType(defaultCreateType) - } + if (isAllTab) return + if (!defaultCreateType) return + if (selectedCreateType !== defaultCreateType) setSelectedCreateType(defaultCreateType) }, [defaultCreateType, isAllTab, selectedCreateType, setSelectedCreateType]) useEffect(() => { if (!isAllTab) return - const normalizedPreference = isSupportedCreateType(createTypePreference) - ? createTypePreference - : "auto" - if (!isSupportedCreateType(createTypePreference)) { - setCreateTypePreference(normalizedPreference) - } - if (selectedCreateType !== normalizedPreference) { - setSelectedCreateType(normalizedPreference) - } + + const normalized = normalizeAllTabType(createTypePreference) + + if (createTypePreference !== normalized) setCreateTypePreference(normalized) + if (selectedCreateType !== normalized) setSelectedCreateType(normalized) }, [ - createTypePreference, isAllTab, + createTypePreference, selectedCreateType, setCreateTypePreference, setSelectedCreateType, + normalizeAllTabType, ]) - const handlePrimaryClick = useCallback(() => { + const openCreateModal = useCallback(() => { if (!createEnabled) return setCreateOpen(true) }, [createEnabled, setCreateOpen]) @@ -93,74 +107,56 @@ const EvaluationRunsCreateButton = () => { const handleMenuClick = useCallback>( ({key}) => { if (!isSupportedCreateType(key)) return - setSelectedCreateType(key) - setCreateTypePreference(key) - if (!createEnabled) return - setCreateOpen(true) + + const normalized = normalizeAllTabType(key) + + setSelectedCreateType(normalized) + setCreateTypePreference(normalized) + openCreateModal() }, - [createEnabled, setCreateOpen, setCreateTypePreference, setSelectedCreateType], + [normalizeAllTabType, openCreateModal, setCreateTypePreference, setSelectedCreateType], ) - const dropdownMenuItems = useMemo(() => { + const menuItems = useMemo(() => { if (!isAllTab) return [] - // Filter out "online" (Live Evaluation) in app-scoped views - const availableTypes = isAppScoped - ? SUPPORTED_CREATE_TYPES.filter((type) => type !== "online") - : SUPPORTED_CREATE_TYPES + return availableTypes.map((type) => { const copy = createTypeCopy[type] - const isActive = selectedCreateType === type return { key: type, label: ( -
-
- {isActive ? : null} -
-
- {copy.title} - {copy.description} -
+
+ {copy.title} + {copy.description}
), } }) - }, [isAllTab, isAppScoped, selectedCreateType]) - - const buttonLabel = useMemo(() => { - if (!isAllTab) return "New Evaluation" - const shortLabel = isSupportedCreateType(selectedCreateType) - ? createTypeCopy[selectedCreateType]?.short - : null - return shortLabel ? `New ${shortLabel} Evaluation` : "New Evaluation" - }, [isAllTab, selectedCreateType]) + }, [availableTypes, isAllTab]) return (
{isAllTab ? ( - + - - diff --git a/web/oss/src/components/InfiniteVirtualTable/features/InfiniteVirtualTableFeatureShell.tsx b/web/oss/src/components/InfiniteVirtualTable/features/InfiniteVirtualTableFeatureShell.tsx index 8d7934de0..ccf275af3 100644 --- a/web/oss/src/components/InfiniteVirtualTable/features/InfiniteVirtualTableFeatureShell.tsx +++ b/web/oss/src/components/InfiniteVirtualTable/features/InfiniteVirtualTableFeatureShell.tsx @@ -1,7 +1,7 @@ import type {CSSProperties, Key, ReactNode} from "react" import {useCallback, useEffect, useMemo, useState} from "react" -import {Trash} from "@phosphor-icons/react" +import {TrashIcon} from "@phosphor-icons/react" import {Button, Grid, Tabs, Tooltip} from "antd" import type {MenuProps} from "antd" import clsx from "clsx" @@ -358,7 +358,7 @@ function InfiniteVirtualTableFeatureShellBase( ) From 23b05c02434ad72585f3f6b538cc59f0416bb93f Mon Sep 17 00:00:00 2001 From: Kaosiso Ezealigo Date: Wed, 31 Dec 2025 15:15:28 +0100 Subject: [PATCH 02/31] refactor: improve layout and styling in evaluation result components --- .../components/CompareRunsMenu.tsx | 46 ++++++++----------- .../EvalRunDetails/components/Page.tsx | 5 +- .../components/views/OverviewView.tsx | 4 +- .../components/BaseRunMetricsSection.tsx | 15 ++---- .../src/components/PageLayout/PageLayout.tsx | 4 +- web/oss/src/styles/evaluations.css | 6 ++- 6 files changed, 34 insertions(+), 46 deletions(-) diff --git a/web/oss/src/components/EvalRunDetails/components/CompareRunsMenu.tsx b/web/oss/src/components/EvalRunDetails/components/CompareRunsMenu.tsx index 21759f163..2c1d2ed89 100644 --- a/web/oss/src/components/EvalRunDetails/components/CompareRunsMenu.tsx +++ b/web/oss/src/components/EvalRunDetails/components/CompareRunsMenu.tsx @@ -259,7 +259,7 @@ const CompareRunsPopoverContent = memo(({runId, availability}: CompareRunsPopove return ( -
+
{availability.testsetIds.length ? ( @@ -280,31 +280,21 @@ const CompareRunsPopoverContent = memo(({runId, availability}: CompareRunsPopove ) : null} -
- -
- - Selected {compareIds.length}/{MAX_COMPARISON_RUNS} - -
- {selectedDetails.map((run) => ( - { - event.preventDefault() - handleRemove(run.id) - }} + + + Selected {compareIds.length}/{MAX_COMPARISON_RUNS} + + {compareIds.length ? ( +
- {compareIds.length ? ( - - ) : null} + Clear all + + ) : null} +
{item.status ? : null} {createdLabel ? ( @@ -457,8 +447,8 @@ const TestsetReferenceTag = ({ label={label} copyValue={copyValue} href={href} - tone="testset" className="max-w-[200px]" + showIcon={false} /> ) diff --git a/web/oss/src/components/EvalRunDetails/components/Page.tsx b/web/oss/src/components/EvalRunDetails/components/Page.tsx index 2693c8cb9..4704e2b56 100644 --- a/web/oss/src/components/EvalRunDetails/components/Page.tsx +++ b/web/oss/src/components/EvalRunDetails/components/Page.tsx @@ -130,6 +130,7 @@ const EvalRunPreviewPage = ({runId, evaluationType, projectId = null}: EvalRunPr return ( setActiveViewParam(v)} /> } - headerClassName="px-2" + headerClassName="px-4" > -
+
{ const comparisonRunIds = useMemo(() => runIds.slice(1), [runIds]) return ( -
+
-
+
{baseRunId ? ( - {runDisplayName} -
- } - > -
-
{renderContent()}
-
- +
+
{renderContent()}
+
) } diff --git a/web/oss/src/components/PageLayout/PageLayout.tsx b/web/oss/src/components/PageLayout/PageLayout.tsx index dfca3c961..f3437c330 100644 --- a/web/oss/src/components/PageLayout/PageLayout.tsx +++ b/web/oss/src/components/PageLayout/PageLayout.tsx @@ -5,6 +5,7 @@ import classNames from "classnames" interface PageLayoutProps { title?: ReactNode + titleLevel?: 1 | 2 | 3 | 4 | 5 headerTabs?: ReactNode headerTabsProps?: TabsProps children: ReactNode @@ -14,6 +15,7 @@ interface PageLayoutProps { const PageLayout = ({ title, + titleLevel = 5, headerTabs, headerTabsProps, children, @@ -35,7 +37,7 @@ const PageLayout = ({ headerClassName, )} > - + {title} {headerTabsContent ? ( diff --git a/web/oss/src/styles/evaluations.css b/web/oss/src/styles/evaluations.css index 8df14e725..44a4ef786 100644 --- a/web/oss/src/styles/evaluations.css +++ b/web/oss/src/styles/evaluations.css @@ -139,7 +139,6 @@ .metadata-summary-table .ant-table, .metadata-summary-table .ant-table-container, .metadata-summary-table .ant-table-content { - border: none !important; box-shadow: none !important; } @@ -161,6 +160,11 @@ border-bottom: none; } +.metadata-summary-table .ant-tag { + margin-inline-end: 0; + margin-bottom: 0; +} + .agenta-scenario-table .ant-table-tbody > tr > td { padding: 0 !important; vertical-align: top; From b0e990447faea1c55dfc8a39eb9f85c15d64ffb2 Mon Sep 17 00:00:00 2001 From: Kaosiso Ezealigo Date: Wed, 31 Dec 2025 15:16:16 +0100 Subject: [PATCH 03/31] refactor: enhance layout and add typography to AggregatedOverviewSection and MetadataSummaryTable --- .../components/AggregatedOverviewSection.tsx | 24 ++-- .../components/MetadataSummaryTable.tsx | 118 ++++++++++-------- 2 files changed, 82 insertions(+), 60 deletions(-) diff --git a/web/oss/src/components/EvalRunDetails/components/views/OverviewView/components/AggregatedOverviewSection.tsx b/web/oss/src/components/EvalRunDetails/components/views/OverviewView/components/AggregatedOverviewSection.tsx index 13dd92230..d0e8a8cfa 100644 --- a/web/oss/src/components/EvalRunDetails/components/views/OverviewView/components/AggregatedOverviewSection.tsx +++ b/web/oss/src/components/EvalRunDetails/components/views/OverviewView/components/AggregatedOverviewSection.tsx @@ -1,6 +1,6 @@ import {memo, useMemo} from "react" -import {Card} from "antd" +import {Card, Typography} from "antd" import useURL from "@/oss/hooks/useURL" @@ -19,14 +19,24 @@ const AggregatedOverviewSection = ({runIds}: AggregatedOverviewSectionProps) => } return ( - +
-
-
- +
+
+ + Evaluator Scores Overview + + + Average evaluator score across evaluations +
-
- +
+
+ +
+
+ +
diff --git a/web/oss/src/components/EvalRunDetails/components/views/OverviewView/components/MetadataSummaryTable.tsx b/web/oss/src/components/EvalRunDetails/components/views/OverviewView/components/MetadataSummaryTable.tsx index 0ca7cdf8d..4dd9f9684 100644 --- a/web/oss/src/components/EvalRunDetails/components/views/OverviewView/components/MetadataSummaryTable.tsx +++ b/web/oss/src/components/EvalRunDetails/components/views/OverviewView/components/MetadataSummaryTable.tsx @@ -10,7 +10,6 @@ import useEvaluatorReference from "@/oss/components/References/hooks/useEvaluato import type {BasicStats} from "@/oss/lib/metricUtils" import {useProjectData} from "@/oss/state/project" -import {getComparisonColor} from "../../../../atoms/compare" import {evaluationQueryRevisionAtomFamily} from "../../../../atoms/query" import { runCreatedAtAtomFamily, @@ -206,14 +205,19 @@ const StatusCell = ({runId}: MetadataCellProps) => { } const ApplicationCell = ({runId, projectURL}: MetadataCellProps) => ( -
- +
+
) const LegacyVariantCell = memo(({runId}: MetadataCellProps) => ( -
- +
+
)) @@ -235,8 +239,10 @@ const MetadataRunNameCell = memo( runId ?? "—" const accent = - accentColor ?? - (typeof runData?.accentColor === "string" ? (runData as any).accentColor : null) + accentColor === null + ? null + : accentColor ?? + (typeof runData?.accentColor === "string" ? (runData as any).accentColor : null) return (
@@ -248,7 +254,18 @@ const MetadataRunNameCell = memo( const LegacyTestsetsCell = memo(({runId, projectURL}: MetadataCellProps) => { const testsetAtom = useMemo(() => runTestsetIdsAtomFamily(runId), [runId]) const testsetIds = useAtomValueWithSchedule(testsetAtom, {priority: LOW_PRIORITY}) ?? [] - return + return ( +
+ +
+ ) }) const formatCurrency = (value: number | undefined | null) => { @@ -362,7 +379,14 @@ const InvocationErrorsCell = makeMetricCell("attributes.ag.metrics.errors.cumula }) const METADATA_ROWS: MetadataRowRecord[] = [ - {key: "evaluations", label: "Evaluations", Cell: MetadataRunNameCell}, + { + key: "testsets", + label: "Test set", + Cell: LegacyTestsetsCell, + shouldDisplay: ({snapshots}) => + snapshots.some(({testsetIds}) => (testsetIds?.length ?? 0) > 0), + }, + {key: "evaluation", label: "Evaluation", Cell: MetadataRunNameCell}, {key: "status", label: "Status", Cell: StatusCell}, {key: "created", label: "Created at", Cell: CreatedCell}, {key: "updated", label: "Updated at", Cell: UpdatedCell}, @@ -400,13 +424,6 @@ const METADATA_ROWS: MetadataRowRecord[] = [ ) }), }, - { - key: "testsets", - label: "Test sets", - Cell: LegacyTestsetsCell, - shouldDisplay: ({snapshots}) => - snapshots.some(({testsetIds}) => (testsetIds?.length ?? 0) > 0), - }, // {key: "scenarios", label: "Scenarios evaluated", Cell: ScenarioCountCell}, {key: "invocation_cost", label: "Cost (Total)", Cell: InvocationCostCell}, {key: "invocation_duration", label: "Duration (Total)", Cell: InvocationDurationCell}, @@ -422,7 +439,7 @@ const EvaluatorNameLabel = ({evaluatorId}: {evaluatorId: string}) => { const MetadataSummaryTable = ({runIds, projectURL}: MetadataSummaryTableProps) => { const orderedRunIds = useMemo(() => runIds.filter((id): id is string => Boolean(id)), [runIds]) - const {metricSelections, runColorMap, runDescriptors} = useRunMetricData(orderedRunIds) + const {metricSelections, runDescriptors} = useRunMetricData(orderedRunIds) const runReferenceSnapshotsAtom = useMemo( () => atom((get) => @@ -605,8 +622,6 @@ const MetadataSummaryTable = ({runIds, projectURL}: MetadataSummaryTableProps) = return rows }, [anyHasQuery, evaluatorMetricRows, rowContext]) - const isComparison = orderedRunIds.length > 1 - const columns = useMemo>(() => { const baseColumn = { title: null, @@ -625,47 +640,44 @@ const MetadataSummaryTable = ({runIds, projectURL}: MetadataSummaryTableProps) = key: runId, width: 160, onCell: (record: MetadataRowRecord) => { - if (!isComparison || record.key === "query_config") { - return {} + if (record.key === "testsets") { + return index === 0 ? {colSpan: orderedRunIds.length} : {colSpan: 0} } - const tone = getComparisonColor(index) - return tone ? {style: {backgroundColor: tone}} : {} + return {} + }, + render: (_: unknown, record: MetadataRowRecord) => { + if (record.key === "testsets" && index !== 0) { + return null + } + return ( + + ) }, - render: (_: unknown, record: MetadataRowRecord) => ( - - ), })) return [baseColumn, ...runColumns] - }, [isComparison, orderedRunIds, projectURL, runColorMap, runNameMap]) + }, [orderedRunIds, projectURL, runNameMap]) return ( -
-
- Evaluator Scores Overview - - Average evaluator score across evaluations - -
-
-
- - className="metadata-summary-table" - rowKey="key" - size="small" - pagination={false} - columns={columns} - dataSource={dataSource} - scroll={{x: "max-content"}} - showHeader={false} - /> -
+
+
+ + className="metadata-summary-table" + rowKey="key" + size="small" + pagination={false} + columns={columns} + dataSource={dataSource} + scroll={{x: "max-content"}} + showHeader={false} + bordered={true} + />
) From e301624959ddffa1b5e3894019d910c39d954898 Mon Sep 17 00:00:00 2001 From: Kaosiso Ezealigo Date: Wed, 31 Dec 2025 15:16:51 +0100 Subject: [PATCH 04/31] refactor: enhance EvaluatorMetricsChart with delta computation and improved summary display --- .../EvaluatorMetricsChart/index.tsx | 242 +++++++++++++++--- 1 file changed, 203 insertions(+), 39 deletions(-) diff --git a/web/oss/src/components/EvalRunDetails/components/EvaluatorMetricsChart/index.tsx b/web/oss/src/components/EvalRunDetails/components/EvaluatorMetricsChart/index.tsx index a822f96bc..3405904dc 100644 --- a/web/oss/src/components/EvalRunDetails/components/EvaluatorMetricsChart/index.tsx +++ b/web/oss/src/components/EvalRunDetails/components/EvaluatorMetricsChart/index.tsx @@ -41,6 +41,38 @@ interface EvaluatorLabelProps { fallbackLabel: string } +type MetricDeltaTone = "positive" | "negative" | "neutral" + +interface MetricStripEntry { + key: string + label: string + color: string + value: number | null + displayValue: string + isMain: boolean + deltaText: string + deltaTone: MetricDeltaTone +} + +const getMainEvaluatorSeries = (entries: MetricStripEntry[]) => + entries.find((entry) => entry.isMain) ?? entries[0] + +const computeDeltaPercent = (current: number | null, baseline: number | null) => { + if (typeof current !== "number" || typeof baseline !== "number") return null + if (!Number.isFinite(current) || !Number.isFinite(baseline) || baseline === 0) return null + return ((current - baseline) / baseline) * 100 +} + +const formatDelta = (delta: number | null): {text: string; tone: MetricDeltaTone} => { + if (delta === null || !Number.isFinite(delta)) { + return {text: "-", tone: "neutral"} + } + const rounded = Math.round(delta) + if (rounded > 0) return {text: `+${rounded}%`, tone: "positive"} + if (rounded < 0) return {text: `${rounded}%`, tone: "negative"} + return {text: "0%", tone: "neutral"} +} + const EvaluatorMetricsChartTitle = memo( ({runId, evaluatorRef, fallbackLabel}: EvaluatorLabelProps) => { const evaluatorAtom = useMemo( @@ -243,25 +275,145 @@ const EvaluatorMetricsChart = ({ (isBooleanMetric && booleanChartData.length > 0) || hasCategoricalFrequency - const summaryValue = useMemo((): string | null => { - if (isBooleanMetric) { - const percentage = booleanHistogram.percentages.true - return Number.isFinite(percentage) ? `${percentage.toFixed(2)}%` : "—" - } - if (hasCategoricalFrequency && categoricalFrequencyData.length) { - return null + const comparisonBooleanPercentMap = useMemo(() => { + const map = new Map() + comparisonBooleanHistograms.forEach((entry) => { + if (Number.isFinite(entry.histogram.percentages.true)) { + map.set(entry.runId, entry.histogram.percentages.true) + } + }) + return map + }, [comparisonBooleanHistograms]) + + const summaryItems = useMemo(() => { + const baseValue = (() => { + if (!resolvedStats) return {value: null, displayValue: "—"} + if (isBooleanMetric) { + const percentage = booleanHistogram.percentages.true + return Number.isFinite(percentage) + ? {value: percentage, displayValue: `${percentage.toFixed(2)}%`} + : {value: null, displayValue: "—"} + } + if (hasCategoricalFrequency) { + return {value: null, displayValue: "—"} + } + if (typeof resolvedStats.mean === "number" && Number.isFinite(resolvedStats.mean)) { + return {value: resolvedStats.mean, displayValue: format3Sig(resolvedStats.mean)} + } + return {value: null, displayValue: "—"} + })() + + const baseEntry: MetricStripEntry = { + key: baseSeriesKey, + label: resolvedRunName, + color: resolvedBaseColor, + value: baseValue.value, + displayValue: baseValue.displayValue, + isMain: true, + deltaText: "-", + deltaTone: "neutral", } - if (typeof stats.mean === "number") return format3Sig(stats.mean) - return "—" + + const comparisonEntries = comparisonSeries.map((entry) => { + const statsValue = entry.stats + if (!statsValue) { + return { + key: entry.runId, + label: entry.runName, + color: entry.color, + value: null, + displayValue: "—", + isMain: false, + deltaText: "-", + deltaTone: "neutral", + } + } + if (isBooleanMetric) { + const percentage = comparisonBooleanPercentMap.get(entry.runId) + return { + key: entry.runId, + label: entry.runName, + color: entry.color, + value: typeof percentage === "number" ? percentage : null, + displayValue: + typeof percentage === "number" && Number.isFinite(percentage) + ? `${percentage.toFixed(2)}%` + : "—", + isMain: false, + deltaText: "-", + deltaTone: "neutral", + } + } + if (hasCategoricalFrequency) { + return { + key: entry.runId, + label: entry.runName, + color: entry.color, + value: null, + displayValue: "—", + isMain: false, + deltaText: "-", + deltaTone: "neutral", + } + } + if (typeof statsValue.mean === "number" && Number.isFinite(statsValue.mean)) { + return { + key: entry.runId, + label: entry.runName, + color: entry.color, + value: statsValue.mean, + displayValue: format3Sig(statsValue.mean), + isMain: false, + deltaText: "-", + deltaTone: "neutral", + } + } + return { + key: entry.runId, + label: entry.runName, + color: entry.color, + value: null, + displayValue: "—", + isMain: false, + deltaText: "-", + deltaTone: "neutral", + } + }) + + const entries = [baseEntry, ...comparisonEntries] + const mainSeries = getMainEvaluatorSeries(entries) + + return entries.map((entry) => { + if (entry.isMain) { + return entry + } + const delta = computeDeltaPercent(entry.value, mainSeries?.value ?? null) + const formatted = formatDelta(delta) + return { + ...entry, + deltaText: formatted.text, + deltaTone: formatted.tone, + } + }) }, [ + baseSeriesKey, booleanHistogram.percentages.true, - categoricalFrequencyData, - effectiveScenarioCount, + comparisonBooleanPercentMap, + comparisonSeries, hasCategoricalFrequency, isBooleanMetric, - stats, + resolvedBaseColor, + resolvedRunName, + resolvedStats, ]) + const metricsGridClass = useMemo(() => { + if (summaryItems.length <= 1) return "grid-cols-1" + if (summaryItems.length === 2) return "grid-cols-2" + if (summaryItems.length === 3) return "grid-cols-3" + return "grid-cols-2 sm:grid-cols-4" + }, [summaryItems.length]) + const chartContent = () => { if (isBooleanMetric) { if (!booleanChartData.length) { @@ -443,10 +595,11 @@ const EvaluatorMetricsChart = ({ return ( + > +
+
- } - > -
- {stableComparisons.length === 0 && ( -
- {summaryValue !== null ? ( - - {summaryValue} - - ) : null} +
+
+ {summaryItems.map((entry) => ( +
+ + {entry.displayValue} + + + {entry.deltaText} + +
+ ))} +
+
+
+
+
+ {isLoading ? ( + + ) : hasError && !resolvedStats ? ( +
+ Unable to load metric data. +
+ ) : ( + chartContent() + )}
- )} -
0 ? "h-[370px]" : "h-[300px]"}> - {isLoading ? ( - - ) : hasError && !resolvedStats ? ( -
- Unable to load metric data. -
- ) : ( - chartContent() - )}
From d73c4e52dcc3411faf129ee11e79e1640e50b4ca Mon Sep 17 00:00:00 2001 From: Kaosiso Ezealigo Date: Wed, 31 Dec 2025 15:17:00 +0100 Subject: [PATCH 05/31] refactor: add toneOverride and showIconOverride props to reference label components --- .../references/EvalReferenceLabels.tsx | 31 ++++++++++++++++ .../components/References/ReferenceLabels.tsx | 35 ++++++++++++++++--- 2 files changed, 62 insertions(+), 4 deletions(-) diff --git a/web/oss/src/components/EvalRunDetails/components/references/EvalReferenceLabels.tsx b/web/oss/src/components/EvalRunDetails/components/references/EvalReferenceLabels.tsx index e611ed6bb..c84e88e31 100644 --- a/web/oss/src/components/EvalRunDetails/components/references/EvalReferenceLabels.tsx +++ b/web/oss/src/components/EvalRunDetails/components/references/EvalReferenceLabels.tsx @@ -15,6 +15,7 @@ import { VariantReferenceText as GenericVariantReferenceText, VariantRevisionLabel as GenericVariantRevisionLabel, } from "@/oss/components/References" +import type {ReferenceTone} from "@/oss/components/References/referenceColors" import {variantReferenceQueryAtomFamily} from "../../atoms/references" import {effectiveProjectIdAtom} from "../../atoms/run" @@ -30,10 +31,14 @@ export const TestsetTag = memo( testsetId, projectURL, runId, + toneOverride, + showIconOverride, }: { testsetId: string projectURL?: string | null runId?: string | null + toneOverride?: ReferenceTone | null + showIconOverride?: boolean }) => { const projectId = useAtomValue(effectiveProjectIdAtom) const {buildTestsetHref} = useRunScopedUrls(runId) @@ -44,6 +49,8 @@ export const TestsetTag = memo( testsetId={testsetId} projectId={projectId} projectURL={href ? undefined : projectURL} + toneOverride={toneOverride} + showIconOverride={showIconOverride} /> ) }, @@ -59,11 +66,15 @@ export const TestsetTagList = memo( projectURL, runId, className, + toneOverride, + showIconOverride, }: { ids: string[] projectURL?: string | null runId?: string | null className?: string + toneOverride?: ReferenceTone | null + showIconOverride?: boolean }) => { const projectId = useAtomValue(effectiveProjectIdAtom) const {buildTestsetHref} = useRunScopedUrls(runId) @@ -78,6 +89,8 @@ export const TestsetTagList = memo( projectId={projectId} projectURL={resolvedProjectURL ?? projectURL} className={className} + toneOverride={toneOverride} + showIconOverride={showIconOverride} /> ) }, @@ -92,10 +105,14 @@ export const ApplicationReferenceLabel = memo( runId, applicationId: explicitApplicationId, projectURL: explicitProjectURL, + toneOverride, + showIconOverride, }: { runId?: string | null applicationId?: string | null projectURL?: string | null + toneOverride?: ReferenceTone | null + showIconOverride?: boolean }) => { const projectId = useAtomValue(effectiveProjectIdAtom) const {applicationId: runApplicationId} = useRunIdentifiers(runId) @@ -112,6 +129,8 @@ export const ApplicationReferenceLabel = memo( projectId={projectId} projectURL={explicitProjectURL ?? scopedProjectURL} href={appDetailHref} + toneOverride={toneOverride} + showIconOverride={showIconOverride} /> ) }, @@ -129,6 +148,8 @@ export const VariantReferenceLabel = memo( fallbackLabel, showVersionPill = false, explicitVersion, + toneOverride, + showIconOverride, }: { variantId?: string | null applicationId?: string | null @@ -136,6 +157,8 @@ export const VariantReferenceLabel = memo( fallbackLabel?: string | null showVersionPill?: boolean explicitVersion?: number | string | null + toneOverride?: ReferenceTone | null + showIconOverride?: boolean }) => { const projectId = useAtomValue(effectiveProjectIdAtom) const {variantId: runVariantId, applicationId: runApplicationId} = useRunIdentifiers(runId) @@ -153,6 +176,8 @@ export const VariantReferenceLabel = memo( showVersionPill={showVersionPill} explicitVersion={explicitVersion} href={href} + toneOverride={toneOverride} + showIconOverride={showIconOverride} /> ) }, @@ -172,6 +197,8 @@ export const VariantRevisionLabel = memo( runId, fallbackVariantName, fallbackRevision, + toneOverride, + showIconOverride, }: { variantId?: string | null revisionId?: string | null @@ -179,6 +206,8 @@ export const VariantRevisionLabel = memo( runId?: string | null fallbackVariantName?: string | null fallbackRevision?: number | string | null + toneOverride?: ReferenceTone | null + showIconOverride?: boolean }) => { const projectId = useAtomValue(effectiveProjectIdAtom) const { @@ -235,6 +264,8 @@ export const VariantRevisionLabel = memo( fallbackVariantName={resolvedVariantName} fallbackRevision={resolvedRevision} href={href} + toneOverride={toneOverride} + showIconOverride={showIconOverride} /> ) }, diff --git a/web/oss/src/components/References/ReferenceLabels.tsx b/web/oss/src/components/References/ReferenceLabels.tsx index efae728f9..430e5160d 100644 --- a/web/oss/src/components/References/ReferenceLabels.tsx +++ b/web/oss/src/components/References/ReferenceLabels.tsx @@ -12,6 +12,7 @@ import { queryReferenceAtomFamily, variantConfigAtomFamily, } from "./atoms/entityReferences" +import type {ReferenceTone} from "./referenceColors" import ReferenceTag from "./ReferenceTag" const {Text} = Typography @@ -25,11 +26,15 @@ export const TestsetTag = memo( testsetId, projectId, projectURL, + toneOverride, + showIconOverride, openExternally = false, }: { testsetId: string projectId: string | null projectURL?: string | null + toneOverride?: ReferenceTone | null + showIconOverride?: boolean openExternally?: boolean }) => { const queryAtom = useMemo( @@ -56,7 +61,8 @@ export const TestsetTag = memo( tooltip={isDeleted ? `Testset ${testsetId} was deleted` : label} copyValue={testsetId} className="max-w-[220px] w-fit" - tone="testset" + tone={toneOverride === null ? undefined : toneOverride ?? "testset"} + showIcon={showIconOverride ?? true} openExternally={openExternally} /> ) @@ -169,12 +175,16 @@ export const TestsetTagList = memo( projectId, projectURL, className, + toneOverride, + showIconOverride, openExternally = false, }: { ids: string[] projectId: string | null projectURL?: string | null className?: string + toneOverride?: ReferenceTone | null + showIconOverride?: boolean openExternally?: boolean }) => { if (!ids.length) { @@ -189,6 +199,8 @@ export const TestsetTagList = memo( testsetId={id} projectId={projectId} projectURL={projectURL} + toneOverride={toneOverride} + showIconOverride={showIconOverride} openExternally={openExternally} /> ))} @@ -209,6 +221,8 @@ export const ApplicationReferenceLabel = memo( href: explicitHref, openExternally = false, label: customLabel, + toneOverride, + showIconOverride, }: { applicationId: string | null projectId: string | null @@ -216,6 +230,8 @@ export const ApplicationReferenceLabel = memo( href?: string | null openExternally?: boolean label?: string + toneOverride?: ReferenceTone | null + showIconOverride?: boolean }) => { const queryAtom = useMemo( () => appReferenceAtomFamily({projectId, appId: applicationId}), @@ -255,7 +271,8 @@ export const ApplicationReferenceLabel = memo( tooltip={isDeleted ? `Application ${applicationId} was deleted` : label} copyValue={applicationId ?? undefined} className="max-w-[220px] w-fit" - tone="app" + tone={toneOverride === null ? undefined : toneOverride ?? "app"} + showIcon={showIconOverride ?? true} openExternally={openExternally} /> ) @@ -277,6 +294,8 @@ export const VariantReferenceLabel = memo( href: explicitHref, openExternally = false, label: customLabel, + toneOverride, + showIconOverride, }: { revisionId?: string | null projectId: string | null @@ -286,6 +305,8 @@ export const VariantReferenceLabel = memo( href?: string | null openExternally?: boolean label?: string + toneOverride?: ReferenceTone | null + showIconOverride?: boolean }) => { const queryAtom = useMemo( () => variantConfigAtomFamily({projectId, revisionId}), @@ -329,7 +350,8 @@ export const VariantReferenceLabel = memo( tooltip={isDeleted ? `Variant ${revisionId} was deleted` : label} copyValue={revisionId ?? undefined} className="max-w-[220px]" - tone="variant" + tone={toneOverride === null ? undefined : toneOverride ?? "variant"} + showIcon={showIconOverride ?? true} openExternally={openExternally} /> {showVersionPill && resolvedVersion ? ( @@ -355,6 +377,8 @@ export const VariantRevisionLabel = memo( fallbackVariantName, fallbackRevision, href: explicitHref, + toneOverride, + showIconOverride, }: { variantId?: string | null revisionId?: string | null @@ -362,6 +386,8 @@ export const VariantRevisionLabel = memo( fallbackVariantName?: string | null fallbackRevision?: number | string | null href?: string | null + toneOverride?: ReferenceTone | null + showIconOverride?: boolean }) => { // Fetch variant config using revisionId to get revision number const configQueryAtom = useMemo( @@ -411,7 +437,8 @@ export const VariantRevisionLabel = memo( tooltip={isDeleted ? `Variant ${revisionId ?? variantId} was deleted` : label} copyValue={revisionId ?? variantId ?? undefined} className="max-w-[220px]" - tone="variant" + tone={toneOverride === null ? undefined : toneOverride ?? "variant"} + showIcon={showIconOverride ?? true} /> ) }, From 3ebb2e5362e6fe75730a9911889ea827f8f37ed9 Mon Sep 17 00:00:00 2001 From: Kaosiso Ezealigo Date: Wed, 31 Dec 2025 15:17:05 +0100 Subject: [PATCH 06/31] refactor: enhance PreviewEvalRunMeta with comparison functionality and improved UI elements --- .../components/PreviewEvalRunHeader.tsx | 115 +++++++++++++----- 1 file changed, 85 insertions(+), 30 deletions(-) diff --git a/web/oss/src/components/EvalRunDetails/components/PreviewEvalRunHeader.tsx b/web/oss/src/components/EvalRunDetails/components/PreviewEvalRunHeader.tsx index a3b75e74b..5e4046223 100644 --- a/web/oss/src/components/EvalRunDetails/components/PreviewEvalRunHeader.tsx +++ b/web/oss/src/components/EvalRunDetails/components/PreviewEvalRunHeader.tsx @@ -1,10 +1,11 @@ import {memo, useCallback, useMemo, useState} from "react" -import {Pause, Play} from "@phosphor-icons/react" +import {PushpinFilled} from "@ant-design/icons" +import {PauseIcon, PlayIcon, XCircleIcon} from "@phosphor-icons/react" import {useQueryClient} from "@tanstack/react-query" -import {Button, Space, Tabs, Tag, Tooltip} from "antd" +import {Button, Tabs, Tag, Tooltip, Typography} from "antd" import clsx from "clsx" -import {useAtomValue} from "jotai" +import {atom, useAtomValue, useSetAtom} from "jotai" import {message} from "@/oss/components/AppMessageContext" import dayjs from "@/oss/lib/helpers/dateTimeHelper/dayjs" @@ -12,6 +13,13 @@ import {invalidatePreviewRunCache} from "@/oss/lib/hooks/usePreviewEvaluations/a import {startSimpleEvaluation, stopSimpleEvaluation} from "@/oss/services/onlineEvaluations/api" import { + compareRunIdsAtom, + compareRunIdsWriteAtom, + getComparisonColor, + getComparisonSolidColor, +} from "../atoms/compare" +import { + runDisplayNameAtomFamily, runInvocationRefsAtomFamily, runTestsetIdsAtomFamily, runFlagsAtomFamily, @@ -21,15 +29,6 @@ import {previewEvalTypeAtom} from "../state/evalType" import CompareRunsMenu from "./CompareRunsMenu" -const statusColor = (status?: string | null) => { - if (!status) return "default" - const normalized = status.toLowerCase() - if (normalized.includes("success") || normalized.includes("completed")) return "green" - if (normalized.includes("fail") || normalized.includes("error")) return "red" - if (normalized.includes("running") || normalized.includes("queued")) return "blue" - return "default" -} - type ActiveView = "overview" | "focus" | "scenarios" | "configuration" const useOnlineEvaluationActions = (runId: string, projectId?: string | null) => { @@ -156,6 +155,30 @@ const PreviewEvalRunMeta = ({ const _testsetIds = useAtomValue(useMemo(() => runTestsetIdsAtomFamily(runId), [runId])) const {canStopOnline, handleOnlineAction, onlineAction, showOnlineAction} = useOnlineEvaluationActions(runId, projectId) + const compareRunIds = useAtomValue(compareRunIdsAtom) + const setCompareRunIds = useSetAtom(compareRunIdsWriteAtom) + + const orderedRunIds = useMemo(() => { + const ids = [runId, ...compareRunIds].filter((id): id is string => Boolean(id)) + const seen = new Set() + return ids.filter((id) => { + if (seen.has(id)) return false + seen.add(id) + return true + }) + }, [compareRunIds, runId]) + + const runDescriptorsAtom = useMemo( + () => + atom((get) => + orderedRunIds.map((id) => ({ + id, + name: get(runDisplayNameAtomFamily(id)), + })), + ), + [orderedRunIds], + ) + const runDescriptors = useAtomValue(runDescriptorsAtom) const runData = runQuery.data?.camelRun ?? runQuery.data?.rawRun ?? null const runStatus = runData?.status ?? null @@ -169,30 +192,62 @@ const PreviewEvalRunMeta = ({ const lastUpdated = updatedMoment?.isValid() ? updatedMoment.fromNow() : undefined return ( -
- - {runStatus ? ( - <> - - {runStatus} - - - ) : null} - {lastUpdated ? ( - - - Updated {lastUpdated} - - - ) : null} - +
+
+ Evaluations: +
+ {runDescriptors.map((run, index) => { + const isBaseRun = index === 0 + const tagColor = getComparisonSolidColor(index) + const tagBg = getComparisonColor(index) + return ( + + ) : undefined + } + closable={!isBaseRun} + closeIcon={ + !isBaseRun ? ( + + ) : undefined + } + onClose={ + !isBaseRun + ? (event) => { + event.preventDefault() + setCompareRunIds((prev) => + prev.filter((id) => id !== run.id), + ) + } + : undefined + } + > + {run.name} + + ) + })} +
+
+
{showOnlineAction ? (
+ {selectedOption?.description ? ( - - + + {selectedOption.description} - - + + ) : null}
) diff --git a/web/oss/src/components/EvalRunDetails/components/FocusDrawerSidePanel.tsx b/web/oss/src/components/EvalRunDetails/components/FocusDrawerSidePanel.tsx index 7e1af0e46..fb78d62c5 100644 --- a/web/oss/src/components/EvalRunDetails/components/FocusDrawerSidePanel.tsx +++ b/web/oss/src/components/EvalRunDetails/components/FocusDrawerSidePanel.tsx @@ -1,10 +1,11 @@ -import {memo, useCallback, useMemo} from "react" -import type {Key} from "react" +import {memo, useCallback, useMemo, useState} from "react" +import type {ReactNode} from "react" import {TreeStructure, Download, Sparkle, Speedometer} from "@phosphor-icons/react" -import {Skeleton, Tree, type TreeDataNode} from "antd" +import {Skeleton} from "antd" import {useAtomValue} from "jotai" +import CustomTreeComponent from "@/oss/components/CustomUIs/CustomTreeComponent" import {useInfiniteTablePagination} from "@/oss/components/InfiniteVirtualTable" import {evaluationPreviewTableStore} from "../evaluationPreviewTableStore" @@ -16,7 +17,14 @@ const toSectionAnchorId = (value: string) => .replace(/[^a-z0-9]+/g, "-") .replace(/^-+|-+$/g, "")}` -type AnchorTreeNode = TreeDataNode & {anchorId?: string} +interface FocusTreeNode { + id: string + title: string + icon?: ReactNode + anchorId?: string + children?: FocusTreeNode[] + expanded?: boolean +} interface FocusDrawerSidePanelProps { runId: string @@ -26,6 +34,7 @@ interface FocusDrawerSidePanelProps { const FocusDrawerSidePanel = ({runId, scenarioId}: FocusDrawerSidePanelProps) => { const {columnResult} = usePreviewTableData({runId}) const evalType = useAtomValue(previewEvalTypeAtom) + const [selectedKey, setSelectedKey] = useState(null) const {rows} = useInfiniteTablePagination({ store: evaluationPreviewTableStore, @@ -57,11 +66,11 @@ const FocusDrawerSidePanel = ({runId, scenarioId}: FocusDrawerSidePanelProps) => return map }, [columnResult?.groups]) - const evaluatorNodes = useMemo(() => { + const evaluatorNodes = useMemo(() => { if (!columnResult?.evaluators?.length) return [] return columnResult.evaluators.map((evaluator) => ({ title: evaluator.name ?? evaluator.slug ?? "Evaluator", - key: `evaluator:${evaluator.id ?? evaluator.slug ?? evaluator.name}`, + id: `evaluator:${evaluator.id ?? evaluator.slug ?? evaluator.name}`, icon: , anchorId: (evaluator.id && groupAnchorMap.get(`annotation:${evaluator.id}`)) ?? @@ -70,13 +79,13 @@ const FocusDrawerSidePanel = ({runId, scenarioId}: FocusDrawerSidePanelProps) => })) }, [columnResult?.evaluators, groupAnchorMap]) - const treeData = useMemo(() => { - if (!columnResult) return [] + const treeData = useMemo(() => { + if (!columnResult) return null - const children: AnchorTreeNode[] = [ + const children: FocusTreeNode[] = [ { title: "Input", - key: "input", + id: "input", icon: , anchorId: groupAnchorMap.get("inputs") ?? @@ -85,7 +94,7 @@ const FocusDrawerSidePanel = ({runId, scenarioId}: FocusDrawerSidePanelProps) => }, { title: "Output", - key: "output", + id: "output", icon: , anchorId: groupAnchorMap.get("outputs") ?? @@ -97,7 +106,7 @@ const FocusDrawerSidePanel = ({runId, scenarioId}: FocusDrawerSidePanelProps) => if (evaluatorNodes.length) { children.push({ title: "Evaluator", - key: "evaluator", + id: "evaluator", icon: , children: evaluatorNodes, anchorId: @@ -107,19 +116,17 @@ const FocusDrawerSidePanel = ({runId, scenarioId}: FocusDrawerSidePanelProps) => }) } - return [ - { - title: parentTitle, - key: "evaluation", - icon: , - children, - }, - ] - }, [columnResult, parentTitle, evaluatorNodes]) + return { + title: parentTitle, + id: "evaluation", + icon: , + children, + expanded: true, + } + }, [columnResult, evaluatorNodes, groupAnchorMap, parentTitle]) - const handleSelect = useCallback((_selectedKeys: Key[], info: any) => { + const handleSelect = useCallback((key: string, node: FocusTreeNode) => { if (typeof window === "undefined") return - const node = info?.node as AnchorTreeNode | undefined const anchorId = node?.anchorId if (!anchorId) return const target = document.getElementById(anchorId) @@ -136,20 +143,25 @@ const FocusDrawerSidePanel = ({runId, scenarioId}: FocusDrawerSidePanelProps) => ) } - return ( -
-
- -
-
- ) + return treeData ? ( + node.id} + getChildren={(node) => node.children} + renderLabel={(node) => ( +
+ {node.icon} + {node.title} +
+ )} + selectedKey={selectedKey} + onSelect={(key, node) => { + setSelectedKey(key) + handleSelect(key, node) + }} + defaultExpanded + /> + ) : null } export default memo(FocusDrawerSidePanel) diff --git a/web/oss/src/components/SharedDrawers/SessionDrawer/components/SessionTree/index.tsx b/web/oss/src/components/SharedDrawers/SessionDrawer/components/SessionTree/index.tsx index cde9fdd12..e37f5e3d1 100644 --- a/web/oss/src/components/SharedDrawers/SessionDrawer/components/SessionTree/index.tsx +++ b/web/oss/src/components/SharedDrawers/SessionDrawer/components/SessionTree/index.tsx @@ -1,4 +1,4 @@ -import {useMemo, useState} from "react" +import {useCallback, useMemo, useState} from "react" import {MagnifyingGlass, SlidersHorizontal} from "@phosphor-icons/react" import {Button, Divider, Input, Popover} from "antd" @@ -10,6 +10,7 @@ import CustomTreeComponent from "@/oss/components/CustomUIs/CustomTreeComponent" import {filterTree} from "@/oss/components/pages/observability/assets/utils" import {TraceSpanNode} from "@/oss/services/tracing/types" +import {TreeContent} from "../../../TraceDrawer/components/TraceTree" import TraceTreeSettings from "../../../TraceDrawer/components/TraceTreeSettings" import {openTraceDrawerAtom} from "../../../TraceDrawer/store/traceDrawerStore" import {useSessionDrawer} from "../../hooks/useSessionDrawer" @@ -90,6 +91,11 @@ const SessionTree = ({selected, setSelected}: SessionTreeProps) => { const filteredTree = treeRoot + const renderTraceLabel = useCallback( + (node: TraceSpanNode) => , + [traceTreeSettings], + ) + const handleSelect = (key: string) => { setSelected(key) const element = document.getElementById(key) @@ -166,9 +172,12 @@ const SessionTree = ({selected, setSelected}: SessionTreeProps) => { node.span_id} + getChildren={(node) => node.children as TraceSpanNode[] | undefined} + renderLabel={renderTraceLabel} selectedKey={selected} - onSelect={handleSelect} + onSelect={(key) => handleSelect(key)} + defaultExpanded />
) diff --git a/web/oss/src/components/SharedDrawers/TraceDrawer/components/TraceTree/index.tsx b/web/oss/src/components/SharedDrawers/TraceDrawer/components/TraceTree/index.tsx index 03d6b7a2c..6a1ab3678 100644 --- a/web/oss/src/components/SharedDrawers/TraceDrawer/components/TraceTree/index.tsx +++ b/web/oss/src/components/SharedDrawers/TraceDrawer/components/TraceTree/index.tsx @@ -1,4 +1,4 @@ -import {useMemo, useState} from "react" +import {useCallback, useMemo, useState} from "react" import {Coins, MagnifyingGlass, PlusCircle, SlidersHorizontal, Timer} from "@phosphor-icons/react" import {Button, Divider, Input, Popover, Space, Tooltip, Typography} from "antd" @@ -138,6 +138,11 @@ const TraceTree = ({activeTrace: active, activeTraceId, selected, setSelected}: return result || {...treeRoot, children: []} }, [searchValue, treeRoot]) + const renderTraceLabel = useCallback( + (node: TraceSpanNode) => , + [traceTreeSettings], + ) + if (!activeTrace) { return
} @@ -178,9 +183,12 @@ const TraceTree = ({activeTrace: active, activeTraceId, selected, setSelected}: node.span_id} + getChildren={(node) => node.children as TraceSpanNode[] | undefined} + renderLabel={renderTraceLabel} selectedKey={selected} - onSelect={setSelected} + onSelect={(key) => setSelected(key)} + defaultExpanded />
) From 08ee759cdb592203ac38209a9c9b9a5660bc5a62 Mon Sep 17 00:00:00 2001 From: Kaosiso Ezealigo Date: Sat, 3 Jan 2026 15:50:35 +0100 Subject: [PATCH 12/31] refactor: enhance GeneralSection and InvocationSection components, improve layout and styling in ConfigurationView --- .../components/GeneralSection.tsx | 31 ++- .../components/InvocationSection.tsx | 31 ++- .../components/SectionPrimitives.tsx | 4 +- .../components/TestsetSection.tsx | 19 +- .../views/ConfigurationView/index.tsx | 227 ++++++++++-------- 5 files changed, 163 insertions(+), 149 deletions(-) diff --git a/web/oss/src/components/EvalRunDetails/components/views/ConfigurationView/components/GeneralSection.tsx b/web/oss/src/components/EvalRunDetails/components/views/ConfigurationView/components/GeneralSection.tsx index 6a4aa6755..c8df14900 100644 --- a/web/oss/src/components/EvalRunDetails/components/views/ConfigurationView/components/GeneralSection.tsx +++ b/web/oss/src/components/EvalRunDetails/components/views/ConfigurationView/components/GeneralSection.tsx @@ -21,6 +21,7 @@ const {Text} = Typography interface GeneralSectionProps { runId: string showActions?: boolean + showHeader?: boolean } const GeneralSectionHeader = ({runId, index}: {runId: string; index: number}) => { @@ -31,7 +32,7 @@ const GeneralSectionHeader = ({runId, index}: {runId: string; index: number}) => ) } -const GeneralSection = ({runId, showActions = true}: GeneralSectionProps) => { +const GeneralSection = ({runId, showActions = true, showHeader = true}: GeneralSectionProps) => { const [collapsed, setCollapsed] = useState(false) const projectId = useAtomValue(effectiveProjectIdAtom) const invalidateRunsTable = useSetAtom(invalidateEvaluationRunsTableAtom) @@ -116,18 +117,22 @@ const GeneralSection = ({runId, showActions = true}: GeneralSectionProps) => { return (
- } - right={ -
- ) : null} - {!collapsed ? content : null} + {content} ) @@ -403,19 +354,89 @@ const ConfigurationSectionColumn = memo( }, ) +const EvaluationRunTagsRow = memo( + ({ + runIds, + registerScrollContainer, + syncScroll, + }: { + runIds: string[] + registerScrollContainer: (key: string, node: HTMLDivElement | null) => void + syncScroll: (key: string, scrollLeft: number) => void + }) => { + const columnClass = + runIds.length > 1 ? "auto-cols-[minmax(480px,1fr)]" : "auto-cols-[minmax(320px,1fr)]" + const refKey = "section-evaluations" + const handleRef = useCallback( + (node: HTMLDivElement | null) => registerScrollContainer(refKey, node), + [refKey, registerScrollContainer], + ) + const handleScroll = useCallback( + (event: UIEvent) => syncScroll(refKey, event.currentTarget.scrollLeft), + [refKey, syncScroll], + ) + + return ( + +
+ {runIds.map((runId, index) => ( + + ))} +
+
+ ) + }, +) + +const EvaluationRunTagItem = memo(({runId, index}: {runId: string; index: number}) => { + const runDisplayNameAtom = useMemo(() => runDisplayNameAtomFamily(runId), [runId]) + const runDisplayName = useAtomValue(runDisplayNameAtom) + const summaryAtom = useMemo( + () => configurationRunSummaryAtomFamily({runId, compareIndex: index}), + [runId, index], + ) + const summary = useAtomValue(summaryAtom) + const label = resolveLabel( + runDisplayName, + summary.runName !== "—" ? summary.runName : undefined, + summary.runSlug ?? undefined, + summary.runId, + ) + + return ( +
+ {summary.isLoading ? ( +
+ ) : ( + + )} +
+ ) +}) + const ConfigurationSectionRow = memo( ({ section, runIds, runIdsSignature, - runDescriptors, registerScrollContainer, syncScroll, }: { section: SectionDefinition runIds: string[] runIdsSignature: string - runDescriptors: RunDescriptor[] registerScrollContainer: (key: string, node: HTMLDivElement | null) => void syncScroll: (key: string, scrollLeft: number) => void }) => { @@ -455,14 +476,13 @@ const ConfigurationSectionRow = memo( return null } - const showRowHeader = false - // section.key === "general" || section.key === "query" - + const columnClass = + runIds.length > 1 ? "auto-cols-[minmax(480px,1fr)]" : "auto-cols-[minmax(320px,1fr)]" const grid = (
{runIds.map((runId, index) => ( setCollapsed((v) => !v) : undefined} /> ))}
) - return
{grid}
+ return ( +
+
+ {section.title} + +
+ {!collapsed ? grid : null} +
+ ) }, ) const ConfigurationLayout = memo(({runIds}: {runIds: string[]}) => { const runIdsSignature = useMemo(() => runIds.join("|"), [runIds]) const {register, syncScroll} = useScrollSync() - const {runDescriptors} = useRunMetricData(runIds) return ( -
+
+ {sectionDefinitions.map((section) => ( ))} - {/* Render evaluators without a shared wrapper; each run renders its own evaluator cards directly */} -
- {runIds.map((runId) => ( -
- -
- ))} -
) }) @@ -534,10 +567,8 @@ const ConfigurationView = ({runId}: ConfigurationViewProps) => { } return ( -
-
- -
+
+
) } From 7bac5871665363a2b2e5a5d0dade9aa47c0f7f00 Mon Sep 17 00:00:00 2001 From: Kaosiso Ezealigo Date: Sat, 3 Jan 2026 16:29:20 +0100 Subject: [PATCH 13/31] refactor: enhance FocusDrawer component structure and styling, improve section handling and layout --- .../EvalRunDetails/components/FocusDrawer.tsx | 302 ++++++++---------- 1 file changed, 140 insertions(+), 162 deletions(-) diff --git a/web/oss/src/components/EvalRunDetails/components/FocusDrawer.tsx b/web/oss/src/components/EvalRunDetails/components/FocusDrawer.tsx index 0f0964352..46f87a47a 100644 --- a/web/oss/src/components/EvalRunDetails/components/FocusDrawer.tsx +++ b/web/oss/src/components/EvalRunDetails/components/FocusDrawer.tsx @@ -1,7 +1,9 @@ -import {memo, useCallback, useMemo, useRef} from "react" +import type {ReactNode} from "react" +import {memo, useCallback, useMemo, useRef, useState} from "react" import {isValidElement} from "react" -import {Collapse, Popover, Skeleton, Tag, Typography} from "antd" +import {DownOutlined} from "@ant-design/icons" +import {Button, Popover, Skeleton, Tag, Typography} from "antd" import clsx from "clsx" import {useAtomValue, useSetAtom} from "jotai" import {AlertCircle} from "lucide-react" @@ -48,6 +50,7 @@ import {formatMetricDisplay, METRIC_EMPTY_PLACEHOLDER} from "../utils/metricForm import EvaluationRunTag from "./EvaluationRunTag" import FocusDrawerHeader from "./FocusDrawerHeader" import FocusDrawerSidePanel from "./FocusDrawerSidePanel" +import {SectionCard} from "./views/ConfigurationView/components/SectionPrimitives" const JsonEditor = dynamic(() => import("@/oss/components/Editor/Editor"), {ssr: false}) @@ -82,7 +85,7 @@ const buildStaticMetricColumn = ( } as EvaluationTableColumn & {__source: "runMetric"} } -const {Text, Title} = Typography +const {Text} = Typography type FocusDrawerColumn = EvaluationTableColumn & {__source?: "runMetric"} @@ -738,7 +741,7 @@ const EvalOutputMetaRow = memo( const resolvedCompareIndex = compareIndex ?? 0 return ( -
+
void +}) => ( +
+ {title} +
+) + +const FocusSectionContent = memo( ({ section, runId, @@ -766,68 +789,69 @@ const FocusDrawerSectionCard = memo( scenarioId: string }) => { const isInputSection = section.group?.kind === "input" - const sectionLabelNode = useMemo( - () => ( - - <FocusGroupLabel group={section.group} label={section.label} runId={runId} /> - - ), - [runId, section.group, section.label], - ) - const sectionContent = useMemo( - () => ( -
- {section.group?.kind === "invocation" ? ( - - ) : null} - {section.columns.map(({column, descriptor}) => ( - - ))} -
- ), - [isInputSection, runId, scenarioId, section], + return ( +
+ {section.group?.kind === "invocation" ? ( + + ) : null} + + {section.columns.map(({column, descriptor}) => ( + + ))} +
) - const collapseItems = useMemo( - () => [ - { - key: section.id, - label: sectionLabelNode, - children: sectionContent, - }, - ], - [section.id, sectionContent, sectionLabelNode], + }, +) + +FocusSectionContent.displayName = "FocusSectionContent" + +const FocusDrawerSectionCard = memo( + ({ + section, + runId, + scenarioId, + }: { + section: FocusDrawerSection + runId: string + scenarioId: string + }) => { + const [collapsed, setCollapsed] = useState(false) + const sectionLabelNode = useMemo( + () => , + [runId, section.group, section.label], ) return ( -
- + setCollapsed((value) => !value)} /> + {!collapsed ? ( +
+ + + +
+ ) : null}
) }, @@ -909,32 +933,15 @@ const CompareRunColumnContent = memo( runId, scenarioId, section, - compareIndex, }: { runId: string scenarioId: string section: FocusDrawerSection - compareIndex: number }) => { return ( -
- {section.group?.kind === "invocation" ? ( - - ) : null} - -
- {section.columns.map(({column, descriptor}) => ( - - ))} -
-
+ + + ) }, ) @@ -973,7 +980,7 @@ const CompareMetaRow = memo( }, [onScrollSync]) return ( -
+
{ scrollRef.current = node @@ -1006,7 +1013,7 @@ const CompareMetaRow = memo( })}
-
+ ) }, ) @@ -1040,10 +1047,11 @@ const CompareSectionRow = memo( registerScrollContainer: (node: HTMLDivElement | null) => void onScrollSync: (node: HTMLDivElement) => void }) => { + const [collapsed, setCollapsed] = useState(false) const scrollRef = useRef(null) const firstSection = sectionMapsPerRun.find((map) => map.get(sectionId))?.get(sectionId) const sectionLabelNode = ( - + <> {sectionGroup && firstSection ? ( <FocusGroupLabel group={sectionGroup} @@ -1053,7 +1061,7 @@ const CompareSectionRow = memo( ) : ( sectionLabel )} - + ) const columnsCount = compareScenarios.length const rowGridStyle = useMemo( @@ -1068,82 +1076,49 @@ const CompareSectionRow = memo( onScrollSync(scrollRef.current) } }, [onScrollSync]) - const collapseItems = useMemo( - () => [ - { - key: sectionId, - label: sectionLabelNode, - children: ( -
{ - scrollRef.current = node - registerScrollContainer(node) - }} - className="overflow-x-auto [scrollbar-width:none] [&::-webkit-scrollbar]:hidden" - onScroll={handleScroll} - > -
- {compareScenarios.map(({runId, scenarioId, compareIndex}) => { - const section = sectionMapsPerRun[compareIndex]?.get(sectionId) - - if (!runId || !scenarioId || !section) { - return ( -
- -
- ) - } + return ( +
+ setCollapsed((value) => !value)} + /> + {!collapsed ? ( +
{ + scrollRef.current = node + registerScrollContainer(node) + }} + className="overflow-x-auto pb-2 [scrollbar-width:none] [&::-webkit-scrollbar]:hidden" + onScroll={handleScroll} + > +
+ {compareScenarios.map(({runId, scenarioId, compareIndex}) => { + const section = sectionMapsPerRun[compareIndex]?.get(sectionId) + if (!runId || !scenarioId || !section) { return ( - +
+ +
) - })} -
-
- ), - }, - ], - [ - compareScenarios, - handleScroll, - registerScrollContainer, - rowGridStyle, - sectionId, - sectionLabelNode, - sectionMapsPerRun, - ], - ) + } - return ( -
- + return ( + + ) + })} +
+
+ ) : null}
) }, @@ -1274,7 +1249,7 @@ const FocusDrawerCompareContentInner = ({ }, []) return ( -
+
{inputSectionEntry ? ( { } return ( -
+
) @@ -1418,16 +1393,19 @@ export const FocusDrawerContent = ({ return (
{sections.map((section) => { if (section.group?.kind === "invocation") { return (
-
+ -
+ Date: Mon, 5 Jan 2026 09:48:10 +0100 Subject: [PATCH 14/31] fix(frontend): keep scenario row tint on hover --- web/oss/src/styles/evaluations.css | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/web/oss/src/styles/evaluations.css b/web/oss/src/styles/evaluations.css index 5b9e52260..8c17ddec8 100644 --- a/web/oss/src/styles/evaluations.css +++ b/web/oss/src/styles/evaluations.css @@ -175,6 +175,14 @@ background-color: inherit !important; } +.agenta-scenario-table .scenario-row .ant-table-cell { + background-color: inherit !important; +} + +.agenta-scenario-table .scenario-row:hover > .ant-table-cell { + background-color: inherit !important; +} + .scenario-table-cell { width: 100%; height: var(--scenario-row-height, 160px); From 67eb181a54835d9eb62915803f9d1a1fd5f0fcb5 Mon Sep 17 00:00:00 2001 From: Kaosiso Ezealigo Date: Mon, 5 Jan 2026 10:21:44 +0100 Subject: [PATCH 15/31] fix --- .../components/EvalRunDetails/components/FocusDrawer.tsx | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/web/oss/src/components/EvalRunDetails/components/FocusDrawer.tsx b/web/oss/src/components/EvalRunDetails/components/FocusDrawer.tsx index 46f87a47a..505e33cac 100644 --- a/web/oss/src/components/EvalRunDetails/components/FocusDrawer.tsx +++ b/web/oss/src/components/EvalRunDetails/components/FocusDrawer.tsx @@ -1066,10 +1066,9 @@ const CompareSectionRow = memo( const columnsCount = compareScenarios.length const rowGridStyle = useMemo( () => ({ - gridTemplateColumns: `repeat(${columnsCount}, ${columnMinWidth}px)`, - minWidth: `${columnsCount * columnMinWidth}px`, + gridTemplateColumns: `repeat(${columnsCount}, 1fr)`, }), - [columnsCount, columnMinWidth], + [columnsCount], ) const handleScroll = useCallback(() => { if (scrollRef.current) { @@ -1394,7 +1393,7 @@ export const FocusDrawerContent = ({ return (
Date: Mon, 5 Jan 2026 10:35:56 +0100 Subject: [PATCH 16/31] fix --- .../components/EvalRunDetails/components/FocusDrawer.tsx | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/web/oss/src/components/EvalRunDetails/components/FocusDrawer.tsx b/web/oss/src/components/EvalRunDetails/components/FocusDrawer.tsx index 505e33cac..2a1f2dc9b 100644 --- a/web/oss/src/components/EvalRunDetails/components/FocusDrawer.tsx +++ b/web/oss/src/components/EvalRunDetails/components/FocusDrawer.tsx @@ -968,10 +968,9 @@ const CompareMetaRow = memo( const columnsCount = compareScenarios.length const rowGridStyle = useMemo( () => ({ - gridTemplateColumns: `repeat(${columnsCount}, ${columnMinWidth}px)`, - minWidth: `${columnsCount * columnMinWidth}px`, + gridTemplateColumns: `repeat(${columnsCount}, 1fr)`, }), - [columnsCount, columnMinWidth], + [columnsCount], ) const handleScroll = useCallback(() => { if (scrollRef.current) { @@ -989,7 +988,7 @@ const CompareMetaRow = memo( className="overflow-x-auto [scrollbar-width:none] [&::-webkit-scrollbar]:hidden" onScroll={handleScroll} > -
+
{compareScenarios.map(({runId, scenarioId, compareIndex}) => { if (!runId || !scenarioId) { return ( @@ -1091,7 +1090,7 @@ const CompareSectionRow = memo( className="overflow-x-auto pb-2 [scrollbar-width:none] [&::-webkit-scrollbar]:hidden" onScroll={handleScroll} > -
+
{compareScenarios.map(({runId, scenarioId, compareIndex}) => { const section = sectionMapsPerRun[compareIndex]?.get(sectionId) From fe8c7b919e6df0ce3512872412feda5e2c934f66 Mon Sep 17 00:00:00 2001 From: Kaosiso Ezealigo Date: Mon, 5 Jan 2026 13:46:41 +0100 Subject: [PATCH 17/31] fix --- web/oss/src/components/Sidebar/SettingsSidebar.tsx | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/web/oss/src/components/Sidebar/SettingsSidebar.tsx b/web/oss/src/components/Sidebar/SettingsSidebar.tsx index 8dbfbe4cb..9927d1a3f 100644 --- a/web/oss/src/components/Sidebar/SettingsSidebar.tsx +++ b/web/oss/src/components/Sidebar/SettingsSidebar.tsx @@ -1,7 +1,7 @@ import {FC, useMemo} from "react" -import {ApartmentOutlined, KeyOutlined} from "@ant-design/icons" -import {ArrowLeft, Sparkle, Receipt} from "@phosphor-icons/react" +import {ApartmentOutlined} from "@ant-design/icons" +import {ArrowLeftIcon, SparkleIcon, ReceiptIcon, KeyIcon} from "@phosphor-icons/react" import {Button, Divider} from "antd" import clsx from "clsx" import {useAtom} from "jotai" @@ -34,19 +34,19 @@ const SettingsSidebar: FC = ({lastPath}) => { { key: "secrets", title: "Model Hub", - icon: , + icon: , }, { key: "apiKeys", title: "API Keys", - icon: , + icon: , }, ] if (isDemo()) { list.push({ key: "billing", title: "Usage & Billing", - icon: , + icon: , }) } return list @@ -70,7 +70,7 @@ const SettingsSidebar: FC = ({lastPath}) => {
+ ) +} const FocusSectionContent = memo( ({ @@ -1392,7 +1414,7 @@ export const FocusDrawerContent = ({ return (
setCollapsed((value) => !value)} + onKeyDown={(event: KeyboardEvent) => { + if (event.key === "Enter" || event.key === " ") { + event.preventDefault() + setCollapsed((value) => !value) + } + }} > {section.title} @@ -511,7 +522,10 @@ const ConfigurationSectionRow = memo( type="link" size="small" icon={} - onClick={() => setCollapsed((value) => !value)} + onClick={(event) => { + event.stopPropagation() + setCollapsed((value) => !value) + }} />
{!collapsed ? grid : null} From 535026564efa6843ea793c15d08edd073448543f Mon Sep 17 00:00:00 2001 From: Kaosiso Ezealigo Date: Tue, 6 Jan 2026 17:28:31 +0100 Subject: [PATCH 20/31] Refactor layout and styles for EvaluatorMetricsChart, AggregatedOverviewSection, RunNameTag, and enhance table styling in evaluations.css --- .../components/EvaluatorMetricsChart/index.tsx | 9 +++++++-- .../EvaluatorMetricsSpiderChart.tsx | 4 ++-- .../components/AggregatedOverviewSection.tsx | 4 ++-- .../views/OverviewView/components/RunNameTag.tsx | 14 +------------- web/oss/src/styles/evaluations.css | 3 +++ 5 files changed, 15 insertions(+), 19 deletions(-) diff --git a/web/oss/src/components/EvalRunDetails/components/EvaluatorMetricsChart/index.tsx b/web/oss/src/components/EvalRunDetails/components/EvaluatorMetricsChart/index.tsx index 3405904dc..ada36f5a7 100644 --- a/web/oss/src/components/EvalRunDetails/components/EvaluatorMetricsChart/index.tsx +++ b/web/oss/src/components/EvalRunDetails/components/EvaluatorMetricsChart/index.tsx @@ -612,9 +612,14 @@ const EvaluatorMetricsChart = ({
-
+
{summaryItems.map((entry) => ( -
+
-
+
-
+
diff --git a/web/oss/src/components/EvalRunDetails/components/views/OverviewView/components/RunNameTag.tsx b/web/oss/src/components/EvalRunDetails/components/views/OverviewView/components/RunNameTag.tsx index a30be878b..1f8c8998c 100644 --- a/web/oss/src/components/EvalRunDetails/components/views/OverviewView/components/RunNameTag.tsx +++ b/web/oss/src/components/EvalRunDetails/components/views/OverviewView/components/RunNameTag.tsx @@ -59,12 +59,6 @@ const formatDateTime = (value: string | number | Date | null | undefined) => { const RunNameTag = ({runId, label, accentColor}: RunNameTagProps) => { const style = useMemo(() => buildAccentStyle(accentColor), [accentColor]) - const tooltip = useMemo(() => { - if (!label) return runId - if (label === runId) return label - return `${label} (${runId})` - }, [label, runId]) - const runQuery = useAtomValueWithSchedule( useMemo(() => evaluationRunQueryAtomFamily(runId), [runId]), {priority: LOW_PRIORITY}, @@ -162,13 +156,7 @@ const RunNameTag = ({runId, label, accentColor}: RunNameTagProps) => { return ( - + ) } diff --git a/web/oss/src/styles/evaluations.css b/web/oss/src/styles/evaluations.css index 4fa5b3ed5..795a31993 100644 --- a/web/oss/src/styles/evaluations.css +++ b/web/oss/src/styles/evaluations.css @@ -150,6 +150,9 @@ .metadata-summary-table .ant-table-tbody > tr > td { border-bottom: 1px solid #eaeff5; border-inline: none; + height: 48px; + padding: 0 16px; + vertical-align: middle; } .metadata-summary-table .ant-table-tbody > tr > td:first-of-type { From adc5272104637c8e0c2a479fe92dfba4a3fb269b Mon Sep 17 00:00:00 2001 From: junaway <7041392+junaway@users.noreply.github.com> Date: Wed, 7 Jan 2026 10:13:02 +0000 Subject: [PATCH 21/31] v0.76.0 --- api/pyproject.toml | 2 +- sdk/pyproject.toml | 2 +- web/ee/package.json | 2 +- web/oss/package.json | 2 +- web/package.json | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/api/pyproject.toml b/api/pyproject.toml index f008eb397..83f2016dc 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "api" -version = "0.75.0" +version = "0.76.0" description = "Agenta API" authors = [ { name = "Mahmoud Mabrouk", email = "mahmoud@agenta.ai" }, diff --git a/sdk/pyproject.toml b/sdk/pyproject.toml index 3687558bc..d6e1c588e 100644 --- a/sdk/pyproject.toml +++ b/sdk/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "agenta" -version = "0.75.0" +version = "0.76.0" description = "The SDK for agenta is an open-source LLMOps platform." readme = "README.md" authors = [ diff --git a/web/ee/package.json b/web/ee/package.json index 33914f681..d5b52fc6c 100644 --- a/web/ee/package.json +++ b/web/ee/package.json @@ -1,6 +1,6 @@ { "name": "@agenta/ee", - "version": "0.75.0", + "version": "0.76.0", "private": true, "engines": { "node": ">=18" diff --git a/web/oss/package.json b/web/oss/package.json index 121a68ba3..17de9745c 100644 --- a/web/oss/package.json +++ b/web/oss/package.json @@ -1,6 +1,6 @@ { "name": "@agenta/oss", - "version": "0.75.0", + "version": "0.76.0", "private": true, "engines": { "node": ">=18" diff --git a/web/package.json b/web/package.json index 0b43bb918..5de8cfacd 100644 --- a/web/package.json +++ b/web/package.json @@ -1,6 +1,6 @@ { "name": "agenta-web", - "version": "0.75.0", + "version": "0.76.0", "workspaces": [ "ee", "oss", From a57b9e32bb915ec120d19d125dd96dfec633a5c6 Mon Sep 17 00:00:00 2001 From: Kaosiso Ezealigo Date: Wed, 7 Jan 2026 15:09:32 +0100 Subject: [PATCH 22/31] Enhance evaluation result overview: refactor table export logic, improve comparison run handling, and update UI components for better accessibility and styling. --- .../src/components/EvalRunDetails/Table.tsx | 46 ++--- .../EvalRunDetails/atoms/compare.ts | 15 +- .../EvalRunDetails/atoms/table/run.ts | 17 +- .../components/CompareRunsMenu.tsx | 191 +++++++++++------- .../components/EvaluationRunTag.tsx | 10 +- .../EvalRunDetails/components/FocusDrawer.tsx | 44 ++-- .../components/AggregatedOverviewSection.tsx | 2 +- .../components/OverviewPlaceholders.tsx | 2 + .../OverviewView/components/RunNameTag.tsx | 10 +- .../components/ColumnVisibilityHeader.tsx | 5 +- .../InfiniteVirtualTableFeatureShell.tsx | 3 +- .../hooks/useTableExport.ts | 1 + .../src/components/PageLayout/PageLayout.tsx | 13 +- web/oss/src/styles/evaluations.css | 5 +- 14 files changed, 229 insertions(+), 135 deletions(-) diff --git a/web/oss/src/components/EvalRunDetails/Table.tsx b/web/oss/src/components/EvalRunDetails/Table.tsx index b0bde9777..a1fae6248 100644 --- a/web/oss/src/components/EvalRunDetails/Table.tsx +++ b/web/oss/src/components/EvalRunDetails/Table.tsx @@ -87,26 +87,13 @@ const EvalRunDetailsTable = ({ const previewColumns = usePreviewColumns({columnResult, evaluationType}) - // Inject synthetic columns for comparison exports (hidden in table display) - const columnsWithSyntheticColumns = useMemo(() => { + // Inject synthetic columns for comparison exports (do not render in UI) + const exportColumns = useMemo(() => { const hasCompareRuns = compareSlots.some(Boolean) if (!hasCompareRuns) { return previewColumns.columns } - const hiddenColumnStyle = { - display: "none", - width: 0, - minWidth: 0, - maxWidth: 0, - padding: 0, - margin: 0, - border: "none", - visibility: "hidden", - position: "absolute", - left: "-9999px", - } as const - // Create synthetic "Run" column for export only (completely hidden in table) const runColumn = { key: "__run_type__", @@ -118,8 +105,6 @@ const EvalRunDetailsTable = ({ render: () => null, exportEnabled: true, exportLabel: "Run", - onHeaderCell: () => ({style: hiddenColumnStyle}), - onCell: () => ({style: hiddenColumnStyle}), } // Create synthetic "Run ID" column for export only (completely hidden in table) @@ -133,8 +118,6 @@ const EvalRunDetailsTable = ({ render: () => null, exportEnabled: true, exportLabel: "Run ID", - onHeaderCell: () => ({style: hiddenColumnStyle}), - onCell: () => ({style: hiddenColumnStyle}), } return [runColumn, runIdColumn, ...previewColumns.columns] @@ -830,17 +813,27 @@ const EvalRunDetailsTable = ({ resolveColumnLabel, filename: `${runDisplayName || runId}-scenarios.csv`, beforeExport: loadAllPagesBeforeExport, + columnsOverride: exportColumns, }), - [exportResolveValue, resolveColumnLabel, runId, runDisplayName, loadAllPagesBeforeExport], + [ + exportResolveValue, + resolveColumnLabel, + runId, + runDisplayName, + loadAllPagesBeforeExport, + exportColumns, + ], ) + const hasCompareRuns = compareSlots.some(Boolean) + return (
datasetStore={evaluationPreviewDatasetStore} tableScope={tableScope} - columns={columnsWithSyntheticColumns} + columns={previewColumns.columns} rowKey={(record) => record.key} tableClassName={clsx( "agenta-scenario-table", @@ -877,10 +870,13 @@ const EvalRunDetailsTable = ({ bordered: true, tableLayout: "fixed", onRow: (record) => { - // Always tint rows: base run uses index 0; comparisons use their index. - const backgroundColor = getComparisonColor( - typeof record.compareIndex === "number" ? record.compareIndex : 0, - ) + const backgroundColor = hasCompareRuns + ? getComparisonColor( + typeof record.compareIndex === "number" + ? record.compareIndex + : 0, + ) + : "#fff" return { onClick: (event) => { diff --git a/web/oss/src/components/EvalRunDetails/atoms/compare.ts b/web/oss/src/components/EvalRunDetails/atoms/compare.ts index d86d1ecf7..281feefce 100644 --- a/web/oss/src/components/EvalRunDetails/atoms/compare.ts +++ b/web/oss/src/components/EvalRunDetails/atoms/compare.ts @@ -168,7 +168,20 @@ export const deriveRunComparisonStructure = ({ } /** Terminal statuses that allow comparison */ -const TERMINAL_STATUSES = new Set(["success", "failure", "errors", "cancelled"]) +export const TERMINAL_STATUSES = new Set([ + "success", + "failure", + "failed", + "errors", + "cancelled", + "completed", + "finished", + "ok", + "evaluation_finished", + "evaluation_finished_with_errors", + "evaluation_failed", + "evaluation_aggregation_failed", +]) /** Check if a status is terminal (run has finished) */ export const isTerminalStatus = (status: string | undefined | null): boolean => { diff --git a/web/oss/src/components/EvalRunDetails/atoms/table/run.ts b/web/oss/src/components/EvalRunDetails/atoms/table/run.ts index abddb442c..29e4d7624 100644 --- a/web/oss/src/components/EvalRunDetails/atoms/table/run.ts +++ b/web/oss/src/components/EvalRunDetails/atoms/table/run.ts @@ -4,10 +4,14 @@ import {atomWithQuery} from "jotai-tanstack-query" import axios from "@/oss/lib/api/assets/axiosConfig" import {buildRunIndex} from "@/oss/lib/evaluations/buildRunIndex" import {snakeToCamelCaseKeys} from "@/oss/lib/helpers/casing" +import { + getPreviewRunBatcher, + invalidatePreviewRunCache, +} from "@/oss/lib/hooks/usePreviewEvaluations/assets/previewRunBatcher" +import {TERMINAL_STATUSES} from "../compare" import {effectiveProjectIdAtom} from "../run" -import {getPreviewRunBatcher} from "@/agenta-oss-common/lib/hooks/usePreviewEvaluations/assets/previewRunBatcher" import type {EvaluationRun} from "@/agenta-oss-common/lib/hooks/usePreviewEvaluations/types" export interface EvaluationRunQueryResult { @@ -16,6 +20,11 @@ export interface EvaluationRunQueryResult { runIndex: ReturnType } +const isTerminalStatus = (status: string | null | undefined) => { + if (!status) return false + return TERMINAL_STATUSES.has(status.toLowerCase()) +} + const patchedRunRevisionSet = new Set() const buildRevisionPayload = (references: Record | undefined) => { @@ -309,6 +318,11 @@ export const evaluationRunQueryAtomFamily = atomFamily((runId: string | null) => gcTime: 5 * 60 * 1000, refetchOnWindowFocus: false, refetchOnReconnect: false, + refetchInterval: (query) => { + const status = + query.state.data?.rawRun?.status ?? query.state.data?.camelRun?.status + return isTerminalStatus(status) ? false : 5000 + }, queryFn: async () => { if (!runId) { throw new Error("evaluationRunQueryAtomFamily requires a run id") @@ -317,6 +331,7 @@ export const evaluationRunQueryAtomFamily = atomFamily((runId: string | null) => throw new Error("evaluationRunQueryAtomFamily requires a project id") } + invalidatePreviewRunCache(projectId, runId) const batcher = getPreviewRunBatcher() const rawRun = await batcher({projectId, runId}) if (!rawRun) { diff --git a/web/oss/src/components/EvalRunDetails/components/CompareRunsMenu.tsx b/web/oss/src/components/EvalRunDetails/components/CompareRunsMenu.tsx index dd30138b9..91de4cd93 100644 --- a/web/oss/src/components/EvalRunDetails/components/CompareRunsMenu.tsx +++ b/web/oss/src/components/EvalRunDetails/components/CompareRunsMenu.tsx @@ -2,8 +2,10 @@ import {memo, useCallback, useEffect, useMemo, useState} from "react" import {Button, Checkbox, Input, List, Popover, Space, Tag, Tooltip, Typography} from "antd" import {useAtomValue, useSetAtom} from "jotai" +import Image from "next/image" import {message} from "@/oss/components/AppMessageContext" +import EmptyComponent from "@/oss/components/Placeholders/EmptyComponent" import ReferenceTag from "@/oss/components/References/ReferenceTag" import axios from "@/oss/lib/api/assets/axiosConfig" import dayjs from "@/oss/lib/helpers/dateTimeHelper/dayjs" @@ -104,7 +106,7 @@ const CompareRunsMenu = ({runId}: CompareRunsMenuProps) => { { setCompareIds((prev) => { @@ -234,10 +240,11 @@ const CompareRunsPopoverContent = memo(({runId, availability}: CompareRunsPopove return ( -
- +
+
+ Testset: {availability.testsetIds.length ? ( - +
{availability.testsetIds.map((id) => { const label = matchingTestsetNameMap[id] ?? id const copyValue = id @@ -252,12 +259,14 @@ const CompareRunsPopoverContent = memo(({runId, availability}: CompareRunsPopove /> ) })} - - ) : null} - - +
+ ) : ( + + )} +
+ - Selected {compareIds.length}/{MAX_COMPARISON_RUNS} + Selected: {compareIds.length}/{MAX_COMPARISON_RUNS} {compareIds.length ? ( + ) : null} +
- - - Selected: {compareIds.length}/{MAX_COMPARISON_RUNS} - - {compareIds.length ? ( - - ) : null} - -
- setSearchTerm(event.target.value)} - /> + setSearchTerm(event.target.value)} + bordered={false} + /> - + +
{showLoading ? (
@@ -335,20 +346,23 @@ const CompareRunsPopoverContent = memo(({runId, availability}: CompareRunsPopove const createdLabel = item.createdAt ? dayjs(item.createdAt).format("DD MMM YYYY") : "" - const _resolvedTestsetNames = - item.testsetNames.length > 0 - ? item.testsetNames - : item.structure.testsetIds - .map((id) => candidateTestsetNameMap[id]) - .filter((name): name is string => Boolean(name)) + return ( handleToggle(item.id)} - className="compare-run-row flex flex-col !items-start justify-start" + className={clsx( + "compare-run-row flex flex-col !items-start justify-start", + "!py-1 !px-2", + "border-b border-[#EAEFF5]", + "last:border-b-0", + isChecked && "compare-run-row--selected", + )} + style={{borderBottomStyle: "solid"}} >
event.stopPropagation()} onChange={(event) => { diff --git a/web/oss/src/styles/evaluations.css b/web/oss/src/styles/evaluations.css index feebb2ed7..4ef374902 100644 --- a/web/oss/src/styles/evaluations.css +++ b/web/oss/src/styles/evaluations.css @@ -36,22 +36,62 @@ margin-inline-end: 0; } +.compare-runs-popover-overlay .ant-popover-inner-content { + padding: 0; +} + +.compare-runs-header { + padding: 8px; + border-bottom: 1px solid #eaeff5; +} + .compare-runs-list .ant-list-item { - border: none !important; - padding: 8px 4px; + padding: 0; +} + +.compare-runs-popover .ant-input-affix-wrapper { + border: none; + box-shadow: none; + padding-inline: 0; + background: transparent; +} + +.compare-runs-popover .ant-input-affix-wrapper:focus, +.compare-runs-popover .ant-input-affix-wrapper-focused, +.compare-runs-popover .ant-input-affix-wrapper:focus-within { + border: none; + box-shadow: none; +} + +.compare-runs-popover .ant-input { + padding-inline: 0; + border: none; + box-shadow: none; + outline: none; + background: transparent; +} + +.compare-runs-popover .ant-input:focus { + border: none; + box-shadow: none; + outline: none; } .compare-run-row { + width: 100%; cursor: pointer; flex-direction: column; gap: 4px; - border-radius: 6px; } .compare-run-row:hover { background: #f9fafb; } +.compare-run-row--selected { + background: #f5f7fa; +} + .compare-run-row__main { display: flex; align-items: flex-start; From d46b80c5366156389394c9b9a98b49e3faa5a0c5 Mon Sep 17 00:00:00 2001 From: Kaosiso Ezealigo Date: Wed, 7 Jan 2026 17:01:02 +0100 Subject: [PATCH 24/31] Refactor EvalRunDetails components: improve overflow handling in Table, optimize CompareRunsMenu logic, enhance EvaluatorMetricsChart properties, and refine evaluator metrics extraction logic. --- web/oss/src/components/EvalRunDetails/Table.tsx | 4 ++-- .../components/CompareRunsMenu.tsx | 9 --------- .../components/EvaluatorMetricsChart/index.tsx | 16 ++++++++-------- .../views/OverviewView/utils/evaluatorMetrics.ts | 4 +++- 4 files changed, 13 insertions(+), 20 deletions(-) diff --git a/web/oss/src/components/EvalRunDetails/Table.tsx b/web/oss/src/components/EvalRunDetails/Table.tsx index a1fae6248..e274a3d36 100644 --- a/web/oss/src/components/EvalRunDetails/Table.tsx +++ b/web/oss/src/components/EvalRunDetails/Table.tsx @@ -828,8 +828,8 @@ const EvalRunDetailsTable = ({ const hasCompareRuns = compareSlots.some(Boolean) return ( -
-
+
+
datasetStore={evaluationPreviewDatasetStore} tableScope={tableScope} diff --git a/web/oss/src/components/EvalRunDetails/components/CompareRunsMenu.tsx b/web/oss/src/components/EvalRunDetails/components/CompareRunsMenu.tsx index fec701d72..781e18a2c 100644 --- a/web/oss/src/components/EvalRunDetails/components/CompareRunsMenu.tsx +++ b/web/oss/src/components/EvalRunDetails/components/CompareRunsMenu.tsx @@ -192,15 +192,6 @@ const CompareRunsPopoverContent = memo(({runId, availability}: CompareRunsPopove }) }, [availability.canCompare, availability.testsetIds, availability.evaluatorIds, runs, runId]) - const candidateTestsetIds = useMemo(() => { - const ids = new Set() - candidates.forEach((candidate) => { - candidate.structure.testsetIds.forEach((id) => ids.add(id)) - }) - return Array.from(ids) - }, [candidates]) - const candidateTestsetNameMap = useTestsetNameMap(candidateTestsetIds) - const filteredCandidates = useMemo(() => { const query = searchTerm.trim().toLowerCase() return candidates.filter((candidate) => { diff --git a/web/oss/src/components/EvalRunDetails/components/EvaluatorMetricsChart/index.tsx b/web/oss/src/components/EvalRunDetails/components/EvaluatorMetricsChart/index.tsx index ada36f5a7..69204c558 100644 --- a/web/oss/src/components/EvalRunDetails/components/EvaluatorMetricsChart/index.tsx +++ b/web/oss/src/components/EvalRunDetails/components/EvaluatorMetricsChart/index.tsx @@ -429,13 +429,13 @@ const EvaluatorMetricsChart = ({ key: baseSeriesKey, name: resolvedRunName, color: resolvedBaseColor, - barProps: {radius: [8, 8, 0, 0]}, + barProps: {radius: [8, 8, 0, 0], minPointSize: 2}, }, ...comparisonBooleanHistograms.map((entry) => ({ key: entry.runId, name: entry.runName, color: entry.color, - barProps: {radius: [8, 8, 0, 0]}, + barProps: {radius: [8, 8, 0, 0], minPointSize: 2}, })), ] @@ -449,8 +449,8 @@ const EvaluatorMetricsChart = ({ yDomain={[0, 100]} series={series} barCategoryGap="20%" - showLegend={stableComparisons.length > 0} - reserveLegendSpace={stableComparisons.length > 0} + showLegend={false} + reserveLegendSpace={false} /> ) } @@ -515,13 +515,13 @@ const EvaluatorMetricsChart = ({ key: baseSeriesKey, name: resolvedRunName, color: resolvedBaseColor, - barProps: {radius: [8, 8, 0, 0]}, + barProps: {radius: [8, 8, 0, 0], minPointSize: 2}, }, ...comparisonMaps.map((entry) => ({ key: entry.runId, name: entry.runName, color: entry.color, - barProps: {radius: [8, 8, 0, 0]}, + barProps: {radius: [8, 8, 0, 0], minPointSize: 2}, })), ] @@ -535,8 +535,8 @@ const EvaluatorMetricsChart = ({ yDomain={[0, "auto"]} series={series} barCategoryGap="20%" - showLegend={stableComparisons.length > 0} - reserveLegendSpace={stableComparisons.length > 0} + showLegend={false} + reserveLegendSpace={false} /> ) } diff --git a/web/oss/src/components/EvalRunDetails/components/views/OverviewView/utils/evaluatorMetrics.ts b/web/oss/src/components/EvalRunDetails/components/views/OverviewView/utils/evaluatorMetrics.ts index 83f85fdc1..c17f436f6 100644 --- a/web/oss/src/components/EvalRunDetails/components/views/OverviewView/utils/evaluatorMetrics.ts +++ b/web/oss/src/components/EvalRunDetails/components/views/OverviewView/utils/evaluatorMetrics.ts @@ -92,7 +92,9 @@ export const buildEvaluatorMetricEntries = ( if (!rawKey) return const canonicalKey = canonicalizeMetricKey(rawKey) if (hasSchema && !allowedCanonicalKeys.has(canonicalKey)) { - return + if (!rawKey.startsWith("attributes.ag.data.outputs.")) { + return + } } if (!unique.has(canonicalKey)) { const fallbackDefinition = fallbackByCanonicalKey.get(canonicalKey) From 7f99d05c592585ae6c764d22d681e87ee11fbe91 Mon Sep 17 00:00:00 2001 From: Kaosiso Ezealigo Date: Wed, 7 Jan 2026 17:07:26 +0100 Subject: [PATCH 25/31] fix --- .../EvaluatorMetricsSpiderChart.tsx | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/web/oss/src/components/EvalRunDetails/components/EvaluatorMetricsSpiderChart/EvaluatorMetricsSpiderChart.tsx b/web/oss/src/components/EvalRunDetails/components/EvaluatorMetricsSpiderChart/EvaluatorMetricsSpiderChart.tsx index a835aa8bd..836259571 100644 --- a/web/oss/src/components/EvalRunDetails/components/EvaluatorMetricsSpiderChart/EvaluatorMetricsSpiderChart.tsx +++ b/web/oss/src/components/EvalRunDetails/components/EvaluatorMetricsSpiderChart/EvaluatorMetricsSpiderChart.tsx @@ -126,10 +126,12 @@ const EvaluatorMetricsSpiderChart = ({ } const lines = clampLines(label, 18) + const lineHeight = 12 + const blockOffset = -((lines.length - 1) * lineHeight) / 2 return ( {lines.map((ln, i) => ( - + {ln} ))} From 4c291e46a0a0c95465602e0dcbd663b9138ada67 Mon Sep 17 00:00:00 2001 From: Kaosiso Ezealigo Date: Thu, 8 Jan 2026 11:35:58 +0100 Subject: [PATCH 26/31] Refactor EvalRunDetails components: streamline layout, enhance accessibility, and optimize rendering logic across various sections. --- .../EvalRunDetails/components/FocusDrawer.tsx | 177 ++++++++------- .../EvalRunDetails/components/Page.tsx | 4 +- .../components/PreviewEvalRunHeader.tsx | 1 - .../components/EvaluatorSection.tsx | 209 ++++++++---------- .../components/InvocationSection.tsx | 7 +- .../components/TestsetSection.tsx | 9 +- .../views/ConfigurationView/index.tsx | 24 +- .../MetricDetailsPreviewPopover.tsx | 5 +- .../components/References/ReferenceLabels.tsx | 10 +- .../components/References/ReferenceTag.tsx | 2 +- 10 files changed, 230 insertions(+), 218 deletions(-) diff --git a/web/oss/src/components/EvalRunDetails/components/FocusDrawer.tsx b/web/oss/src/components/EvalRunDetails/components/FocusDrawer.tsx index 471af25f0..e2c65a515 100644 --- a/web/oss/src/components/EvalRunDetails/components/FocusDrawer.tsx +++ b/web/oss/src/components/EvalRunDetails/components/FocusDrawer.tsx @@ -3,7 +3,7 @@ import {memo, useCallback, useMemo, useRef, useState} from "react" import {isValidElement} from "react" import {DownOutlined} from "@ant-design/icons" -import {Button, Popover, Skeleton, Tag, Typography} from "antd" +import {Button, Popover, Skeleton, Typography} from "antd" import clsx from "clsx" import {useAtomValue, useSetAtom} from "jotai" import {AlertCircle} from "lucide-react" @@ -14,7 +14,6 @@ import MetricDetailsPreviewPopover from "@/oss/components/Evaluations/components import GenericDrawer from "@/oss/components/GenericDrawer" import SharedGenerationResultUtils from "@/oss/components/SharedGenerationResultUtils" -import ReadOnlyBox from "../../pages/evaluations/onlineEvaluation/components/ReadOnlyBox" import {compareRunIdsAtom, MAX_COMPARISON_RUNS} from "../atoms/compare" import {invocationTraceSummaryAtomFamily} from "../atoms/invocationTraceSummary" import { @@ -55,9 +54,6 @@ import {SectionCard} from "./views/ConfigurationView/components/SectionPrimitive const JsonEditor = dynamic(() => import("@/oss/components/Editor/Editor"), {ssr: false}) -// Color palette for category tags (same as MetricCell) -const TAG_COLORS = ["green", "blue", "purple", "orange", "cyan", "magenta", "gold", "lime"] - const toSectionAnchorId = (value: string) => `focus-section-${value .toLowerCase() @@ -130,6 +126,32 @@ const resolveRunMetricScalar = (stats: any): unknown => { return undefined } +const FocusValueCard = ({ + label, + children, + className, +}: { + label: ReactNode + children: ReactNode + className?: string +}) => ( +
+ {label} +
{children}
+
+) + +const MetricValuePill = ({value, muted}: {value: ReactNode; muted?: boolean}) => ( + + {value} + +) + interface FocusDrawerContentProps { runId: string scenarioId: string @@ -362,32 +384,23 @@ const RunMetricValue = memo( return (
{column.displayLabel ?? column.label ?? column.id} - - {isLoading ? ( - - ) : ( - - - {formattedValue} - - - )} - + {isLoading ? ( + + ) : ( + + + + )}
) }, @@ -538,52 +551,57 @@ const ScenarioColumnValue = memo( })() // Render array metrics as tags in a vertical stack + const isLongTextMetric = + !arrayTags.length && + typeof formattedValue === "string" && + (formattedValue.length > 80 || formattedValue.includes("\n")) + const renderMetricContent = () => { if (arrayTags.length > 0) { return ( -
+
{arrayTags.map((tag, index) => ( - - {tag} - + ))}
) } - return ( - - {formattedValue} - - ) + if (isLongTextMetric) { + return ( + + {formattedValue} + + ) + } + return + } + + const metricContent = showSkeleton ? ( + + ) : ( + + {renderMetricContent()} + + ) + + if (isLongTextMetric) { + return {metricContent} } return ( -
- {displayLabel} - - {showSkeleton ? ( - - ) : ( - - {renderMetricContent()} - - )} - +
+ {displayLabel} + {metricContent}
) } @@ -711,12 +729,7 @@ const ScenarioColumnValue = memo( } } - return ( -
- {displayLabel} - {renderValue()} -
- ) + return {renderValue()} }, ) @@ -778,7 +791,7 @@ const FocusSectionHeader = ({ return (
setCollapsed((value) => !value)} /> {!collapsed ? ( -
- - - -
+ + + ) : null}
) @@ -1110,7 +1121,7 @@ const CompareSectionRow = memo( scrollRef.current = node registerScrollContainer(node) }} - className="overflow-x-auto pb-2 [scrollbar-width:none] [&::-webkit-scrollbar]:hidden" + className="overflow-x-auto [scrollbar-width:none] [&::-webkit-scrollbar]:hidden" onScroll={handleScroll} >
diff --git a/web/oss/src/components/EvalRunDetails/components/Page.tsx b/web/oss/src/components/EvalRunDetails/components/Page.tsx index 4704e2b56..4827164d2 100644 --- a/web/oss/src/components/EvalRunDetails/components/Page.tsx +++ b/web/oss/src/components/EvalRunDetails/components/Page.tsx @@ -138,7 +138,7 @@ const EvalRunPreviewPage = ({runId, evaluationType, projectId = null}: EvalRunPr onChangeView={(v) => setActiveViewParam(v)} /> } - headerClassName="px-4" + headerClassName="px-4 pt-2" >
@@ -175,7 +175,7 @@ const EvalRunPreviewPage = ({runId, evaluationType, projectId = null}: EvalRunPr key: "configuration", label: "Configuration", children: ( -
+
), diff --git a/web/oss/src/components/EvalRunDetails/components/PreviewEvalRunHeader.tsx b/web/oss/src/components/EvalRunDetails/components/PreviewEvalRunHeader.tsx index 1a1fcbc63..78ad5c761 100644 --- a/web/oss/src/components/EvalRunDetails/components/PreviewEvalRunHeader.tsx +++ b/web/oss/src/components/EvalRunDetails/components/PreviewEvalRunHeader.tsx @@ -212,7 +212,6 @@ const PreviewEvalRunMeta = ({
+
+ {!collapsed ? ( +
+ {rawEvaluator ? ( + <> {evaluator.description ? ( {evaluator.description} ) : null} -
-
- {hasEvaluatorJson ? ( - setView(val as "details" | "json")} - /> - ) : null} -
-
- - {!collapsed ? ( - <> -
- {view === "json" && hasEvaluatorJson ? ( -
- -
- ) : ( - - )} -
- - {metricsFallback.length > 0 ? ( -
- Metrics -
- {metricsFallback.map((metric) => ( - - {metric.displayLabel ?? metric.name} - - ))} -
- ) : null} + ) : ( + + )} - ) : null} -
- ) : ( -
- - Evaluator configuration snapshot is unavailable for this run. - - {metricsFallback.length ? ( -
- {metricsFallback.map((metric) => ( - - {metric.displayLabel ?? metric.name} - - ))} + ) : ( + + Evaluator configuration snapshot is unavailable for this run. + + )} + + {metricsFallback.length > 0 ? ( +
+ Metrics +
+ {metricsFallback.map((metric) => ( + + {metric.displayLabel ?? metric.name} + + ))} +
) : null}
- )} + ) : null} ) diff --git a/web/oss/src/components/EvalRunDetails/components/views/ConfigurationView/components/InvocationSection.tsx b/web/oss/src/components/EvalRunDetails/components/views/ConfigurationView/components/InvocationSection.tsx index c95f627e8..38c6a92f3 100644 --- a/web/oss/src/components/EvalRunDetails/components/views/ConfigurationView/components/InvocationSection.tsx +++ b/web/oss/src/components/EvalRunDetails/components/views/ConfigurationView/components/InvocationSection.tsx @@ -233,7 +233,11 @@ const InvocationSection = ({runId}: InvocationSectionProps) => { const headerContent = (
- + {variantId || revisionId ? ( { runId={runId} fallbackVariantName={variantLabel} fallbackRevision={variantVersion} + toneOverride={null} /> ) : variantLabel ? ( {variantLabel} diff --git a/web/oss/src/components/EvalRunDetails/components/views/ConfigurationView/components/TestsetSection.tsx b/web/oss/src/components/EvalRunDetails/components/views/ConfigurationView/components/TestsetSection.tsx index f67cd26d7..3501f1875 100644 --- a/web/oss/src/components/EvalRunDetails/components/views/ConfigurationView/components/TestsetSection.tsx +++ b/web/oss/src/components/EvalRunDetails/components/views/ConfigurationView/components/TestsetSection.tsx @@ -73,7 +73,14 @@ const TestsetCard = ({ return ( } + left={ + + } right={