Skip to content

Commit 4304745

Browse files
authored
Merge pull request #2942 from bluewave-labs/llm-evals-module
Fixed overview trackers
2 parents 247af5d + e39c34e commit 4304745

File tree

1 file changed

+91
-51
lines changed

1 file changed

+91
-51
lines changed

Clients/src/presentation/pages/EvalsDashboard/ProjectOverview.tsx

Lines changed: 91 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ import { cardStyles } from "../../themes";
1414
import CustomizableButton from "../../components/Button/CustomizableButton";
1515
import VWLink from "../../components/Link/VWLink";
1616
import { deepEvalProjectsService } from "../../../infrastructure/api/deepEvalProjectsService";
17-
import { experimentsService, monitoringService, type Experiment, type MonitorDashboard } from "../../../infrastructure/api/evaluationLogsService";
17+
import { experimentsService, monitoringService, evaluationLogsService, type Experiment, type MonitorDashboard, type EvaluationLog } from "../../../infrastructure/api/evaluationLogsService";
1818
import NewExperimentModal from "./NewExperimentModal";
1919
import type { DeepEvalProject } from "./types";
2020
import { useNavigate } from "react-router-dom";
@@ -49,78 +49,83 @@ const StatCard: React.FC<StatCardProps> = ({ title, value, Icon, subtitle }) =>
4949
sx={{
5050
...(cardStyles.base(theme) as Record<string, unknown>),
5151
background: "linear-gradient(135deg, #FEFFFE 0%, #F8F9FA 100%)",
52-
border: "1px solid #DCDFE3",
52+
border: "1px solid #E5E7EB",
5353
height: "100%",
54-
minHeight: "90px",
54+
minHeight: "80px",
5555
position: "relative",
5656
transition: "all 0.2s ease",
5757
display: "flex",
5858
flexDirection: "column",
5959
boxSizing: "border-box",
60-
borderRadius: "4px",
60+
borderRadius: "8px",
6161
overflow: "hidden",
6262
"&:hover": {
6363
background: "linear-gradient(135deg, #F9FAFB 0%, #F1F5F9 100%)",
64+
borderColor: "#D1D5DB",
6465
},
6566
}}
6667
>
6768
<CardContent
6869
sx={{
69-
p: 2,
70+
p: "14px 16px",
7071
position: "relative",
7172
height: "100%",
7273
display: "flex",
7374
flexDirection: "column",
7475
flex: 1,
7576
overflow: "hidden",
76-
"&:last-child": { pb: 2 },
77+
"&:last-child": { pb: "14px" },
7778
}}
7879
>
7980
{/* Background Icon */}
8081
<Box
8182
sx={{
8283
position: "absolute",
83-
bottom: "-24px",
84-
right: "-24px",
85-
opacity: isHovered ? 0.06 : 0.025,
86-
transform: isHovered ? "translateY(-5px)" : "translateY(0px)",
84+
bottom: "-20px",
85+
right: "-20px",
86+
opacity: isHovered ? 0.06 : 0.03,
87+
transform: isHovered ? "translateY(-4px)" : "translateY(0px)",
8788
zIndex: 0,
8889
pointerEvents: "none",
8990
transition: "opacity 0.2s ease, transform 0.3s ease",
9091
}}
9192
>
92-
<Icon size={80} />
93+
<Icon size={64} />
9394
</Box>
9495

9596
{/* Content */}
9697
<Box sx={{ position: "relative", zIndex: 1 }}>
9798
<Typography
9899
variant="body2"
99100
sx={{
100-
color: theme.palette.text.secondary,
101-
fontSize: "12px",
102-
fontWeight: 400,
103-
mb: 1,
101+
color: "#6B7280",
102+
fontSize: "11px",
103+
fontWeight: 500,
104+
textTransform: "uppercase",
105+
letterSpacing: "0.5px",
106+
mb: 0.5,
104107
}}
105108
>
106109
{title}
107110
</Typography>
108111
<Typography
109112
sx={{
110-
fontSize: "28px",
113+
fontSize: "20px",
111114
fontWeight: 600,
112-
color: theme.palette.text.primary,
113-
lineHeight: 1.2,
115+
color: "#111827",
116+
lineHeight: 1.3,
117+
fontFamily: "'Inter', -apple-system, BlinkMacSystemFont, sans-serif",
114118
}}
115119
>
116120
{value}
117121
</Typography>
118122
{subtitle && (
119123
<Typography
120124
sx={{
121-
fontSize: "11px",
122-
color: theme.palette.text.secondary,
123-
mt: 0.5,
125+
fontSize: "10px",
126+
color: "#9CA3AF",
127+
mt: 0.25,
128+
fontWeight: 400,
124129
}}
125130
>
126131
{subtitle}
@@ -141,6 +146,7 @@ export default function ProjectOverview({
141146
const navigate = useNavigate();
142147
const [loading, setLoading] = useState(true);
143148
const [experiments, setExperiments] = useState<Experiment[]>([]);
149+
const [evaluationLogs, setEvaluationLogs] = useState<EvaluationLog[]>([]);
144150
const [dashboardData, setDashboardData] = useState<MonitorDashboard | null>(null);
145151
const [newExperimentModalOpen, setNewExperimentModalOpen] = useState(false);
146152

@@ -158,13 +164,15 @@ export default function ProjectOverview({
158164
onProjectUpdate(projectData.project);
159165
}
160166

161-
// Load experiments and dashboard data in parallel
162-
const [experimentsData, dashboardResponse] = await Promise.all([
163-
experimentsService.getExperiments({ project_id: projectId, limit: 10 }),
167+
// Load experiments, logs, and dashboard data in parallel
168+
const [experimentsData, logsData, dashboardResponse] = await Promise.all([
169+
experimentsService.getExperiments({ project_id: projectId, limit: 100 }),
170+
evaluationLogsService.getLogs({ project_id: projectId, limit: 1000 }).catch(() => ({ logs: [] })),
164171
monitoringService.getDashboard(projectId).catch(() => ({ data: null })),
165172
]);
166173

167174
setExperiments(experimentsData.experiments || []);
175+
setEvaluationLogs(logsData.logs || []);
168176
setDashboardData(dashboardResponse.data);
169177
} catch (err) {
170178
console.error("Failed to load overview data:", err);
@@ -200,12 +208,6 @@ export default function ProjectOverview({
200208
return `${Math.round(ms)}ms`;
201209
};
202210

203-
const formatPercentage = (rate: number | undefined): string => {
204-
if (rate === undefined || rate === null || isNaN(rate)) return "-";
205-
const successRate = 100 - rate;
206-
return `${successRate.toFixed(1)}%`;
207-
};
208-
209211
const formatScore = (score: number | undefined): string => {
210212
if (score === undefined || score === null || isNaN(score)) return "-";
211213
return score.toFixed(2);
@@ -221,20 +223,56 @@ export default function ProjectOverview({
221223

222224
const hasExperiments = experiments.length > 0;
223225

224-
// Extract metrics from dashboard data
225-
const totalEvals = dashboardData?.logs?.total ?? 0;
226-
const successRate = dashboardData?.logs?.error_rate !== undefined
227-
? formatPercentage(dashboardData.logs.error_rate)
228-
: "-";
229-
const avgLatency = dashboardData?.metrics?.latency?.average !== undefined
230-
? formatLatency(dashboardData.metrics.latency.average)
231-
: "-";
232-
const avgScore = dashboardData?.metrics?.score_average?.average !== undefined
233-
? formatScore(dashboardData.metrics.score_average.average)
234-
: "-";
235-
const totalTokens = dashboardData?.metrics?.token_count?.average !== undefined && dashboardData?.logs?.total
236-
? formatNumber(dashboardData.metrics.token_count.average * dashboardData.logs.total)
226+
// Calculate metrics from experiments data
227+
const totalExperiments = experiments.length;
228+
const completedExperiments = experiments.filter(e => e.status === "completed").length;
229+
const failedExperiments = experiments.filter(e => e.status === "failed").length;
230+
231+
// Success rate: completed / (completed + failed) - ignore running/pending
232+
const finishedExperiments = completedExperiments + failedExperiments;
233+
const successRate = finishedExperiments > 0
234+
? `${((completedExperiments / finishedExperiments) * 100).toFixed(0)}%`
237235
: "-";
236+
237+
// Calculate avg latency from evaluation logs (each log = one prompt evaluation)
238+
const logsWithLatency = evaluationLogs.filter(log =>
239+
typeof log.latency_ms === 'number' && !isNaN(log.latency_ms) && log.latency_ms > 0
240+
);
241+
const avgLatency = logsWithLatency.length > 0
242+
? formatLatency(logsWithLatency.reduce((sum, log) => sum + (log.latency_ms || 0), 0) / logsWithLatency.length)
243+
: dashboardData?.metrics?.latency?.average !== undefined
244+
? formatLatency(dashboardData.metrics.latency.average)
245+
: "-";
246+
247+
// Calculate avg score from experiment results (avg_scores contains metric averages)
248+
const experimentsWithResults = experiments.filter(e => e.results && typeof e.results === 'object');
249+
const allScores: number[] = [];
250+
experimentsWithResults.forEach(e => {
251+
const results = e.results as Record<string, unknown>;
252+
const avgScores = results?.avg_scores as Record<string, number> | undefined;
253+
if (avgScores && typeof avgScores === 'object') {
254+
// Get all metric scores and average them
255+
const metricValues = Object.values(avgScores).filter((v): v is number => typeof v === 'number' && !isNaN(v) && v > 0);
256+
if (metricValues.length > 0) {
257+
allScores.push(metricValues.reduce((a, b) => a + b, 0) / metricValues.length);
258+
}
259+
}
260+
});
261+
const avgScore = allScores.length > 0
262+
? formatScore(allScores.reduce((a, b) => a + b, 0) / allScores.length)
263+
: dashboardData?.metrics?.score_average?.average !== undefined
264+
? formatScore(dashboardData.metrics.score_average.average)
265+
: "-";
266+
267+
// Calculate total tokens from evaluation logs
268+
const logsWithTokens = evaluationLogs.filter(log =>
269+
typeof log.token_count === 'number' && !isNaN(log.token_count) && log.token_count > 0
270+
);
271+
const totalTokens = logsWithTokens.length > 0
272+
? formatNumber(logsWithTokens.reduce((sum, log) => sum + (log.token_count || 0), 0))
273+
: dashboardData?.metrics?.token_count?.average !== undefined && dashboardData?.logs?.total
274+
? formatNumber(dashboardData.metrics.token_count.average * dashboardData.logs.total)
275+
: "-";
238276

239277
return (
240278
<Box>
@@ -273,14 +311,16 @@ export default function ProjectOverview({
273311
{/* Top row: 4 stat cards */}
274312
<Box sx={{ display: "grid", gridTemplateColumns: "repeat(4, 1fr)", gap: "16px", mb: "16px" }}>
275313
<StatCard
276-
title="Total evaluations"
277-
value={formatNumber(totalEvals)}
278-
Icon={Activity}
314+
title="Experiments"
315+
value={formatNumber(totalExperiments)}
316+
Icon={Beaker}
317+
subtitle={`${completedExperiments} completed`}
279318
/>
280319
<StatCard
281320
title="Success rate"
282321
value={successRate}
283322
Icon={CheckCircle}
323+
subtitle={finishedExperiments > 0 ? `${finishedExperiments} finished` : undefined}
284324
/>
285325
<StatCard
286326
title="Avg latency"
@@ -457,13 +497,13 @@ export default function ProjectOverview({
457497
title="Total tokens"
458498
value={totalTokens}
459499
Icon={Coins}
460-
subtitle="Across all evaluations"
500+
subtitle="Across all experiments"
461501
/>
462502
<StatCard
463-
title="Experiments"
464-
value={formatNumber(experiments.length)}
465-
Icon={Beaker}
466-
subtitle="Total experiments run"
503+
title="Running"
504+
value={experiments.filter(e => e.status === "running").length}
505+
Icon={Activity}
506+
subtitle="Experiments in progress"
467507
/>
468508
</Box>
469509
</Box>

0 commit comments

Comments (0)