1
- import { Alert , Button , Input , Select , Space , Table } from "antd" ;
1
+ import {
2
+ Alert ,
3
+ Button ,
4
+ Input ,
5
+ Progress ,
6
+ Select ,
7
+ Space ,
8
+ Table ,
9
+ Tooltip ,
10
+ } from "antd" ;
2
11
3
12
import {
4
13
redux ,
@@ -20,12 +29,13 @@ import { trunc_middle } from "@cocalc/util/misc";
20
29
import { COLORS } from "@cocalc/util/theme" ;
21
30
import { PROMPTS } from "./tests" ;
22
31
import { Value } from "./value" ;
23
-
24
32
/**
 * Outcome of running one test prompt against a single language model.
 */
interface TestResult {
  /** Name of the model the test was run against. */
  model: string;
  /** Lifecycle state of the test. */
  status: "pending" | "running" | "passed" | "failed";
  /** Text accumulated from the model's streamed tokens. */
  output: string;
  /** Stringified stream error, when the stream reported one. */
  error?: string;
  /** Milliseconds from the start of the request until the first token arrived. */
  firstResponseTime?: number;
  /** Milliseconds from the start of the request until the stream completed or errored. */
  totalTime?: number;
}
30
40
31
41
export function TestLLMAdmin ( ) {
@@ -101,6 +111,10 @@ export function TestLLMAdmin() {
101
111
102
112
return new Promise ( ( resolve ) => {
103
113
try {
114
+ const startTime = Date . now ( ) ;
115
+ let firstResponseTime : number | undefined ;
116
+ let totalTime : number | undefined ;
117
+
104
118
const llmStream = webapp_client . openai_client . queryStream ( {
105
119
input : prompt ,
106
120
project_id : null ,
@@ -116,6 +130,10 @@ export function TestLLMAdmin() {
116
130
llmStream . on ( "token" , ( token ) => {
117
131
console . log ( { model, system, token } ) ;
118
132
if ( token != null ) {
133
+ // Record first response time if this is the first token
134
+ if ( firstResponseTime === undefined ) {
135
+ firstResponseTime = Date . now ( ) - startTime ;
136
+ }
119
137
reply += token ;
120
138
// Update the result in real-time
121
139
setTestResults ( ( prev ) =>
@@ -125,22 +143,28 @@ export function TestLLMAdmin() {
125
143
) ;
126
144
} else {
127
145
// Stream is complete (token is null)
146
+ totalTime = Date . now ( ) - startTime ;
128
147
const passed = expectedRegex . test ( reply ) ;
129
148
resolve ( {
130
149
model,
131
150
status : passed ? "passed" : "failed" ,
132
151
output : reply ,
152
+ firstResponseTime,
153
+ totalTime,
133
154
} ) ;
134
155
}
135
156
} ) ;
136
157
137
158
llmStream . on ( "error" , ( err ) => {
159
+ totalTime = Date . now ( ) - startTime ;
138
160
console . error ( `Error in LLM stream for model ${ model } :` , err ) ;
139
161
resolve ( {
140
162
model,
141
163
status : "failed" ,
142
164
output : reply ,
143
165
error : err ?. toString ( ) ,
166
+ firstResponseTime,
167
+ totalTime,
144
168
} ) ;
145
169
} ) ;
146
170
@@ -241,6 +265,57 @@ export function TestLLMAdmin() {
241
265
}
242
266
}
243
267
268
/**
 * Format a duration given in milliseconds as seconds with one decimal
 * place, e.g. 1234 -> "1.2s".
 *
 * @param timeMs - duration in milliseconds; may be missing.
 * @returns the formatted duration, or "-" when there is no usable value
 *   (undefined, null, NaN or an infinite number).
 */
function formatTiming(timeMs: number | null | undefined): string {
  // `== null` matches both null and undefined; the finiteness check keeps
  // "NaNs" / "Infinitys" from ever being shown.
  if (timeMs == null || !Number.isFinite(timeMs)) return "-";
  return `${(timeMs / 1000).toFixed(1)}s`;
}
272
+
273
/**
 * Render the "Timing" table cell for one test result.
 *
 * Shows "first response time / total time" as text with a small progress
 * bar underneath whose length is the total time relative to the slowest
 * recorded run (with a 10 second floor). The bar is drawn in the
 * "exception" style when this run is among the slowest ~10% of all runs
 * that have a total time.
 *
 * @param record - the table row being rendered.
 * @returns a placeholder "-" while the test is pending/running or has no
 *   total time, otherwise the timing text plus progress bar.
 */
function renderTimingColumn(record: TestResult) {
  const { firstResponseTime, totalTime, status } = record;

  // Only the total time is required: a stream that errors before emitting
  // any token still records its total time, and formatTiming renders the
  // missing first-response time as "-".
  if (status === "pending" || status === "running" || totalTime === undefined) {
    return <span style={{ color: COLORS.GRAY_M }}>-</span>;
  }

  // Total times of every result that has one; shared by both computations
  // below so they always agree on which results count.
  const completedTimes = testResults
    .map((r) => r.totalTime)
    .filter((t): t is number => t !== undefined);

  // Scale the bar against the slowest run, but never against less than 10s
  // so that uniformly fast runs do not all show full bars.
  const maxTime = Math.max(10000, ...completedTimes);
  const totalPercent = Math.min(100, (totalTime / maxTime) * 100);

  // Slowest ~10%: with times sorted slowest-first, the threshold is the
  // last entry of the first ceil(n * 0.1) items (at least one item).
  const sortedTimes = [...completedTimes].sort((a, b) => b - a);
  const slowCount = Math.ceil(sortedTimes.length * 0.1);
  const slowThreshold = sortedTimes[slowCount - 1] ?? 0;
  // A single result has nothing to be compared against.
  const isSlow = completedTimes.length > 1 && totalTime >= slowThreshold;

  return (
    <div>
      <Tooltip title="First response time / Total completion time">
        <div style={{ marginBottom: 2 }}>
          {formatTiming(firstResponseTime)}/{formatTiming(totalTime)}
        </div>
      </Tooltip>
      <Progress
        percent={totalPercent}
        size="small"
        status={isSlow ? "exception" : "normal"}
        showInfo={false}
      />
    </div>
  );
}
318
+
244
319
function renderTestResults ( ) {
245
320
if ( testResults . length === 0 ) {
246
321
return (
@@ -292,6 +367,12 @@ export function TestLLMAdmin() {
292
367
< span style = { { color : COLORS . GRAY_M } } > -</ span >
293
368
) ,
294
369
} ,
370
+ {
371
+ title : "Timing" ,
372
+ key : "timing" ,
373
+ width : 120 ,
374
+ render : ( _ , record : TestResult ) => renderTimingColumn ( record ) ,
375
+ } ,
295
376
{
296
377
title : "Test" ,
297
378
key : "test" ,
0 commit comments