@@ -32,6 +32,7 @@ export class ResponsesClient {
32
32
user : User
33
33
ragIndex ?: RagIndex
34
34
clientStreamError : Error | null = null
35
+ startTime ?: number
35
36
36
37
constructor ( {
37
38
model,
@@ -78,6 +79,8 @@ export class ResponsesClient {
78
79
include ?: ResponseIncludable [ ]
79
80
attemptNumber ?: number
80
81
} ) : Promise < Stream < ResponseStreamEvent > > {
82
+ this . startTime = Date . now ( )
83
+
81
84
try {
82
85
const sanitizedInput = validatedInputSchema . parse ( input )
83
86
@@ -122,6 +125,8 @@ export class ResponsesClient {
122
125
123
126
async handleResponse ( { stream, encoding, res } : { stream : Stream < ResponseStreamEvent > ; encoding : Tiktoken ; res : Response } ) {
124
127
let tokenCount = 0
128
+ let firstTokenTS = 0
129
+ let timeToFirstToken : number | undefined = undefined
125
130
const contents : string [ ] = [ ]
126
131
127
132
for await ( const event of stream ) {
@@ -142,6 +147,12 @@ export class ResponsesClient {
142
147
143
148
contents . push ( event . delta )
144
149
tokenCount += encoding . encode ( event . delta ) . length ?? 0
150
+
151
+ if ( ! timeToFirstToken && this . startTime ) {
152
+ firstTokenTS = Date . now ( )
153
+ timeToFirstToken = firstTokenTS - this . startTime
154
+ }
155
+
145
156
break
146
157
}
147
158
@@ -229,8 +240,13 @@ export class ResponsesClient {
229
240
}
230
241
}
231
242
243
+ // Time from first token to completion
244
+ const tokenStreamingDuration = Date . now ( ) - firstTokenTS
245
+
232
246
return {
233
247
tokenCount,
248
+ timeToFirstToken,
249
+ tokensPerSecond : timeToFirstToken ? tokenCount / tokenStreamingDuration : undefined ,
234
250
response : contents . join ( '' ) ,
235
251
}
236
252
}
0 commit comments