Skip to content

Commit e13c6a4

Browse files
committed
add more metric logging to v2 response streaming endpoint
1 parent c52464e commit e13c6a4

File tree

2 files changed

+18
-0
lines changed

2 files changed

+18
-0
lines changed

src/server/routes/openai.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,8 @@ openaiRouter.post('/stream/v2', upload.single('file'), async (r, res) => {
237237
course: course?.name?.fi,
238238
ragIndexId: ragIndex?.id,
239239
fileSize: req.file?.size,
240+
timeToFirstToken: result.timeToFirstToken,
241+
tokensPerSecond: result.tokensPerSecond,
240242
}
241243

242244
logger.info(`Stream ended. Total tokens: ${tokenCount}`, chatCompletionMeta)

src/server/util/azure/ResponsesAPI.ts

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ export class ResponsesClient {
3232
user: User
3333
ragIndex?: RagIndex
3434
clientStreamError: Error | null = null
35+
startTime?: number
3536

3637
constructor({
3738
model,
@@ -78,6 +79,8 @@ export class ResponsesClient {
7879
include?: ResponseIncludable[]
7980
attemptNumber?: number
8081
}): Promise<Stream<ResponseStreamEvent>> {
82+
this.startTime = Date.now()
83+
8184
try {
8285
const sanitizedInput = validatedInputSchema.parse(input)
8386

@@ -122,6 +125,8 @@ export class ResponsesClient {
122125

123126
async handleResponse({ stream, encoding, res }: { stream: Stream<ResponseStreamEvent>; encoding: Tiktoken; res: Response }) {
124127
let tokenCount = 0
128+
let firstTokenTS = 0
129+
let timeToFirstToken: number | undefined = undefined
125130
const contents: string[] = []
126131

127132
for await (const event of stream) {
@@ -142,6 +147,12 @@ export class ResponsesClient {
142147

143148
contents.push(event.delta)
144149
tokenCount += encoding.encode(event.delta).length ?? 0
150+
151+
if (!timeToFirstToken && this.startTime) {
152+
firstTokenTS = Date.now()
153+
timeToFirstToken = firstTokenTS - this.startTime
154+
}
155+
145156
break
146157
}
147158

@@ -229,8 +240,13 @@ export class ResponsesClient {
229240
}
230241
}
231242

243+
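// Time from first token to completion, in milliseconds (NOTE(review): tokensPerSecond below divides by this value without converting ms to seconds)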
244+
const tokenStreamingDuration = Date.now() - firstTokenTS
245+
232246
return {
233247
tokenCount,
248+
timeToFirstToken,
249+
tokensPerSecond: timeToFirstToken ? tokenCount / tokenStreamingDuration : undefined,
234250
response: contents.join(''),
235251
}
236252
}

0 commit comments

Comments
 (0)