Skip to content

Commit d90ee70

Browse files
feat: Add time_to_first_token for Google Gen AI (#3785)
1 parent 81650e1 commit d90ee70

File tree

4 files changed

+43
-2
lines changed

4 files changed

+43
-2
lines changed

lib/llm-events/google-genai/chat-completion-summary.js

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,10 @@ module.exports = class GoogleGenAiLlmChatCompletionSummary extends LlmChatComple
2020
* @param {Transaction} params.transaction Current and active transaction
2121
* @param {object} params.request Google Gen AI request object
2222
* @param {object} params.response Google Gen AI response object
23+
* @param {number} [params.timeOfFirstToken] Timestamp of when the first token was sent, for streaming only.
2324
* @param {boolean} [params.error] Set to `true` if an error occurred
2425
*/
25-
constructor({ agent, segment, transaction, request, response, error }) {
26+
constructor({ agent, segment, transaction, request, response, timeOfFirstToken, error }) {
2627
super({ agent,
2728
segment,
2829
transaction,
@@ -32,6 +33,7 @@ module.exports = class GoogleGenAiLlmChatCompletionSummary extends LlmChatComple
3233
maxTokens: request.config?.maxOutputTokens,
3334
temperature: request.config?.temperature,
3435
vendor: 'gemini',
36+
timeOfFirstToken,
3537
error })
3638

3739
let requestMessagesLength = 0

lib/subscribers/google-genai/generate-content-stream.js

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ class GoogleGenAIGenerateContentStreamSubscriber extends GoogleGenAIGenerateCont
3030
let cachedResult = {}
3131
let err
3232
let entireMessage = ''
33+
let timeOfFirstToken
3334
response.next = async function wrappedNext(...nextArgs) {
3435
let result = {}
3536
try {
@@ -44,6 +45,7 @@ class GoogleGenAIGenerateContentStreamSubscriber extends GoogleGenAIGenerateCont
4445
}
4546

4647
if (result?.value?.text) {
48+
if (!timeOfFirstToken) timeOfFirstToken = Date.now()
4749
entireMessage += result.value.text // readonly variable that equates to result.value.candidates[0].content.parts[0].text
4850
}
4951
} catch (streamErr) {
@@ -64,6 +66,7 @@ class GoogleGenAIGenerateContentStreamSubscriber extends GoogleGenAIGenerateCont
6466
ctx,
6567
request,
6668
response: cachedResult,
69+
timeOfFirstToken,
6770
err
6871
})
6972
}

lib/subscribers/google-genai/generate-content.js

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,17 +56,19 @@ class GoogleGenAIGenerateContentSubscriber extends AiMonitoringChatSubscriber {
5656
* @param {Context} params.ctx active context
5757
* @param {object} params.request request made to method
5858
* @param {object} params.response response from method
59+
* @param {number} [params.timeOfFirstToken] Timestamp of when the first streaming token was sent.
5960
* @param {object} [params.err] error object if present
6061
* @returns {object} a llm completion summary instance for Google Gen AI
6162
*/
62-
createCompletionSummary({ ctx, request, response = {}, err }) {
63+
createCompletionSummary({ ctx, request, response = {}, timeOfFirstToken, err }) {
6364
const { transaction, segment } = ctx
6465
return new LlmChatCompletionSummary({
6566
agent: this.agent,
6667
segment,
6768
transaction,
6869
request,
6970
response,
71+
timeOfFirstToken,
7072
error: !!err
7173
})
7274
}

test/versioned/google-genai/chat-completions.test.js

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,40 @@ test('should call the tokenCountCallback in streaming', (t, end) => {
285285
})
286286
})
287287

288+
test('should set time_to_first_token on llm chat completion summary', (t, end) => {
  const { client, agent } = t.nr
  helper.runInTransaction(agent, async (tx) => {
    const content = 'Streamed response'
    const model = 'gemini-2.0-flash'
    // Kick off a streaming generation so the instrumentation records first-token timing.
    const stream = await client.models.generateContentStream({
      config: { maxOutputTokens: 100, temperature: 0.5 },
      model,
      contents: [content, 'What does 1 plus 1 equal?']
    })

    // Drain the stream; the summary event is only finalized once iteration completes.
    let combined = ''
    for await (const chunk of stream) {
      assert.ok(chunk.text, 'should have text in chunk')
      combined += chunk.text
    }
    assert.ok(combined)

    const events = agent.customEventAggregator.events.toArray()
    const [chatSummary] = events.filter(([{ type }]) => type === 'LlmChatCompletionSummary')
    assert.equal(chatSummary[0].type, 'LlmChatCompletionSummary')
    const timeToFirstToken = chatSummary?.[1]?.['time_to_first_token']
    assert.ok(timeToFirstToken, 'time_to_first_token should exist')
    assert.equal(typeof timeToFirstToken, 'number', 'time_to_first_token should be a number')
    assert.ok(timeToFirstToken >= 0, 'time_to_first_token should be >= 0')

    tx.end()
    end()
  })
})
321+
288322
test('handles error in stream', (t, end) => {
289323
const { client, agent } = t.nr
290324
helper.runInTransaction(agent, async (tx) => {

0 commit comments

Comments (0)