Skip to content

Commit 5358bb8

Browse files
authored
brianyin/ajs-298-unify-all-metric-duration-to-ms (#765)
1 parent 7817d79 commit 5358bb8

File tree

12 files changed

+94
-69
lines changed

12 files changed

+94
-69
lines changed

.changeset/hungry-olives-sink.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
---
2+
'@livekit/agents-plugin-google': patch
3+
'@livekit/agents-plugin-openai': patch
4+
'@livekit/agents': patch
5+
---
6+
7+
Convert and rename all time-based metric fields to \*Ms variants

agents/src/audio.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import { AudioFrame } from '@livekit/rtc-node';
55
import { log } from './log.js';
66
import type { AudioBuffer } from './utils.js';
77

8-
export function calculateAudioDuration(frame: AudioBuffer) {
8+
export function calculateAudioDurationSeconds(frame: AudioBuffer) {
99
// TODO(AJS-102): use frame.durationMs once available in rtc-node
1010
return Array.isArray(frame)
1111
? frame.reduce((sum, a) => sum + a.samplesPerChannel / a.sampleRate, 0)

agents/src/llm/llm.ts

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -203,21 +203,24 @@ export abstract class LLMStream implements AsyncIterableIterator<ChatChunk> {
203203
this.output.close();
204204

205205
const duration = process.hrtime.bigint() - startTime;
206+
const durationMs = Math.trunc(Number(duration / BigInt(1000000)));
206207
const metrics: LLMMetrics = {
207208
type: 'llm_metrics',
208209
timestamp: Date.now(),
209210
requestId,
210-
ttft: ttft === BigInt(-1) ? -1 : Math.trunc(Number(ttft / BigInt(1000000))),
211-
duration: Math.trunc(Number(duration / BigInt(1000000))),
211+
ttftMs: ttft === BigInt(-1) ? -1 : Math.trunc(Number(ttft / BigInt(1000000))),
212+
durationMs,
212213
cancelled: this.abortController.signal.aborted,
213214
label: this.#llm.label(),
214215
completionTokens: usage?.completionTokens || 0,
215216
promptTokens: usage?.promptTokens || 0,
216217
promptCachedTokens: usage?.promptCachedTokens || 0,
217218
totalTokens: usage?.totalTokens || 0,
218219
tokensPerSecond: (() => {
219-
const durationSeconds = Math.trunc(Number(duration / BigInt(1000000000)));
220-
return durationSeconds > 0 ? (usage?.completionTokens || 0) / durationSeconds : 0;
220+
if (durationMs <= 0) {
221+
return 0;
222+
}
223+
return (usage?.completionTokens || 0) / (durationMs / 1000);
221224
})(),
222225
};
223226
this.#llm.emit('metrics_collected', metrics);

agents/src/metrics/base.ts

Lines changed: 23 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,10 @@ export type LLMMetrics = {
1515
label: string;
1616
requestId: string;
1717
timestamp: number;
18-
duration: number; // in milliseconds
19-
ttft: number;
18+
/** Duration of the request in milliseconds. */
19+
durationMs: number;
20+
/** Time to first token in milliseconds. */
21+
ttftMs: number;
2022
cancelled: boolean;
2123
completionTokens: number;
2224
promptTokens: number;
@@ -32,13 +34,13 @@ export type STTMetrics = {
3234
requestId: string;
3335
timestamp: number;
3436
/**
35-
* The request duration in seconds, 0.0 if the STT is streaming.
37+
* The request duration in milliseconds, 0.0 if the STT is streaming.
3638
*/
37-
duration: number;
39+
durationMs: number;
3840
/**
39-
* The duration of the pushed audio in seconds.
41+
* The duration of the pushed audio in milliseconds.
4042
*/
41-
audioDuration: number;
43+
audioDurationMs: number;
4244
/**
4345
* Whether the STT is streaming (e.g., using a websocket).
4446
*/
@@ -50,9 +52,12 @@ export type TTSMetrics = {
5052
label: string;
5153
requestId: string;
5254
timestamp: number;
53-
ttfb: number;
54-
duration: number;
55-
audioDuration: number;
55+
/** Time to first byte in milliseconds. */
56+
ttfbMs: number;
57+
/** Total synthesis duration in milliseconds. */
58+
durationMs: number;
59+
/** Generated audio duration in milliseconds. */
60+
audioDurationMs: number;
5661
cancelled: boolean;
5762
charactersCount: number;
5863
streamed: boolean;
@@ -64,8 +69,8 @@ export type VADMetrics = {
6469
type: 'vad_metrics';
6570
label: string;
6671
timestamp: number;
67-
idleTime: number;
68-
inferenceDurationTotal: number;
72+
idleTimeMs: number;
73+
inferenceDurationTotalMs: number;
6974
inferenceCount: number;
7075
};
7176

@@ -76,16 +81,16 @@ export type EOUMetrics = {
7681
* Amount of time between the end of speech from VAD and the decision to end the user's turn.
7782
* Set to 0.0 if the end of speech was not detected.
7883
*/
79-
endOfUtteranceDelay: number;
84+
endOfUtteranceDelayMs: number;
8085
/**
8186
* Time taken to obtain the transcript after the end of the user's speech.
8287
* Set to 0.0 if the end of speech was not detected.
8388
*/
84-
transcriptionDelay: number;
89+
transcriptionDelayMs: number;
8590
/**
8691
* Time taken to invoke the user's `Agent.onUserTurnCompleted` callback.
8792
*/
88-
onUserTurnCompletedDelay: number;
93+
onUserTurnCompletedDelayMs: number;
8994
speechId?: string;
9095
};
9196

@@ -118,13 +123,13 @@ export type RealtimeModelMetrics = {
118123
*/
119124
timestamp: number;
120125
/**
121-
* The duration of the response from created to done in seconds.
126+
* The duration of the response from created to done in milliseconds.
122127
*/
123-
duration: number;
128+
durationMs: number;
124129
/**
125-
* Time to first audio token in seconds. -1 if no audio token was sent.
130+
* Time to first audio token in milliseconds. -1 if no audio token was sent.
126131
*/
127-
ttft: number;
132+
ttftMs: number;
128133
/**
129134
* Whether the request was cancelled.
130135
*/

agents/src/metrics/usage_collector.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ export interface UsageSummary {
88
llmPromptCachedTokens: number;
99
llmCompletionTokens: number;
1010
ttsCharactersCount: number;
11-
sttAudioDuration: number;
11+
sttAudioDurationMs: number;
1212
}
1313

1414
export class UsageCollector {
@@ -20,7 +20,7 @@ export class UsageCollector {
2020
llmPromptCachedTokens: 0,
2121
llmCompletionTokens: 0,
2222
ttsCharactersCount: 0,
23-
sttAudioDuration: 0,
23+
sttAudioDurationMs: 0,
2424
};
2525
}
2626

@@ -36,7 +36,7 @@ export class UsageCollector {
3636
} else if (metrics.type === 'tts_metrics') {
3737
this.summary.ttsCharactersCount += metrics.charactersCount;
3838
} else if (metrics.type === 'stt_metrics') {
39-
this.summary.sttAudioDuration += metrics.audioDuration;
39+
this.summary.sttAudioDurationMs += metrics.audioDurationMs;
4040
}
4141
}
4242

agents/src/metrics/utils.ts

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ export const logMetrics = (metrics: AgentMetrics) => {
1313
if (metrics.type === 'llm_metrics') {
1414
logger
1515
.child({
16-
ttft: roundTwoDecimals(metrics.ttft),
16+
ttftMs: roundTwoDecimals(metrics.ttftMs),
1717
inputTokens: metrics.promptTokens,
1818
promptCachedTokens: metrics.promptCachedTokens,
1919
outputTokens: metrics.completionTokens,
@@ -23,7 +23,7 @@ export const logMetrics = (metrics: AgentMetrics) => {
2323
} else if (metrics.type === 'realtime_model_metrics') {
2424
logger
2525
.child({
26-
ttft: roundTwoDecimals(metrics.ttft),
26+
ttftMs: roundTwoDecimals(metrics.ttftMs),
2727
input_tokens: metrics.inputTokens,
2828
cached_input_tokens: metrics.inputTokenDetails.cachedTokens,
2929
output_tokens: metrics.outputTokens,
@@ -34,21 +34,30 @@ export const logMetrics = (metrics: AgentMetrics) => {
3434
} else if (metrics.type === 'tts_metrics') {
3535
logger
3636
.child({
37-
ttfb: roundTwoDecimals(metrics.ttfb),
38-
audioDuration: metrics.audioDuration,
37+
ttfbMs: roundTwoDecimals(metrics.ttfbMs),
38+
audioDurationMs: Math.round(metrics.audioDurationMs),
3939
})
4040
.info('TTS metrics');
4141
} else if (metrics.type === 'eou_metrics') {
4242
logger
4343
.child({
44-
end_of_utterance_delay: roundTwoDecimals(metrics.endOfUtteranceDelay),
45-
transcription_delay: roundTwoDecimals(metrics.transcriptionDelay),
44+
endOfUtteranceDelayMs: roundTwoDecimals(metrics.endOfUtteranceDelayMs),
45+
transcriptionDelayMs: roundTwoDecimals(metrics.transcriptionDelayMs),
46+
onUserTurnCompletedDelayMs: roundTwoDecimals(metrics.onUserTurnCompletedDelayMs),
4647
})
4748
.info('EOU metrics');
49+
} else if (metrics.type === 'vad_metrics') {
50+
logger
51+
.child({
52+
idleTimeMs: Math.round(metrics.idleTimeMs),
53+
inferenceDurationTotalMs: Math.round(metrics.inferenceDurationTotalMs),
54+
inferenceCount: metrics.inferenceCount,
55+
})
56+
.info('VAD metrics');
4857
} else if (metrics.type === 'stt_metrics') {
4958
logger
5059
.child({
51-
audioDuration: metrics.audioDuration,
60+
audioDurationMs: Math.round(metrics.audioDurationMs),
5261
})
5362
.info('STT metrics');
5463
}

agents/src/stt/stt.ts

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
66
import { EventEmitter } from 'node:events';
77
import type { ReadableStream } from 'node:stream/web';
88
import { APIConnectionError, APIError } from '../_exceptions.js';
9-
import { calculateAudioDuration } from '../audio.js';
9+
import { calculateAudioDurationSeconds } from '../audio.js';
1010
import { log } from '../log.js';
1111
import type { STTMetrics } from '../metrics/base.js';
1212
import { DeferredReadableStream } from '../stream/deferred_stream.js';
@@ -110,14 +110,14 @@ export abstract class STT extends (EventEmitter as new () => TypedEmitter<STTCal
110110
async recognize(frame: AudioBuffer): Promise<SpeechEvent> {
111111
const startTime = process.hrtime.bigint();
112112
const event = await this._recognize(frame);
113-
const duration = Number((process.hrtime.bigint() - startTime) / BigInt(1000000));
113+
const durationMs = Number((process.hrtime.bigint() - startTime) / BigInt(1000000));
114114
this.emit('metrics_collected', {
115115
type: 'stt_metrics',
116116
requestId: event.requestId ?? '',
117117
timestamp: Date.now(),
118-
duration,
118+
durationMs,
119119
label: this.label,
120-
audioDuration: calculateAudioDuration(frame),
120+
audioDurationMs: Math.round(calculateAudioDurationSeconds(frame) * 1000),
121121
streamed: false,
122122
});
123123
return event;
@@ -252,9 +252,9 @@ export abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent>
252252
type: 'stt_metrics',
253253
timestamp: Date.now(),
254254
requestId: event.requestId!,
255-
duration: 0,
255+
durationMs: 0,
256256
label: this.#stt.label,
257-
audioDuration: event.recognitionUsage!.audioDuration,
257+
audioDurationMs: Math.round(event.recognitionUsage!.audioDuration * 1000),
258258
streamed: true,
259259
};
260260
this.#stt.emit('metrics_collected', metrics);

agents/src/tts/tts.ts

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -228,22 +228,23 @@ export abstract class SynthesizeStream
228228

229229
protected async monitorMetrics() {
230230
const startTime = process.hrtime.bigint();
231-
let audioDuration = 0;
231+
let audioDurationMs = 0;
232232
let ttfb: bigint = BigInt(-1);
233233
let requestId = '';
234234

235235
const emit = () => {
236236
if (this.#metricsPendingTexts.length) {
237237
const text = this.#metricsPendingTexts.shift()!;
238238
const duration = process.hrtime.bigint() - startTime;
239+
const roundedAudioDurationMs = Math.round(audioDurationMs);
239240
const metrics: TTSMetrics = {
240241
type: 'tts_metrics',
241242
timestamp: Date.now(),
242243
requestId,
243-
ttfb: ttfb === BigInt(-1) ? -1 : Math.trunc(Number(ttfb / BigInt(1000000))),
244-
duration: Math.trunc(Number(duration / BigInt(1000000))),
244+
ttfbMs: ttfb === BigInt(-1) ? -1 : Math.trunc(Number(ttfb / BigInt(1000000))),
245+
durationMs: Math.trunc(Number(duration / BigInt(1000000))),
245246
charactersCount: text.length,
246-
audioDuration,
247+
audioDurationMs: roundedAudioDurationMs,
247248
cancelled: this.abortController.signal.aborted,
248249
label: this.#tts.label,
249250
streamed: false,
@@ -263,7 +264,7 @@ export abstract class SynthesizeStream
263264
ttfb = process.hrtime.bigint() - startTime;
264265
}
265266
// TODO(AJS-102): use frame.durationMs once available in rtc-node
266-
audioDuration += audio.frame.samplesPerChannel / audio.frame.sampleRate;
267+
audioDurationMs += (audio.frame.samplesPerChannel / audio.frame.sampleRate) * 1000;
267268
if (audio.final) {
268269
emit();
269270
}
@@ -436,7 +437,7 @@ export abstract class ChunkedStream implements AsyncIterableIterator<Synthesized
436437

437438
protected async monitorMetrics() {
438439
const startTime = process.hrtime.bigint();
439-
let audioDuration = 0;
440+
let audioDurationMs = 0;
440441
let ttfb: bigint = BigInt(-1);
441442
let requestId = '';
442443

@@ -446,7 +447,7 @@ export abstract class ChunkedStream implements AsyncIterableIterator<Synthesized
446447
if (ttfb === BigInt(-1)) {
447448
ttfb = process.hrtime.bigint() - startTime;
448449
}
449-
audioDuration += audio.frame.samplesPerChannel / audio.frame.sampleRate;
450+
audioDurationMs += (audio.frame.samplesPerChannel / audio.frame.sampleRate) * 1000;
450451
}
451452
this.output.close();
452453

@@ -455,10 +456,10 @@ export abstract class ChunkedStream implements AsyncIterableIterator<Synthesized
455456
type: 'tts_metrics',
456457
timestamp: Date.now(),
457458
requestId,
458-
ttfb: ttfb === BigInt(-1) ? -1 : Math.trunc(Number(ttfb / BigInt(1000000))),
459-
duration: Math.trunc(Number(duration / BigInt(1000000))),
459+
ttfbMs: ttfb === BigInt(-1) ? -1 : Math.trunc(Number(ttfb / BigInt(1000000))),
460+
durationMs: Math.trunc(Number(duration / BigInt(1000000))),
460461
charactersCount: this.#text.length,
461-
audioDuration,
462+
audioDurationMs: Math.round(audioDurationMs),
462463
cancelled: false, // TODO(AJS-186): support ChunkedStream with 1.0 - add this.abortController.signal.aborted here
463464
label: this.#tts.label,
464465
streamed: false,

agents/src/vad.ts

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ export abstract class VADStream implements AsyncIterableIterator<VADEvent> {
139139
}
140140

141141
protected async monitorMetrics() {
142-
let inferenceDurationTotal = 0;
142+
let inferenceDurationTotalMs = 0;
143143
let inferenceCount = 0;
144144
const metricsReader = this.metricsStream.getReader();
145145
while (true) {
@@ -154,20 +154,20 @@ export abstract class VADStream implements AsyncIterableIterator<VADEvent> {
154154
this.#vad.emit('metrics_collected', {
155155
type: 'vad_metrics',
156156
timestamp: Date.now(),
157-
idleTime: Math.trunc(
157+
idleTimeMs: Math.trunc(
158158
Number((process.hrtime.bigint() - this.#lastActivityTime) / BigInt(1000000)),
159159
),
160-
inferenceDurationTotal,
160+
inferenceDurationTotalMs,
161161
inferenceCount,
162162
label: this.#vad.label,
163163
});
164164

165165
inferenceCount = 0;
166-
inferenceDurationTotal = 0;
166+
inferenceDurationTotalMs = 0;
167167
}
168168
break;
169169
case VADEventType.INFERENCE_DONE:
170-
inferenceDurationTotal += value.inferenceDuration;
170+
inferenceDurationTotalMs += Math.round(value.inferenceDuration);
171171
this.#lastActivityTime = process.hrtime.bigint();
172172
break;
173173
case VADEventType.END_OF_SPEECH:

agents/src/voice/agent_activity.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -984,9 +984,9 @@ export class AgentActivity implements RecognitionHooks {
984984
const eouMetrics: EOUMetrics = {
985985
type: 'eou_metrics',
986986
timestamp: Date.now(),
987-
endOfUtteranceDelay: info.endOfUtteranceDelay,
988-
transcriptionDelay: info.transcriptionDelay,
989-
onUserTurnCompletedDelay: callbackDuration,
987+
endOfUtteranceDelayMs: info.endOfUtteranceDelay,
988+
transcriptionDelayMs: info.transcriptionDelay,
989+
onUserTurnCompletedDelayMs: callbackDuration,
990990
speechId: speechHandle.id,
991991
};
992992

0 commit comments

Comments
 (0)