@@ -78,6 +78,11 @@ export interface Transcription {
7878 * to the `include` array.
7979 */
8080 logprobs ?: Array < Transcription . Logprob > ;
81+
82+ /**
83+ * Usage statistics for the request: token counts or input audio duration, depending on how the model is billed.
84+ */
85+ usage ?: Transcription . Tokens | Transcription . Duration ;
8186}
8287
8388export namespace Transcription {
@@ -97,6 +102,68 @@ export namespace Transcription {
97102 */
98103 logprob ?: number ;
99104 }
105+
/**
 * Usage payload reported for models that are billed per token.
 */
export interface Tokens {
  /**
   * Kind of usage object. This variant always carries the literal `tokens`.
   */
  type: 'tokens';

  /**
   * Count of input tokens billed for this request.
   */
  input_tokens: number;

  /**
   * Count of output tokens that were generated.
   */
  output_tokens: number;

  /**
   * Overall token count for the request (input plus output).
   */
  total_tokens: number;

  /**
   * Optional breakdown of the input tokens billed for this request.
   */
  input_token_details?: Tokens.InputTokenDetails;
}

export namespace Tokens {
  /**
   * Breakdown of the input tokens billed for this request.
   */
  export interface InputTokenDetails {
    /**
     * Count of audio tokens billed for this request.
     */
    audio_tokens?: number;

    /**
     * Count of text tokens billed for this request.
     */
    text_tokens?: number;
  }
}
152+
/**
 * Usage payload reported for models that are billed by the duration of the
 * audio input.
 */
export interface Duration {
  /**
   * Kind of usage object. This variant always carries the literal `duration`.
   */
  type: 'duration';

  /**
   * Length of the input audio, in seconds.
   */
  duration: number;
}
100167}
101168
102169export type TranscriptionInclude = 'logprobs' ;
@@ -232,6 +299,11 @@ export interface TranscriptionTextDoneEvent {
232299 * with the `include[]` parameter set to `logprobs`.
233300 */
234301 logprobs ?: Array < TranscriptionTextDoneEvent . Logprob > ;
302+
303+ /**
304+ * Usage statistics for models billed by token usage.
305+ */
306+ usage ?: TranscriptionTextDoneEvent . Usage ;
235307}
236308
237309export namespace TranscriptionTextDoneEvent {
@@ -251,6 +323,53 @@ export namespace TranscriptionTextDoneEvent {
251323 */
252324 logprob ?: number ;
253325 }
326+
/**
 * Usage payload reported for models that are billed per token.
 */
export interface Usage {
  /**
   * Kind of usage object. This variant always carries the literal `tokens`.
   */
  type: 'tokens';

  /**
   * Count of input tokens billed for this request.
   */
  input_tokens: number;

  /**
   * Count of output tokens that were generated.
   */
  output_tokens: number;

  /**
   * Overall token count for the request (input plus output).
   */
  total_tokens: number;

  /**
   * Optional breakdown of the input tokens billed for this request.
   */
  input_token_details?: Usage.InputTokenDetails;
}

export namespace Usage {
  /**
   * Breakdown of the input tokens billed for this request.
   */
  export interface InputTokenDetails {
    /**
     * Count of audio tokens billed for this request.
     */
    audio_tokens?: number;

    /**
     * Count of text tokens billed for this request.
     */
    text_tokens?: number;
  }
}
254373}
255374
256375/**
@@ -278,12 +397,34 @@ export interface TranscriptionVerbose {
278397 */
279398 segments ?: Array < TranscriptionSegment > ;
280399
400+ /**
401+ * Usage statistics for models billed by audio input duration.
402+ */
403+ usage ?: TranscriptionVerbose . Usage ;
404+
281405 /**
282406 * Extracted words and their corresponding timestamps.
283407 */
284408 words ?: Array < TranscriptionWord > ;
285409}
286410
export namespace TranscriptionVerbose {
  /**
   * Usage payload reported for models that are billed by the duration of the
   * audio input.
   */
  export interface Usage {
    /**
     * Kind of usage object. This variant always carries the literal `duration`.
     */
    type: 'duration';

    /**
     * Length of the input audio, in seconds.
     */
    duration: number;
  }
}
427+
287428export interface TranscriptionWord {
288429 /**
289430 * End time of the word in seconds.
0 commit comments