@@ -78,6 +78,11 @@ export interface Transcription {
78
78
* to the `include` array.
79
79
*/
80
80
logprobs ?: Array < Transcription . Logprob > ;
81
+
82
+ /**
83
+ * Usage statistics for the request, reported either as token counts or as input audio duration.
84
+ */
85
+ usage ?: Transcription . Tokens | Transcription . Duration ;
81
86
}
82
87
83
88
export namespace Transcription {
@@ -97,6 +102,68 @@ export namespace Transcription {
97
102
*/
98
103
logprob ?: number ;
99
104
}
105
+
/**
 * Usage statistics reported when the model is billed by token usage.
 */
export interface Tokens {
  /**
   * Kind of usage object. This variant always carries the literal `tokens`.
   */
  type: 'tokens';

  /**
   * How many input tokens were billed for this request.
   */
  input_tokens: number;

  /**
   * How many output tokens were generated.
   */
  output_tokens: number;

  /**
   * Overall token count for the request (input plus output).
   */
  total_tokens: number;

  /**
   * Optional breakdown of the input tokens billed for this request.
   */
  input_token_details?: Tokens.InputTokenDetails;
}

export namespace Tokens {
  /**
   * Breakdown of the input tokens billed for this request.
   */
  export interface InputTokenDetails {
    /**
     * How many audio tokens were billed for this request.
     */
    audio_tokens?: number;

    /**
     * How many text tokens were billed for this request.
     */
    text_tokens?: number;
  }
}
152
+
/**
 * Usage statistics reported when the model is billed by input audio duration.
 */
export interface Duration {
  /**
   * Kind of usage object. This variant always carries the literal `duration`.
   */
  type: 'duration';

  /**
   * Length of the input audio, in seconds.
   */
  duration: number;
}
100
167
}
101
168
/**
 * Extra information that can be requested through the `include[]` parameter.
 * `logprobs` is the only value declared here.
 */
export type TranscriptionInclude = 'logprobs';
@@ -232,6 +299,11 @@ export interface TranscriptionTextDoneEvent {
232
299
* with the `include[]` parameter set to `logprobs`.
233
300
*/
234
301
logprobs ?: Array < TranscriptionTextDoneEvent . Logprob > ;
302
+
303
+ /**
304
+ * Usage statistics for models billed by token usage.
305
+ */
306
+ usage ?: TranscriptionTextDoneEvent . Usage ;
235
307
}
236
308
237
309
export namespace TranscriptionTextDoneEvent {
@@ -251,6 +323,53 @@ export namespace TranscriptionTextDoneEvent {
251
323
*/
252
324
logprob ?: number ;
253
325
}
326
+
/**
 * Usage statistics reported when the model is billed by token usage.
 */
export interface Usage {
  /**
   * Kind of usage object. This variant always carries the literal `tokens`.
   */
  type: 'tokens';

  /**
   * How many input tokens were billed for this request.
   */
  input_tokens: number;

  /**
   * How many output tokens were generated.
   */
  output_tokens: number;

  /**
   * Overall token count for the request (input plus output).
   */
  total_tokens: number;

  /**
   * Optional breakdown of the input tokens billed for this request.
   */
  input_token_details?: Usage.InputTokenDetails;
}

export namespace Usage {
  /**
   * Breakdown of the input tokens billed for this request.
   */
  export interface InputTokenDetails {
    /**
     * How many audio tokens were billed for this request.
     */
    audio_tokens?: number;

    /**
     * How many text tokens were billed for this request.
     */
    text_tokens?: number;
  }
}
254
373
}
255
374
256
375
/**
@@ -278,12 +397,34 @@ export interface TranscriptionVerbose {
278
397
*/
279
398
segments ?: Array < TranscriptionSegment > ;
280
399
400
+ /**
401
+ * Usage statistics for models billed by audio input duration.
402
+ */
403
+ usage ?: TranscriptionVerbose . Usage ;
404
+
281
405
/**
282
406
* Extracted words and their corresponding timestamps.
283
407
*/
284
408
words ?: Array < TranscriptionWord > ;
285
409
}
286
410
export namespace TranscriptionVerbose {
  /**
   * Usage statistics reported when the model is billed by input audio
   * duration.
   */
  export interface Usage {
    /**
     * Kind of usage object. This variant always carries the literal
     * `duration`.
     */
    type: 'duration';

    /**
     * Length of the input audio, in seconds.
     */
    duration: number;
  }
}
427
+
287
428
export interface TranscriptionWord {
288
429
/**
289
430
* End time of the word in seconds.
0 commit comments