Skip to content

Commit 1e5bf3d

Browse files
fix(client): properly support srt and vtt in audio transcriptions. (#472)
* feat(api): adding srt and vtt support for audio transcriptions. * chore: refactoring * chore: fix import format --------- Co-authored-by: Tomer Aberbach <[email protected]>
1 parent 90371d7 commit 1e5bf3d

File tree

4 files changed

+84
-4
lines changed

4 files changed

+84
-4
lines changed

openai-java-core/src/main/kotlin/com/openai/models/audio/AudioResponseFormat.kt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,17 @@ class AudioResponseFormat @JsonCreator private constructor(private val value: Js
142142
*/
143143
@JvmSynthetic internal fun validity(): Int = if (value() == Value._UNKNOWN) 0 else 1
144144

145+
/**
 * Reports whether this response format is one of the JSON-based formats.
 *
 * Only [JSON] and [VERBOSE_JSON] yield `true`; every other value (including [TEXT], [SRT],
 * [VTT], and any value not known to this SDK version) yields `false`.
 */
@JvmSynthetic
internal fun isJson(): Boolean =
    when (this) {
        JSON,
        VERBOSE_JSON -> true
        else -> false
    }
155+
145156
override fun equals(other: Any?): Boolean {
146157
if (this === other) {
147158
return true

openai-java-core/src/main/kotlin/com/openai/services/async/audio/TranscriptionServiceAsyncImpl.kt

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,12 @@ import com.openai.core.handlers.errorHandler
99
import com.openai.core.handlers.jsonHandler
1010
import com.openai.core.handlers.mapJson
1111
import com.openai.core.handlers.sseHandler
12+
import com.openai.core.handlers.stringHandler
1213
import com.openai.core.handlers.withErrorHandler
1314
import com.openai.core.http.AsyncStreamResponse
1415
import com.openai.core.http.HttpMethod
1516
import com.openai.core.http.HttpRequest
17+
import com.openai.core.http.HttpResponse
1618
import com.openai.core.http.HttpResponse.Handler
1719
import com.openai.core.http.HttpResponseFor
1820
import com.openai.core.http.StreamResponse
@@ -22,10 +24,12 @@ import com.openai.core.http.parseable
2224
import com.openai.core.http.toAsync
2325
import com.openai.core.prepareAsync
2426
import com.openai.models.ErrorObject
27+
import com.openai.models.audio.transcriptions.Transcription
2528
import com.openai.models.audio.transcriptions.TranscriptionCreateParams
2629
import com.openai.models.audio.transcriptions.TranscriptionCreateResponse
2730
import com.openai.models.audio.transcriptions.TranscriptionStreamEvent
2831
import java.util.concurrent.CompletableFuture
32+
import kotlin.jvm.optionals.getOrNull
2933

3034
class TranscriptionServiceAsyncImpl internal constructor(private val clientOptions: ClientOptions) :
3135
TranscriptionServiceAsync {
@@ -58,9 +62,19 @@ class TranscriptionServiceAsyncImpl internal constructor(private val clientOptio
5862

5963
private val errorHandler: Handler<ErrorObject?> = errorHandler(clientOptions.jsonMapper)
6064

61-
private val createHandler: Handler<TranscriptionCreateResponse> =
65+
/**
 * Handler built from [jsonHandler] with the client's JSON mapper and wrapped with
 * [errorHandler]. `create` selects it unless the requested response format reports
 * `isJson() == false` (so it is also used when no response format was specified).
 */
private val createJsonHandler: Handler<TranscriptionCreateResponse> =
    jsonHandler<TranscriptionCreateResponse>(clientOptions.jsonMapper).withErrorHandler(errorHandler)
68+
/**
 * Handler for responses whose requested format is not JSON-based.
 *
 * The response is passed through [stringHandler] (wrapped with [errorHandler]) and the resulting
 * string becomes the `text` of a [Transcription], which is then exposed as a
 * [TranscriptionCreateResponse].
 */
private val createStringHandler: Handler<TranscriptionCreateResponse> =
    object : Handler<TranscriptionCreateResponse> {

        // Named distinctly from the `stringHandler()` factory function it is built from.
        private val textHandler = stringHandler().withErrorHandler(errorHandler)

        override fun handle(response: HttpResponse): TranscriptionCreateResponse {
            val text = textHandler.handle(response)
            val transcription = Transcription.builder().text(text).build()
            return TranscriptionCreateResponse.ofTranscription(transcription)
        }
    }
6478

6579
override fun create(
6680
params: TranscriptionCreateParams,
@@ -81,9 +95,13 @@ class TranscriptionServiceAsyncImpl internal constructor(private val clientOptio
8195
return request
8296
.thenComposeAsync { clientOptions.httpClient.executeAsync(it, requestOptions) }
8397
.thenApply { response ->
98+
val handler =
99+
if (params.responseFormat().getOrNull()?.isJson() != false)
100+
createJsonHandler
101+
else createStringHandler
84102
response.parseable {
85103
response
86-
.use { createHandler.handle(it) }
104+
.use { handler.handle(it) }
87105
.also {
88106
if (requestOptions.responseValidation!!) {
89107
it.validate()

openai-java-core/src/main/kotlin/com/openai/services/blocking/audio/TranscriptionServiceImpl.kt

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,11 @@ import com.openai.core.handlers.errorHandler
99
import com.openai.core.handlers.jsonHandler
1010
import com.openai.core.handlers.mapJson
1111
import com.openai.core.handlers.sseHandler
12+
import com.openai.core.handlers.stringHandler
1213
import com.openai.core.handlers.withErrorHandler
1314
import com.openai.core.http.HttpMethod
1415
import com.openai.core.http.HttpRequest
16+
import com.openai.core.http.HttpResponse
1517
import com.openai.core.http.HttpResponse.Handler
1618
import com.openai.core.http.HttpResponseFor
1719
import com.openai.core.http.StreamResponse
@@ -20,9 +22,11 @@ import com.openai.core.http.multipartFormData
2022
import com.openai.core.http.parseable
2123
import com.openai.core.prepare
2224
import com.openai.models.ErrorObject
25+
import com.openai.models.audio.transcriptions.Transcription
2326
import com.openai.models.audio.transcriptions.TranscriptionCreateParams
2427
import com.openai.models.audio.transcriptions.TranscriptionCreateResponse
2528
import com.openai.models.audio.transcriptions.TranscriptionStreamEvent
29+
import kotlin.jvm.optionals.getOrNull
2630

2731
class TranscriptionServiceImpl internal constructor(private val clientOptions: ClientOptions) :
2832
TranscriptionService {
@@ -52,9 +56,19 @@ class TranscriptionServiceImpl internal constructor(private val clientOptions: C
5256

5357
private val errorHandler: Handler<ErrorObject?> = errorHandler(clientOptions.jsonMapper)
5458

55-
private val createHandler: Handler<TranscriptionCreateResponse> =
59+
/**
 * Handler built from [jsonHandler] with the client's JSON mapper and wrapped with
 * [errorHandler]. `create` selects it unless the requested response format reports
 * `isJson() == false` (so it is also used when no response format was specified).
 */
private val createJsonHandler: Handler<TranscriptionCreateResponse> =
    jsonHandler<TranscriptionCreateResponse>(clientOptions.jsonMapper).withErrorHandler(errorHandler)
62+
/**
 * Handler for responses whose requested format is not JSON-based.
 *
 * The response is passed through [stringHandler] (wrapped with [errorHandler]) and the resulting
 * string becomes the `text` of a [Transcription], which is then exposed as a
 * [TranscriptionCreateResponse].
 */
private val createStringHandler: Handler<TranscriptionCreateResponse> =
    object : Handler<TranscriptionCreateResponse> {

        // Named distinctly from the `stringHandler()` factory function it is built from.
        private val textHandler = stringHandler().withErrorHandler(errorHandler)

        override fun handle(response: HttpResponse): TranscriptionCreateResponse {
            val text = textHandler.handle(response)
            val transcription = Transcription.builder().text(text).build()
            return TranscriptionCreateResponse.ofTranscription(transcription)
        }
    }
5872

5973
override fun create(
6074
params: TranscriptionCreateParams,
@@ -70,8 +84,11 @@ class TranscriptionServiceImpl internal constructor(private val clientOptions: C
7084
val requestOptions = requestOptions.applyDefaults(RequestOptions.from(clientOptions))
7185
val response = clientOptions.httpClient.execute(request, requestOptions)
7286
return response.parseable {
87+
val handler =
88+
if (params.responseFormat().getOrNull()?.isJson() != false) createJsonHandler
89+
else createStringHandler
7390
response
74-
.use { createHandler.handle(it) }
91+
.use { handler.handle(it) }
7592
.also {
7693
if (requestOptions.responseValidation!!) {
7794
it.validate()
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
package com.openai.example;
2+
3+
import com.openai.client.OpenAIClient;
4+
import com.openai.client.okhttp.OpenAIOkHttpClient;
5+
import com.openai.models.audio.AudioModel;
6+
import com.openai.models.audio.AudioResponseFormat;
7+
import com.openai.models.audio.transcriptions.Transcription;
8+
import com.openai.models.audio.transcriptions.TranscriptionCreateParams;
9+
import java.nio.file.Path;
10+
import java.nio.file.Paths;
11+
12+
public final class AudioTranscriptionsVttExample {
13+
private AudioTranscriptionsVttExample() {}
14+
15+
public static void main(String[] args) throws Exception {
16+
// Configures using one of:
17+
// - The `OPENAI_API_KEY` environment variable
18+
// - The `OPENAI_BASE_URL` and `AZURE_OPENAI_KEY` environment variables
19+
OpenAIClient client = OpenAIOkHttpClient.fromEnv();
20+
21+
ClassLoader classloader = Thread.currentThread().getContextClassLoader();
22+
Path path = Paths.get(classloader.getResource("sports.wav").toURI());
23+
24+
TranscriptionCreateParams createParams = TranscriptionCreateParams.builder()
25+
.file(path)
26+
.model(AudioModel.WHISPER_1)
27+
.responseFormat(AudioResponseFormat.VTT)
28+
.build();
29+
30+
Transcription transcription =
31+
client.audio().transcriptions().create(createParams).asTranscription();
32+
System.out.println(transcription.text());
33+
}
34+
}

0 commit comments

Comments
 (0)