Skip to content

Commit edca997

Browse files
make to receive voice option as a string
Signed-off-by: jonghoon park <[email protected]>
1 parent 8329402 commit edca997

File tree

10 files changed

+60
-43
lines changed

10 files changed

+60
-43
lines changed

auto-configurations/models/spring-ai-autoconfigure-model-openai/src/main/java/org/springframework/ai/model/openai/autoconfigure/OpenAiAudioSpeechProperties.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
*
3030
* @author Ahmed Yousri
3131
* @author Stefan Vassilev
32+
* @author Jonghoon Park
3233
*/
3334
@ConfigurationProperties(OpenAiAudioSpeechProperties.CONFIG_PREFIX)
3435
public class OpenAiAudioSpeechProperties extends OpenAiParentProperties {
@@ -39,7 +40,7 @@ public class OpenAiAudioSpeechProperties extends OpenAiParentProperties {
3940

4041
private static final Float SPEED = 1.0f;
4142

42-
private static final OpenAiAudioApi.SpeechRequest.Voice VOICE = OpenAiAudioApi.SpeechRequest.Voice.ALLOY;
43+
private static final String VOICE = OpenAiAudioApi.SpeechRequest.Voice.ALLOY.getValue();
4344

4445
private static final OpenAiAudioApi.SpeechRequest.AudioResponseFormat DEFAULT_RESPONSE_FORMAT = OpenAiAudioApi.SpeechRequest.AudioResponseFormat.MP3;
4546

auto-configurations/models/spring-ai-autoconfigure-model-openai/src/test/java/org/springframework/ai/model/openai/autoconfigure/OpenAiPropertiesTests.java

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2023-2024 the original author or authors.
2+
* Copyright 2023-2025 the original author or authors.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -40,6 +40,7 @@
4040
*
4141
* @author Christian Tzolov
4242
* @author Thomas Vitale
43+
* @author Jonghoon Park
4344
* @since 0.8.0
4445
*/
4546
public class OpenAiPropertiesTests {
@@ -177,7 +178,7 @@ public void speechProperties() {
177178

178179
assertThat(speechProperties.getOptions().getModel()).isEqualTo("TTS_1");
179180
assertThat(speechProperties.getOptions().getVoice())
180-
.isEqualTo(OpenAiAudioApi.SpeechRequest.Voice.ALLOY);
181+
.isEqualTo(OpenAiAudioApi.SpeechRequest.Voice.ALLOY.getValue());
181182
assertThat(speechProperties.getOptions().getResponseFormat())
182183
.isEqualTo(OpenAiAudioApi.SpeechRequest.AudioResponseFormat.MP3);
183184
assertThat(speechProperties.getOptions().getSpeed()).isEqualTo(0.75f);
@@ -205,7 +206,7 @@ public void speechPropertiesTest() {
205206

206207
assertThat(speechProperties.getOptions().getModel()).isEqualTo("TTS_1");
207208
assertThat(speechProperties.getOptions().getVoice())
208-
.isEqualTo(OpenAiAudioApi.SpeechRequest.Voice.ALLOY);
209+
.isEqualTo(OpenAiAudioApi.SpeechRequest.Voice.ALLOY.getValue());
209210
assertThat(speechProperties.getOptions().getResponseFormat())
210211
.isEqualTo(OpenAiAudioApi.SpeechRequest.AudioResponseFormat.MP3);
211212
assertThat(speechProperties.getOptions().getSpeed()).isEqualTo(0.75f);
@@ -237,7 +238,8 @@ public void speechOverrideConnectionPropertiesTest() {
237238
assertThat(speechProperties.getBaseUrl()).isEqualTo("TEST_BASE_URL2");
238239

239240
assertThat(speechProperties.getOptions().getModel()).isEqualTo("TTS_2");
240-
assertThat(speechProperties.getOptions().getVoice()).isEqualTo(OpenAiAudioApi.SpeechRequest.Voice.ECHO);
241+
assertThat(speechProperties.getOptions().getVoice())
242+
.isEqualTo(OpenAiAudioApi.SpeechRequest.Voice.ECHO.getValue());
241243
assertThat(speechProperties.getOptions().getResponseFormat())
242244
.isEqualTo(OpenAiAudioApi.SpeechRequest.AudioResponseFormat.OPUS);
243245
assertThat(speechProperties.getOptions().getSpeed()).isEqualTo(0.5f);

models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiAudioSpeechModel.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2023-2024 the original author or authors.
2+
* Copyright 2023-2025 the original author or authors.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -42,6 +42,7 @@
4242
* @author Ahmed Yousri
4343
* @author Hyunjoon Choi
4444
* @author Thomas Vitale
45+
* @author Jonghoon Park
4546
* @see OpenAiAudioApi
4647
* @since 1.0.0-M1
4748
*/
@@ -81,7 +82,7 @@ public OpenAiAudioSpeechModel(OpenAiAudioApi audioApi) {
8182
OpenAiAudioSpeechOptions.builder()
8283
.model(OpenAiAudioApi.TtsModel.TTS_1.getValue())
8384
.responseFormat(AudioResponseFormat.MP3)
84-
.voice(OpenAiAudioApi.SpeechRequest.Voice.ALLOY)
85+
.voice(OpenAiAudioApi.SpeechRequest.Voice.ALLOY.getValue())
8586
.speed(SPEED)
8687
.build());
8788
}

models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiAudioSpeechOptions.java

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2023-2024 the original author or authors.
2+
* Copyright 2023-2025 the original author or authors.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -29,14 +29,15 @@
2929
* @author Ahmed Yousri
3030
* @author Hyunjoon Choi
3131
* @author Ilayaperumal Gopinathan
32+
* @author Jonghoon Park
3233
* @since 1.0.0-M1
3334
*/
3435
@JsonInclude(JsonInclude.Include.NON_NULL)
3536
public class OpenAiAudioSpeechOptions implements ModelOptions {
3637

3738
/**
38-
* ID of the model to use for generating the audio. One of the available TTS models:
39-
* tts-1 or tts-1-hd.
39+
* ID of the model to use for generating the audio. For OpenAI's TTS API, use one of
40+
* the available models: tts-1 or tts-1-hd.
4041
*/
4142
@JsonProperty("model")
4243
private String model;
@@ -48,11 +49,11 @@ public class OpenAiAudioSpeechOptions implements ModelOptions {
4849
private String input;
4950

5051
/**
51-
* The voice to use for synthesis. One of the available voices for the chosen model:
52-
* 'alloy', 'echo', 'fable', 'onyx', 'nova', and 'shimmer'.
52+
* The voice to use for synthesis. For OpenAI's TTS API, use one of the available voices
53+
* for the chosen model: 'alloy', 'echo', 'fable', 'onyx', 'nova', and 'shimmer'.
5354
*/
5455
@JsonProperty("voice")
55-
private Voice voice;
56+
private String voice;
5657

5758
/**
5859
* The format of the audio output. Supported formats are mp3, opus, aac, and flac.
@@ -88,11 +89,11 @@ public void setInput(String input) {
8889
this.input = input;
8990
}
9091

91-
public Voice getVoice() {
92+
public String getVoice() {
9293
return this.voice;
9394
}
9495

95-
public void setVoice(Voice voice) {
96+
public void setVoice(String voice) {
9697
this.voice = voice;
9798
}
9899

@@ -197,7 +198,7 @@ public Builder input(String input) {
197198
return this;
198199
}
199200

200-
public Builder voice(Voice voice) {
201+
public Builder voice(String voice) {
201202
this.options.voice = voice;
202203
return this;
203204
}

models/spring-ai-openai/src/main/java/org/springframework/ai/openai/api/OpenAiAudioApi.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2023-2024 the original author or authors.
2+
* Copyright 2023-2025 the original author or authors.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -47,6 +47,7 @@
4747
*
4848
* @author Christian Tzolov
4949
* @author Ilayaperumal Gopinathan
50+
* @author Jonghoon Park
5051
* @since 0.8.1
5152
*/
5253
public class OpenAiAudioApi {
@@ -330,7 +331,7 @@ public record SpeechRequest(
330331
// @formatter:off
331332
@JsonProperty("model") String model,
332333
@JsonProperty("input") String input,
333-
@JsonProperty("voice") Voice voice,
334+
@JsonProperty("voice") String voice,
334335
@JsonProperty("response_format") AudioResponseFormat responseFormat,
335336
@JsonProperty("speed") Float speed) {
336337
// @formatter:on
@@ -415,7 +416,7 @@ public static class Builder {
415416

416417
private String input;
417418

418-
private Voice voice;
419+
private String voice;
419420

420421
private AudioResponseFormat responseFormat = AudioResponseFormat.MP3;
421422

@@ -431,7 +432,7 @@ public Builder input(String input) {
431432
return this;
432433
}
433434

434-
public Builder voice(Voice voice) {
435+
public Builder voice(String voice) {
435436
this.voice = voice;
436437
return this;
437438
}

models/spring-ai-openai/src/test/java/org/springframework/ai/openai/audio/api/OpenAiAudioApiIT.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2023-2024 the original author or authors.
2+
* Copyright 2023-2025 the original author or authors.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -37,6 +37,7 @@
3737

3838
/**
3939
* @author Christian Tzolov
40+
* @author Jonghoon Park
4041
*/
4142
@EnabledIfEnvironmentVariable(named = "OPENAI_API_KEY", matches = ".+")
4243
public class OpenAiAudioApiIT {
@@ -53,7 +54,7 @@ void speechTranscriptionAndTranslation() throws IOException {
5354
.createSpeech(SpeechRequest.builder()
5455
.model(TtsModel.TTS_1_HD.getValue())
5556
.input("Hello, my name is Chris and I love Spring A.I.")
56-
.voice(Voice.ONYX)
57+
.voice(Voice.ONYX.getValue())
5758
.build())
5859
.getBody();
5960

models/spring-ai-openai/src/test/java/org/springframework/ai/openai/audio/api/OpenAiAudioModelNoOpApiKeysIT.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131

3232
/**
3333
* @author Ilayaperumal Gopinathan
34+
* @author Jonghoon Park
3435
*/
3536
@SpringBootTest(classes = OpenAiAudioModelNoOpApiKeysIT.Config.class)
3637
@EnabledIfEnvironmentVariable(named = "OPENAI_API_KEY", matches = ".+")
@@ -46,7 +47,7 @@ void checkNoOpKey() {
4647
.createSpeech(OpenAiAudioApi.SpeechRequest.builder()
4748
.model(OpenAiAudioApi.TtsModel.TTS_1_HD.getValue())
4849
.input("Hello, my name is Chris and I love Spring A.I.")
49-
.voice(OpenAiAudioApi.SpeechRequest.Voice.ONYX)
50+
.voice(OpenAiAudioApi.SpeechRequest.Voice.ONYX.getValue())
5051
.build())
5152
.getBody();
5253
}).isInstanceOf(NonTransientAiException.class);

models/spring-ai-openai/src/test/java/org/springframework/ai/openai/audio/speech/OpenAiSpeechModelIT.java

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2023-2024 the original author or authors.
2+
* Copyright 2023-2025 the original author or authors.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -33,6 +33,10 @@
3333

3434
import static org.assertj.core.api.Assertions.assertThat;
3535

36+
/**
37+
* @author Ahmed Yousri
38+
* @author Jonghoon Park
39+
*/
3640
@SpringBootTest(classes = OpenAiTestConfiguration.class)
3741
@EnabledIfEnvironmentVariable(named = "OPENAI_API_KEY", matches = ".+")
3842
class OpenAiSpeechModelIT extends AbstractIT {
@@ -57,7 +61,7 @@ void shouldProduceAudioBytesDirectlyFromMessage() {
5761
@Test
5862
void shouldGenerateNonEmptyMp3AudioFromSpeechPrompt() {
5963
OpenAiAudioSpeechOptions speechOptions = OpenAiAudioSpeechOptions.builder()
60-
.voice(OpenAiAudioApi.SpeechRequest.Voice.ALLOY)
64+
.voice(OpenAiAudioApi.SpeechRequest.Voice.ALLOY.getValue())
6165
.speed(SPEED)
6266
.responseFormat(OpenAiAudioApi.SpeechRequest.AudioResponseFormat.MP3)
6367
.model(OpenAiAudioApi.TtsModel.TTS_1.value)
@@ -75,7 +79,7 @@ void shouldGenerateNonEmptyMp3AudioFromSpeechPrompt() {
7579
@Test
7680
void speechRateLimitTest() {
7781
OpenAiAudioSpeechOptions speechOptions = OpenAiAudioSpeechOptions.builder()
78-
.voice(OpenAiAudioApi.SpeechRequest.Voice.ALLOY)
82+
.voice(OpenAiAudioApi.SpeechRequest.Voice.ALLOY.getValue())
7983
.speed(SPEED)
8084
.responseFormat(OpenAiAudioApi.SpeechRequest.AudioResponseFormat.MP3)
8185
.model(OpenAiAudioApi.TtsModel.TTS_1.value)
@@ -95,7 +99,7 @@ void speechRateLimitTest() {
9599
void shouldStreamNonEmptyResponsesForValidSpeechPrompts() {
96100

97101
OpenAiAudioSpeechOptions speechOptions = OpenAiAudioSpeechOptions.builder()
98-
.voice(OpenAiAudioApi.SpeechRequest.Voice.ALLOY)
102+
.voice(OpenAiAudioApi.SpeechRequest.Voice.ALLOY.getValue())
99103
.speed(SPEED)
100104
.responseFormat(OpenAiAudioApi.SpeechRequest.AudioResponseFormat.MP3)
101105
.model(OpenAiAudioApi.TtsModel.TTS_1.value)
@@ -117,7 +121,7 @@ void shouldStreamNonEmptyResponsesForValidSpeechPrompts() {
117121
@ValueSource(strings = { "alloy", "echo", "fable", "onyx", "nova", "shimmer", "sage", "coral", "ash" })
118122
void speechVoicesTest(String voice) {
119123
OpenAiAudioSpeechOptions speechOptions = OpenAiAudioSpeechOptions.builder()
120-
.voice(OpenAiAudioApi.SpeechRequest.Voice.valueOf(voice.toUpperCase()))
124+
.voice(voice)
121125
.speed(SPEED)
122126
.responseFormat(OpenAiAudioApi.SpeechRequest.AudioResponseFormat.MP3)
123127
.model(OpenAiAudioApi.TtsModel.TTS_1.value)

models/spring-ai-openai/src/test/java/org/springframework/ai/openai/audio/speech/OpenAiSpeechModelWithSpeechResponseMetadataTests.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646

4747
/**
4848
* @author Ahmed Yousri
49+
* @author Jonghoon Park
4950
*/
5051
@RestClientTest(OpenAiSpeechModelWithSpeechResponseMetadataTests.Config.class)
5152
public class OpenAiSpeechModelWithSpeechResponseMetadataTests {
@@ -71,7 +72,7 @@ void aiResponseContainsImageResponseMetadata() {
7172
prepareMock();
7273

7374
OpenAiAudioSpeechOptions speechOptions = OpenAiAudioSpeechOptions.builder()
74-
.voice(OpenAiAudioApi.SpeechRequest.Voice.ALLOY)
75+
.voice(OpenAiAudioApi.SpeechRequest.Voice.ALLOY.getValue())
7576
.speed(SPEED)
7677
.responseFormat(OpenAiAudioApi.SpeechRequest.AudioResponseFormat.MP3)
7778
.model(OpenAiAudioApi.TtsModel.TTS_1.value)

spring-ai-docs/src/main/antora/modules/ROOT/pages/api/audio/speech/openai-speech.adoc

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,8 @@ The prefix `spring.ai.openai.audio.speech` is used as the property prefix that l
8585
| spring.ai.openai.audio.speech.api-key | The API Key | -
8686
| spring.ai.openai.audio.speech.organization-id | Optionally, you can specify which organization is used for an API request. | -
8787
| spring.ai.openai.audio.speech.project-id | Optionally, you can specify which project is used for an API request. | -
88-
| spring.ai.openai.audio.speech.options.model | ID of the model to use. Only tts-1 is currently available. | tts-1
89-
| spring.ai.openai.audio.speech.options.voice | The voice to use for the TTS output. Available options are: alloy, echo, fable, onyx, nova, and shimmer. | alloy
88+
| spring.ai.openai.audio.speech.options.model | ID of the model to use for generating the audio. For OpenAI's TTS API, use one of the available models: tts-1 or tts-1-hd. | tts-1
89+
| spring.ai.openai.audio.speech.options.voice | The voice to use for synthesis. For OpenAI's TTS API, use one of the available voices for the chosen model: alloy, echo, fable, onyx, nova, and shimmer. | alloy
9090
| spring.ai.openai.audio.speech.options.response-format | The format of the audio output. Supported formats are mp3, opus, aac, flac, wav, and pcm. | mp3
9191
| spring.ai.openai.audio.speech.options.speed | The speed of the voice synthesis. The acceptable range is from 0.25 (slowest) to 4.0 (fastest). | 1.0
9292
|====
@@ -113,8 +113,8 @@ OpenAiAudioSpeechOptions speechOptions = OpenAiAudioSpeechOptions.builder()
113113
.speed(1.0f)
114114
.build();
115115
116-
SpeechPrompt speechPrompt = new SpeechPrompt("Hello, this is a text-to-speech example.", this.speechOptions);
117-
SpeechResponse response = openAiAudioSpeechModel.call(this.speechPrompt);
116+
SpeechPrompt speechPrompt = new SpeechPrompt("Hello, this is a text-to-speech example.", speechOptions);
117+
SpeechResponse response = openAiAudioSpeechModel.call(speechPrompt);
118118
----
119119

120120
== Manual Configuration
@@ -144,23 +144,25 @@ Next, create an `OpenAiAudioSpeechModel`:
144144

145145
[source,java]
146146
----
147-
var openAiAudioApi = new OpenAiAudioApi(System.getenv("OPENAI_API_KEY"));
147+
var openAiAudioApi = OpenAiAudioApi.builder()
148+
.apiKey(System.getenv("OPENAI_API_KEY"))
149+
.build();
148150
149-
var openAiAudioSpeechModel = new OpenAiAudioSpeechModel(this.openAiAudioApi);
151+
var openAiAudioSpeechModel = new OpenAiAudioSpeechModel(openAiAudioApi);
150152
151153
var speechOptions = OpenAiAudioSpeechOptions.builder()
152154
.responseFormat(OpenAiAudioApi.SpeechRequest.AudioResponseFormat.MP3)
153155
.speed(1.0f)
154156
.model(OpenAiAudioApi.TtsModel.TTS_1.value)
155157
.build();
156158
157-
var speechPrompt = new SpeechPrompt("Hello, this is a text-to-speech example.", this.speechOptions);
158-
SpeechResponse response = this.openAiAudioSpeechModel.call(this.speechPrompt);
159+
var speechPrompt = new SpeechPrompt("Hello, this is a text-to-speech example.", speechOptions);
160+
SpeechResponse response = openAiAudioSpeechModel.call(speechPrompt);
159161
160162
// Accessing metadata (rate limit info)
161-
OpenAiAudioSpeechResponseMetadata metadata = this.response.getMetadata();
163+
OpenAiAudioSpeechResponseMetadata metadata = response.getMetadata();
162164
163-
byte[] responseAsBytes = this.response.getResult().getOutput();
165+
byte[] responseAsBytes = response.getResult().getOutput();
164166
----
165167

166168
== Streaming Real-time Audio
@@ -169,9 +171,11 @@ The Speech API provides support for real-time audio streaming using chunk transf
169171

170172
[source,java]
171173
----
172-
var openAiAudioApi = new OpenAiAudioApi(System.getenv("OPENAI_API_KEY"));
174+
var openAiAudioApi = OpenAiAudioApi.builder()
175+
.apiKey(System.getenv("OPENAI_API_KEY"))
176+
.build();
173177
174-
var openAiAudioSpeechModel = new OpenAiAudioSpeechModel(this.openAiAudioApi);
178+
var openAiAudioSpeechModel = new OpenAiAudioSpeechModel(openAiAudioApi);
175179
176180
OpenAiAudioSpeechOptions speechOptions = OpenAiAudioSpeechOptions.builder()
177181
.voice(OpenAiAudioApi.SpeechRequest.Voice.ALLOY.getValue())
@@ -180,9 +184,9 @@ OpenAiAudioSpeechOptions speechOptions = OpenAiAudioSpeechOptions.builder()
180184
.model(OpenAiAudioApi.TtsModel.TTS_1.value)
181185
.build();
182186
183-
SpeechPrompt speechPrompt = new SpeechPrompt("Today is a wonderful day to build something people love!", this.speechOptions);
187+
SpeechPrompt speechPrompt = new SpeechPrompt("Today is a wonderful day to build something people love!", speechOptions);
184188
185-
Flux<SpeechResponse> responseStream = this.openAiAudioSpeechModel.stream(this.speechPrompt);
189+
Flux<SpeechResponse> responseStream = openAiAudioSpeechModel.stream(speechPrompt);
186190
----
187191

188192
== Example Code

0 commit comments

Comments
 (0)