1919import org .apache .commons .lang3 .StringUtils ;
2020import org .slf4j .Logger ;
2121import org .slf4j .LoggerFactory ;
22+ import org .springframework .ai .audio .speech .Speech ;
23+ import org .springframework .ai .audio .speech .SpeechModel ;
24+ import org .springframework .ai .audio .speech .SpeechOptions ;
25+ import org .springframework .ai .audio .speech .SpeechPrompt ;
26+ import org .springframework .ai .audio .speech .SpeechResponse ;
27+ import org .springframework .ai .audio .speech .StreamingSpeechModel ;
2228import org .springframework .ai .chat .metadata .RateLimit ;
29+ import org .springframework .ai .model .ModelOptionsUtils ;
2330import org .springframework .ai .openai .api .OpenAiAudioApi ;
2431import org .springframework .ai .openai .api .OpenAiAudioApi .SpeechRequest .AudioResponseFormat ;
25- import org .springframework .ai .openai .audio .speech .Speech ;
26- import org .springframework .ai .openai .audio .speech .SpeechModel ;
27- import org .springframework .ai .openai .audio .speech .SpeechPrompt ;
28- import org .springframework .ai .openai .audio .speech .SpeechResponse ;
29- import org .springframework .ai .openai .audio .speech .StreamingSpeechModel ;
3032import org .springframework .ai .openai .metadata .audio .OpenAiAudioSpeechResponseMetadata ;
3133import org .springframework .ai .openai .metadata .support .OpenAiResponseHeaderExtractor ;
3234import org .springframework .ai .retry .RetryUtils ;
3335import org .springframework .http .ResponseEntity ;
36+ import org .springframework .lang .Nullable ;
3437import org .springframework .retry .support .RetryTemplate ;
3538import org .springframework .util .Assert ;
3639import reactor .core .publisher .Flux ;
3740
3841/**
39- * OpenAI audio speech client implementation for backed by {@link OpenAiAudioApi}.
42+ * OpenAI audio speech client implementation backed by {@link OpenAiAudioApi}.
4043 *
4144 * @author Ahmed Yousri
4245 * @author Hyunjoon Choi
4346 * @author Thomas Vitale
4447 * @see OpenAiAudioApi
45- * @since 1.0.0-M1
48+ * @since 1.0.0
4649 */
4750public class OpenAiAudioSpeechModel implements SpeechModel , StreamingSpeechModel {
4851
49- private final Logger logger = LoggerFactory .getLogger (getClass () );
52+ private final static Logger logger = LoggerFactory .getLogger (OpenAiAudioSpeechModel . class );
5053
5154 /**
5255 * The default options used for the audio completion requests.
@@ -114,16 +117,10 @@ public OpenAiAudioSpeechModel(OpenAiAudioApi audioApi, OpenAiAudioSpeechOptions
114117 this .retryTemplate = retryTemplate ;
115118 }
116119
117- @ Override
118- public byte [] call (String text ) {
119- SpeechPrompt speechRequest = new SpeechPrompt (text );
120- return call (speechRequest ).getResult ().getOutput ();
121- }
122-
123120 @ Override
124121 public SpeechResponse call (SpeechPrompt speechPrompt ) {
125-
126- OpenAiAudioApi .SpeechRequest speechRequest = createRequest (speechPrompt );
122+ OpenAiAudioSpeechOptions requestSpeechOptions = mergeOptions ( speechPrompt . getOptions (), this . defaultOptions );
123+ OpenAiAudioApi .SpeechRequest speechRequest = createRequest (speechPrompt , requestSpeechOptions );
127124
128125 ResponseEntity <byte []> speechEntity = this .retryTemplate
129126 .execute (ctx -> this .audioApi .createSpeech (speechRequest ));
@@ -149,53 +146,54 @@ public SpeechResponse call(SpeechPrompt speechPrompt) {
149146 */
150147 @ Override
151148 public Flux <SpeechResponse > stream (SpeechPrompt speechPrompt ) {
149+ OpenAiAudioSpeechOptions requestSpeechOptions = mergeOptions (speechPrompt .getOptions (), this .defaultOptions );
150+ OpenAiAudioApi .SpeechRequest speechRequest = createRequest (speechPrompt , requestSpeechOptions );
152151
153- OpenAiAudioApi .SpeechRequest speechRequest = createRequest (speechPrompt );
154-
155- Flux <ResponseEntity <byte []>> speechEntity = this .retryTemplate
156- .execute (ctx -> this .audioApi .stream (speechRequest ));
152+ Flux <ResponseEntity <byte []>> speechEntity = this .audioApi .stream (speechRequest );
157153
158- return speechEntity .map (entity -> new SpeechResponse (new Speech (entity .getBody ()),
154+ return speechEntity .map (entity -> new SpeechResponse (
155+ new Speech (entity .getBody () != null ? entity .getBody () : new byte [0 ]),
159156 new OpenAiAudioSpeechResponseMetadata (OpenAiResponseHeaderExtractor .extractAiResponseHeaders (entity ))));
160157 }
161158
162- private OpenAiAudioApi .SpeechRequest createRequest (SpeechPrompt request ) {
163- OpenAiAudioSpeechOptions options = this .defaultOptions ;
164-
165- if (request .getOptions () != null ) {
166- if (request .getOptions () instanceof OpenAiAudioSpeechOptions runtimeOptions ) {
167- options = this .merge (runtimeOptions , options );
168- }
169- else {
170- throw new IllegalArgumentException ("Prompt options are not of type SpeechOptions: "
171- + request .getOptions ().getClass ().getSimpleName ());
172- }
173- }
174-
175- String input = StringUtils .isNotBlank (options .getInput ()) ? options .getInput ()
159+ private OpenAiAudioApi .SpeechRequest createRequest (SpeechPrompt request ,
160+ OpenAiAudioSpeechOptions requestSpeechOptions ) {
161+ String input = StringUtils .isNotBlank (requestSpeechOptions .getInput ()) ? requestSpeechOptions .getInput ()
176162 : request .getInstructions ().getText ();
177163
178164 OpenAiAudioApi .SpeechRequest .Builder requestBuilder = OpenAiAudioApi .SpeechRequest .builder ()
179- .withModel (options .getModel ())
165+ .withModel (requestSpeechOptions .getModel ())
180166 .withInput (input )
181- .withVoice ( options . getVoice ())
182- .withResponseFormat ( options . getResponseFormat ())
183- .withSpeed ( options . getSpeed ());
167+ .withResponseFormat ( requestSpeechOptions . getResponseFormat ())
168+ .withSpeed ( requestSpeechOptions . getSpeed ())
169+ .withVoice ( requestSpeechOptions . getVoice ());
184170
185171 return requestBuilder .build ();
186172 }
187173
188- private OpenAiAudioSpeechOptions merge (OpenAiAudioSpeechOptions source , OpenAiAudioSpeechOptions target ) {
189- OpenAiAudioSpeechOptions .Builder mergedBuilder = OpenAiAudioSpeechOptions .builder ();
174+ /**
175+ * Merge runtime and default {@link SpeechOptions} to compute the final options to use
176+ * in the request.
177+ */
178+ private OpenAiAudioSpeechOptions mergeOptions (@ Nullable SpeechOptions runtimeOptions ,
179+ OpenAiAudioSpeechOptions defaultOptions ) {
180+ var runtimeOptionsForProvider = ModelOptionsUtils .copyToTarget (runtimeOptions , SpeechOptions .class ,
181+ OpenAiAudioSpeechOptions .class );
190182
191- mergedBuilder .withModel (source .getModel () != null ? source .getModel () : target .getModel ());
192- mergedBuilder .withInput (source .getInput () != null ? source .getInput () : target .getInput ());
193- mergedBuilder .withVoice (source .getVoice () != null ? source .getVoice () : target .getVoice ());
194- mergedBuilder .withResponseFormat (
195- source .getResponseFormat () != null ? source .getResponseFormat () : target .getResponseFormat ());
196- mergedBuilder .withSpeed (source .getSpeed () != null ? source .getSpeed () : target .getSpeed ());
183+ if (runtimeOptionsForProvider == null ) {
184+ return defaultOptions ;
185+ }
197186
198- return mergedBuilder .build ();
187+ return OpenAiAudioSpeechOptions .builder ()
188+ // Handle portable options
189+ .withModel (ModelOptionsUtils .mergeOption (runtimeOptionsForProvider .getModel (), defaultOptions .getModel ()))
190+ // Handle OpenAI specific options
191+ .withInput (ModelOptionsUtils .mergeOption (runtimeOptionsForProvider .getInput (), defaultOptions .getInput ()))
192+ .withResponseFormat (ModelOptionsUtils .mergeOption (runtimeOptionsForProvider .getResponseFormat (),
193+ defaultOptions .getResponseFormat ()))
194+ .withSpeed (ModelOptionsUtils .mergeOption (runtimeOptionsForProvider .getSpeed (), defaultOptions .getSpeed ()))
195+ .withVoice (ModelOptionsUtils .mergeOption (runtimeOptionsForProvider .getVoice (), defaultOptions .getVoice ()))
196+ .build ();
199197 }
200198
201199}
0 commit comments