Merge branch 'spring-projects:main' into main

ddobrin · web-flow · commit f4a9b12cd6c5 · 2025-07-15T09:19:54.000-04:00
diff --git a/models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/AzureOpenAiAudioTranscriptionModel.java b/models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/AzureOpenAiAudioTranscriptionModel.java
@@ -34,8 +34,8 @@
 import org.springframework.ai.azure.openai.AzureOpenAiAudioTranscriptionOptions.StructuredResponse.Word;
 import org.springframework.ai.azure.openai.AzureOpenAiAudioTranscriptionOptions.TranscriptResponseFormat;
 import org.springframework.ai.azure.openai.metadata.AzureOpenAiAudioTranscriptionResponseMetadata;
-import org.springframework.ai.model.Model;
 import org.springframework.ai.model.ModelOptionsUtils;
+import org.springframework.ai.audio.transcription.TranscriptionModel;
 import org.springframework.core.io.Resource;
 import org.springframework.util.Assert;
 import org.springframework.util.StringUtils;
@@ -47,7 +47,7 @@
  *
  * @author Piotr Olaszewski
  */
-public class AzureOpenAiAudioTranscriptionModel implements Model<AudioTranscriptionPrompt, AudioTranscriptionResponse> {
+public class AzureOpenAiAudioTranscriptionModel implements TranscriptionModel {
 
 	private static final List<AudioTranscriptionFormat> JSON_FORMATS = List.of(AudioTranscriptionFormat.JSON,
 			AudioTranscriptionFormat.VERBOSE_JSON);
diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiAudioTranscriptionModel.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiAudioTranscriptionModel.java
@@ -23,7 +23,7 @@
 import org.springframework.ai.audio.transcription.AudioTranscriptionPrompt;
 import org.springframework.ai.audio.transcription.AudioTranscriptionResponse;
 import org.springframework.ai.chat.metadata.RateLimit;
-import org.springframework.ai.model.Model;
+import org.springframework.ai.audio.transcription.TranscriptionModel;
 import org.springframework.ai.openai.api.OpenAiAudioApi;
 import org.springframework.ai.openai.api.OpenAiAudioApi.StructuredResponse;
 import org.springframework.ai.openai.metadata.audio.OpenAiAudioTranscriptionResponseMetadata;
@@ -45,7 +45,7 @@
  * @see OpenAiAudioApi
  * @since 0.8.1
  */
-public class OpenAiAudioTranscriptionModel implements Model<AudioTranscriptionPrompt, AudioTranscriptionResponse> {
+public class OpenAiAudioTranscriptionModel implements TranscriptionModel {
 
 	private final Logger logger = LoggerFactory.getLogger(getClass());
 
diff --git a/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/audio/transcription/OpenAiAudioTranscriptionModelTests.java b/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/audio/transcription/OpenAiAudioTranscriptionModelTests.java
@@ -0,0 +1,132 @@
+/*
+ * Copyright 2023-2024 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.springframework.ai.openai.audio.transcription;
+
+import org.junit.jupiter.api.Test;
+import org.springframework.ai.audio.transcription.AudioTranscriptionPrompt;
+import org.springframework.ai.audio.transcription.AudioTranscriptionResponse;
+import org.springframework.ai.audio.transcription.TranscriptionModel;
+import org.springframework.ai.model.SimpleApiKey;
+import org.springframework.ai.openai.OpenAiAudioTranscriptionModel;
+import org.springframework.ai.openai.OpenAiAudioTranscriptionOptions;
+import org.springframework.ai.openai.api.OpenAiAudioApi;
+import org.springframework.ai.openai.api.OpenAiAudioApi.TranscriptResponseFormat;
+import org.springframework.ai.retry.RetryUtils;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.boot.test.autoconfigure.web.client.RestClientTest;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+import org.springframework.core.io.ClassPathResource;
+import org.springframework.http.HttpMethod;
+import org.springframework.http.MediaType;
+import org.springframework.test.web.client.MockRestServiceServer;
+import org.springframework.util.LinkedMultiValueMap;
+import org.springframework.web.client.RestClient;
+import org.springframework.web.reactive.function.client.WebClient;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.springframework.test.web.client.match.MockRestRequestMatchers.method;
+import static org.springframework.test.web.client.match.MockRestRequestMatchers.requestTo;
+import static org.springframework.test.web.client.response.MockRestResponseCreators.withSuccess;
+
+/** Tests {@link OpenAiAudioTranscriptionModel} against a {@link MockRestServiceServer}. */
+@RestClientTest(OpenAiAudioTranscriptionModelTests.Config.class)
+class OpenAiAudioTranscriptionModelTests {
+
+	@Autowired
+	private MockRestServiceServer server;
+
+	@Autowired
+	private TranscriptionModel transcriptionModel;
+
+	@Test
+	void transcribeRequestReturnsResponseCorrectly() {
+		expectTranscriptionRequest("""
+				{
+				  "text": "All your bases are belong to us"
+				}
+				""");
+
+		String transcription = this.transcriptionModel.transcribe(new ClassPathResource("/speech.flac"));
+
+		assertThat(transcription).isEqualTo("All your bases are belong to us");
+		this.server.verify();
+	}
+
+	@Test
+	void callWithDefaultOptions() {
+		expectTranscriptionRequest("""
+				{
+				  "text": "Hello, this is a test transcription."
+				}
+				""");
+
+		AudioTranscriptionPrompt prompt = new AudioTranscriptionPrompt(new ClassPathResource("/speech.flac"));
+		AudioTranscriptionResponse response = this.transcriptionModel.call(prompt);
+
+		assertThat(response.getResult().getOutput()).isEqualTo("Hello, this is a test transcription.");
+		this.server.verify();
+	}
+
+	@Test
+	void transcribeWithOptions() {
+		expectTranscriptionRequest("""
+				{
+				  "text": "Hello, this is a test transcription with options."
+				}
+				""");
+
+		OpenAiAudioTranscriptionOptions options = OpenAiAudioTranscriptionOptions.builder()
+			.temperature(0.5f)
+			.responseFormat(TranscriptResponseFormat.JSON)
+			.build();
+
+		String transcription = this.transcriptionModel.transcribe(new ClassPathResource("/speech.flac"), options);
+
+		assertThat(transcription).isEqualTo("Hello, this is a test transcription with options.");
+		this.server.verify();
+	}
+
+	/**
+	 * Registers one expected POST to the transcriptions endpoint on the mock server and
+	 * answers it with the given JSON body.
+	 * @param responseBody the JSON payload the mock server returns
+	 */
+	private void expectTranscriptionRequest(String responseBody) {
+		this.server.expect(requestTo("https://api.openai.com/v1/audio/transcriptions"))
+			.andExpect(method(HttpMethod.POST))
+			.andRespond(withSuccess(responseBody, MediaType.APPLICATION_JSON));
+	}
+
+	@Configuration
+	static class Config {
+
+		@Bean
+		public OpenAiAudioApi openAiAudioApi(RestClient.Builder builder) {
+			return new OpenAiAudioApi("https://api.openai.com", new SimpleApiKey("test-api-key"),
+					new LinkedMultiValueMap<>(), builder, WebClient.builder(),
+					RetryUtils.DEFAULT_RESPONSE_ERROR_HANDLER);
+		}
+
+		@Bean
+		public OpenAiAudioTranscriptionModel openAiAudioTranscriptionModel(OpenAiAudioApi audioApi) {
+			return new OpenAiAudioTranscriptionModel(audioApi);
+		}
+
+	}
+
+}
diff --git a/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/audio/transcription/OpenAiTranscriptionModelIT.java b/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/audio/transcription/OpenAiTranscriptionModelIT.java
@@ -40,7 +40,7 @@ class OpenAiTranscriptionModelIT extends AbstractIT {
 	private Resource audioFile;
 
 	@Test
-	void transcriptionTest() {
+	void callTest() {
 		OpenAiAudioTranscriptionOptions transcriptionOptions = OpenAiAudioTranscriptionOptions.builder()
 			.responseFormat(TranscriptResponseFormat.TEXT)
 			.temperature(0f)
@@ -53,7 +53,7 @@ void transcriptionTest() {
 	}
 
 	@Test
-	void transcriptionTestWithOptions() {
+	void callTestWithOptions() {
 		OpenAiAudioApi.TranscriptResponseFormat responseFormat = OpenAiAudioApi.TranscriptResponseFormat.VTT;
 
 		OpenAiAudioTranscriptionOptions transcriptionOptions = OpenAiAudioTranscriptionOptions.builder()
@@ -69,4 +69,24 @@ void transcriptionTestWithOptions() {
 		assertThat(response.getResults().get(0).getOutput().toLowerCase().contains("fellow")).isTrue();
 	}
 
+	@Test
+	void transcribeTest() {
+		String response = this.transcriptionModel.transcribe(this.audioFile);
+		// String assertion reports the actual transcript on failure, unlike a bare boolean check.
+		assertThat(response).isNotNull().containsIgnoringCase("fellow");
+	}
+
+	@Test
+	void transcribeTestWithOptions() {
+		OpenAiAudioTranscriptionOptions transcriptionOptions = OpenAiAudioTranscriptionOptions.builder()
+			.language("en")
+			.prompt("Ask not this, but ask that")
+			.temperature(0f)
+			.responseFormat(TranscriptResponseFormat.TEXT)
+			.build();
+		String response = this.transcriptionModel.transcribe(this.audioFile, transcriptionOptions);
+		// String assertion reports the actual transcript on failure, unlike a bare boolean check.
+		assertThat(response).isNotNull().containsIgnoringCase("fellow");
+	}
+
 }
diff --git a/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/audio/transcription/TranscriptionModelTests.java b/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/audio/transcription/TranscriptionModelTests.java
diff --git a/models/spring-ai-openai/src/test/resources/speech.flac b/models/spring-ai-openai/src/test/resources/speech.flac
diff --git a/spring-ai-model/src/main/java/org/springframework/ai/audio/transcription/TranscriptionModel.java b/spring-ai-model/src/main/java/org/springframework/ai/audio/transcription/TranscriptionModel.java
@@ -0,0 +1,59 @@
+/*
+ * Copyright 2023-2024 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.springframework.ai.audio.transcription;
+
+import org.springframework.ai.model.Model;
+import org.springframework.core.io.Resource;
+
+/**
+ * A transcription model is a type of AI model that converts audio to text. This is also
+ * known as Speech-to-Text.
+ *
+ * @author Mudabir Hussain
+ * @since 1.0.0
+ */
+public interface TranscriptionModel extends Model<AudioTranscriptionPrompt, AudioTranscriptionResponse> {
+
+	/**
+	 * Transcribes the audio carried by the given prompt.
+	 * @param transcriptionPrompt the prompt containing the audio resource and options
+	 * @return the transcription response
+	 */
+	@Override
+	AudioTranscriptionResponse call(AudioTranscriptionPrompt transcriptionPrompt);
+
+	/**
+	 * Convenience method: wraps the resource in an {@link AudioTranscriptionPrompt},
+	 * delegates to {@link #call(AudioTranscriptionPrompt)} and returns the result's output.
+	 * @param resource the audio resource to transcribe
+	 * @return the transcribed text
+	 */
+	default String transcribe(Resource resource) {
+		return this.call(new AudioTranscriptionPrompt(resource)).getResult().getOutput();
+	}
+
+	/**
+	 * Convenience method: like {@link #transcribe(Resource)}, with options set on the prompt.
+	 * @param resource the audio resource to transcribe
+	 * @param options the transcription options passed along with the resource
+	 * @return the transcribed text
+	 */
+	default String transcribe(Resource resource, AudioTranscriptionOptions options) {
+		return this.call(new AudioTranscriptionPrompt(resource, options)).getResult().getOutput();
+	}
+
+}