spring-projects
diff --git a/‎models/spring-ai-openai/README.md‎
Lines changed: 3 additions & 0 deletions b/‎models/spring-ai-openai/README.md‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎models/spring-ai-openai/src/main/java/org/springframework/ai/openai/api/OpenAiApi.java‎
Lines changed: 136 additions & 37 deletions b/‎models/spring-ai-openai/src/main/java/org/springframework/ai/openai/api/OpenAiApi.java‎
Lines changed: 136 additions & 37 deletions
diff --git a/‎models/spring-ai-openai/src/main/java/org/springframework/ai/openai/api/OpenAiAudioApi.java‎
Lines changed: 9 additions & 9 deletions b/‎models/spring-ai-openai/src/main/java/org/springframework/ai/openai/api/OpenAiAudioApi.java‎
Lines changed: 9 additions & 9 deletions
diff --git a/‎models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/transcription/AudioTranscription.java‎
Lines changed: 76 additions & 0 deletions b/‎models/spring-ai-openai/src/main/java/org/springframework/ai/openai/audio/transcription/AudioTranscription.java‎
Lines changed: 76 additions & 0 deletions
@@ -2,3 +2,6 @@
 
 [OpenAI Embedding Documentation](https://docs.spring.io/spring-ai/reference/api/embeddings/openai-embeddings.html)
 
+[OpenAI Image Generation](https://docs.spring.io/spring-ai/reference/api/clients/image/openai-image.html)
+
+[OpenAI Transcription Generation](TODO)
@@ -30,9 +30,11 @@
 import org.springframework.ai.openai.api.common.ApiUtils;
 import org.springframework.boot.context.properties.bind.ConstructorBinding;
 import org.springframework.core.ParameterizedTypeReference;
+import org.springframework.http.MediaType;
 import org.springframework.http.ResponseEntity;
 import org.springframework.util.Assert;
 import org.springframework.util.CollectionUtils;
+import org.springframework.util.MultiValueMap;
 import org.springframework.web.client.RestClient;
 import org.springframework.web.reactive.function.client.WebClient;
 
@@ -42,6 +44,7 @@
  * OpenAI Embedding API: https://platform.openai.com/docs/api-reference/embeddings.
  *
  * @author Christian Tzolov
+ * @author Michael Lavelle
  */
 public class OpenAiApi {
 
@@ -50,6 +53,9 @@ public class OpenAiApi {
 	private static final Predicate<String> SSE_DONE_PREDICATE = "[DONE]"::equals;
 
 	private final RestClient restClient;
+
+	private final RestClient multipartRestClient;
+
 	private final WebClient webClient;
 
 	/**
@@ -86,6 +92,15 @@ public OpenAiApi(String baseUrl, String openAiToken, RestClient.Builder restClie
 				.defaultStatusHandler(ApiUtils.DEFAULT_RESPONSE_ERROR_HANDLER)
 				.build();
 
+		this.multipartRestClient = restClientBuilder
+				.baseUrl(baseUrl)
+				.defaultHeaders(multipartFormDataHeaders -> {
+					multipartFormDataHeaders.setBearerAuth(openAiToken);
+					multipartFormDataHeaders.setContentType(MediaType.MULTIPART_FORM_DATA);
+				})
+				.defaultStatusHandler(ApiUtils.DEFAULT_RESPONSE_ERROR_HANDLER)
+				.build();
+
 		this.webClient = WebClient.builder()
 				.baseUrl(baseUrl)
 				.defaultHeaders(ApiUtils.getJsonContentHeaders(openAiToken))
@@ -97,7 +112,7 @@ public OpenAiApi(String baseUrl, String openAiToken, RestClient.Builder restClie
 	 * <a href="https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo">GPT-4 and GPT-4 Turbo</a> and
 	 * <a href="https://platform.openai.com/docs/models/gpt-3-5-turbo">GPT-3.5 Turbo</a>.
 	 */
-	enum ChatModel {
+	public enum ChatModel {
 		/**
 		 * (New) GPT-4 Turbo - latest GPT-4 model intended to reduce cases
 		 * of “laziness” where the model doesn’t complete a task.
@@ -169,42 +184,6 @@ public String getValue() {
 		}
 	}
 
-	/**
-	 * OpenAI Embeddings Models:
-	 * <a href="https://platform.openai.com/docs/models/embeddings">Embeddings</a>.
-	 */
-	enum EmbeddingModel {
-
-		/**
-		 * Most capable embedding model for both english and non-english tasks.
-		 * DIMENSION: 3072
-		 */
-		TEXT_EMBEDDING_3_LARGE("text-embedding-3-large"),
-
-		/**
-		 * Increased performance over 2nd generation ada embedding model.
-		 * DIMENSION: 1536
-		 */
-		TEXT_EMBEDDING_3_SMALL("text-embedding-3-small"),
-
-		/**
-		 * Most capable 2nd generation embedding model, replacing 16 first
-		 * generation models.
-		 * DIMENSION: 1536
-		 */
-		TEXT_EMBEDDING_ADA_002("text-embedding-ada-002");
-
-		public final String  value;
-
-		EmbeddingModel(String value) {
-			this.value = value;
-		}
-
-		public String getValue() {
-			return value;
-		}
-	}
-
 	/**
 	 * Represents a tool the model may call. Currently, only functions are supported as a tool.
 	 *
@@ -708,6 +687,44 @@ public Flux<ChatCompletionChunk> chatCompletionStream(ChatCompletionRequest chat
 				.map(content -> ModelOptionsUtils.jsonToObject(content, ChatCompletionChunk.class));
 	}
 
+	// Embeddings API
+
+	/**
+	 * Embedding models offered by OpenAI, see
+	 * <a href="https://platform.openai.com/docs/models/embeddings">Embeddings</a>.
+	 * Each constant carries its model identifier string.
+	 */
+	public enum EmbeddingModel {
+
+		/**
+		 * Most capable embedding model for both English and non-English tasks.
+		 * DIMENSION: 3072
+		 */
+		TEXT_EMBEDDING_3_LARGE("text-embedding-3-large"),
+
+		/**
+		 * Increased performance over the 2nd generation ada embedding model.
+		 * DIMENSION: 1536
+		 */
+		TEXT_EMBEDDING_3_SMALL("text-embedding-3-small"),
+
+		/**
+		 * Most capable 2nd generation embedding model, replacing 16 first
+		 * generation models.
+		 * DIMENSION: 1536
+		 */
+		TEXT_EMBEDDING_ADA_002("text-embedding-ada-002");
+
+		/** Model identifier string associated with this constant. */
+		public final String value;
+
+		EmbeddingModel(String modelId) {
+			this.value = modelId;
+		}
+
+		/**
+		 * @return the model identifier string of this constant
+		 */
+		public String getValue() {
+			return this.value;
+		}
+	}
+
 	/**
 	 * Represents an embedding vector returned by embedding endpoint.
 	 *
@@ -824,5 +841,87 @@ public <T> ResponseEntity<EmbeddingList<Embedding>> embeddings(EmbeddingRequest<
 				.toEntity(new ParameterizedTypeReference<>() {
 				});
 	}
+
+	// Transcription API
+
+	// @JsonInclude(Include.NON_NULL)
+	// public record Transcription(
+	// 		@JsonProperty("text") String text) {
+	// }
+
+	// 	/**
+	//  *
+	//  * @param model ID of the model to use.
+	//  * @param language The language of the input audio. Supplying the input language in ISO-639-1 format will improve accuracy and latency.
+	//  * @param prompt An optional text to guide the model's style or continue a previous audio segment. The prompt should match the audio language.
+	//  * @param responseFormat An object specifying the format that the model must output.
+	//  * @param temperature What sampling temperature to use, between 0 and 1. Higher values like 0.8 will make the output
+	//  * more random, while lower values like 0.2 will make it more focused and deterministic. */
+	// @JsonInclude(Include.NON_NULL)
+	// public record TranscriptionRequest (
+	// 		@JsonProperty("model") String model,
+	// 		@JsonProperty("language") String language,
+	// 		@JsonProperty("prompt") String prompt,
+	// 		@JsonProperty("response_format") ResponseFormat responseFormat,
+	// 		@JsonProperty("temperature") Float temperature) {
+
+	// 	/**
+	// 	 * Shortcut constructor for a transcription request with the given model and temperature
+	// 	 *
+	// 	 * @param model ID of the model to use.
+	// 	 * @param temperature What sampling temperature to use, between 0 and 1.
+	// 	 */
+	// 	public TranscriptionRequest(String model, Float temperature) {
+	// 		this(model, null, null, null, temperature);
+	// 	}
+
+	// 	public TranscriptionRequest() {
+	// 		this(null, null, null, null, null);
+	// 	}
+
+	// 	/**
+	// 	 * An object specifying the format that the model must output.
+	// 	 * @param type Must be one of 'text' or 'json_object'.
+	// 	 */
+	// 	@JsonInclude(Include.NON_NULL)
+	// 	public record ResponseFormat(
+	// 			@JsonProperty("type") String type) {
+	// 	}
+	// }
+
+	// /**
+	//  * Creates a model response for the given transcription.
+	//  *
+	//  * @param transcriptionRequest The transcription request.
+	//  * @return Entity response with {@link Transcription} as a body and HTTP status code and headers.
+	//  */
+	// public ResponseEntity<Transcription> transcriptionEntityJson(MultiValueMap<String, Object> transcriptionRequest) {
+
+	// 	Assert.notNull(transcriptionRequest, "The request body can not be null.");
+
+	// 	return this.multipartRestClient.post()
+	// 			.uri("/v1/audio/transcriptions")
+	// 			.body(transcriptionRequest)
+	// 			.retrieve()
+	// 			.toEntity(Transcription.class);
+	// }
+
+	// /**
+	//  * Creates a model response for the given transcription.
+	//  *
+	//  * @param transcriptionRequest The transcription request.
+	//  * @return Entity response with {@link String} as a body and HTTP status code and headers.
+	//  */
+	// public ResponseEntity<String> transcriptionEntityText(MultiValueMap<String, Object> transcriptionRequest) {
+
+	// 	Assert.notNull(transcriptionRequest, "The request body can not be null.");
+
+	// 	return this.multipartRestClient.post()
+	// 			.uri("/v1/audio/transcriptions")
+	// 			.body(transcriptionRequest)
+	// 			.accept(MediaType.TEXT_PLAIN)
+	// 			.retrieve()
+	// 			.toEntity(String.class);
+	// }
 }
 // @formatter:on
@@ -280,7 +280,7 @@ public record TranscriptionRequest(
 		@JsonProperty("model") String model,
 		@JsonProperty("language") String language,
 		@JsonProperty("prompt") String prompt,
-		@JsonProperty("response_format") TextualResponseFormat responseFormat,
+		@JsonProperty("response_format") TranscriptResponseFormat responseFormat,
 		@JsonProperty("temperature") Float temperature,
 		@JsonProperty("timestamp_granularities") GranularityType granularityType) {
 		// @formatter:on
@@ -318,7 +318,7 @@ public static class Builder {
 
 			private String prompt;
 
-			private TextualResponseFormat responseFormat = TextualResponseFormat.JSON;
+			private TranscriptResponseFormat responseFormat = TranscriptResponseFormat.JSON;
 
 			private Float temperature;
 
@@ -344,7 +344,7 @@ public Builder withPrompt(String prompt) {
 				return this;
 			}
 
-			public Builder withResponseFormat(TextualResponseFormat response_format) {
+			public Builder withResponseFormat(TranscriptResponseFormat response_format) {
 				this.responseFormat = response_format;
 				return this;
 			}
@@ -375,7 +375,7 @@ public TranscriptionRequest build() {
 	 * The format of the transcript and translation outputs, in one of these options:
 	 * json, text, srt, verbose_json, or vtt. Defaults to json.
 	 */
-	public enum TextualResponseFormat {
+	public enum TranscriptResponseFormat {
 
 		// @formatter:off
 		@JsonProperty("json") JSON("json", StructuredResponse.class),
@@ -393,7 +393,7 @@ public boolean isJsonType() {
 			return this == JSON || this == VERBOSE_JSON;
 		}
 
-		TextualResponseFormat(String value, Class<?> responseType) {
+		TranscriptResponseFormat(String value, Class<?> responseType) {
 			this.value = value;
 			this.responseType = responseType;
 		}
@@ -429,7 +429,7 @@ public record TranslationRequest(
 		@JsonProperty("file") byte[] file,
 		@JsonProperty("model") String model,
 		@JsonProperty("prompt") String prompt,
-		@JsonProperty("response_format") TextualResponseFormat responseFormat,
+		@JsonProperty("response_format") TranscriptResponseFormat responseFormat,
 		@JsonProperty("temperature") Float temperature) {
 		// @formatter:on
 
@@ -445,7 +445,7 @@ public static class Builder {
 
 			private String prompt;
 
-			private TextualResponseFormat responseFormat = TextualResponseFormat.JSON;
+			private TranscriptResponseFormat responseFormat = TranscriptResponseFormat.JSON;
 
 			private Float temperature;
 
@@ -464,7 +464,7 @@ public Builder withPrompt(String prompt) {
 				return this;
 			}
 
-			public Builder withResponseFormat(TextualResponseFormat responseFormat) {
+			public Builder withResponseFormat(TranscriptResponseFormat responseFormat) {
 				this.responseFormat = responseFormat;
 				return this;
 			}
@@ -601,7 +601,7 @@ public String getFilename() {
 		multipartBody.add("response_format", requestBody.responseFormat().getValue());
 		multipartBody.add("temperature", requestBody.temperature());
 		if (requestBody.granularityType() != null) {
-			Assert.isTrue(requestBody.responseFormat() == TextualResponseFormat.VERBOSE_JSON,
+			Assert.isTrue(requestBody.responseFormat() == TranscriptResponseFormat.VERBOSE_JSON,
 					"response_format must be set to verbose_json to use timestamp granularities.");
 			multipartBody.add("timestamp_granularities[]", requestBody.granularityType().getValue());
 		}
 
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2024-2024 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.springframework.ai.openai.audio.transcription;
+
+import org.springframework.ai.model.ModelResult;
+import org.springframework.ai.openai.metadata.audio.OpenAiAudioTranscriptionMetadata;
+import org.springframework.lang.Nullable;
+
+import java.util.Objects;
+
+/**
+ * A single audio transcription result: the text output together with optional
+ * {@link OpenAiAudioTranscriptionMetadata}.
+ *
+ * @author Michael Lavelle
+ * @since 0.8.1
+ */
+public class AudioTranscription implements ModelResult<String> {
+
+	// Set once in the constructor and never reassigned.
+	private final String text;
+
+	// May be null until withTranscriptionMetadata(..) is called.
+	private OpenAiAudioTranscriptionMetadata transcriptionMetadata;
+
+	/**
+	 * Creates a transcription result holding the given text and no metadata.
+	 * @param text the transcription text
+	 */
+	public AudioTranscription(String text) {
+		this.text = text;
+	}
+
+	/**
+	 * @return the transcription text passed to the constructor
+	 */
+	@Override
+	public String getOutput() {
+		return this.text;
+	}
+
+	/**
+	 * @return the metadata attached to this result, or
+	 * {@link OpenAiAudioTranscriptionMetadata#NULL} when none has been set
+	 */
+	@Override
+	public OpenAiAudioTranscriptionMetadata getMetadata() {
+		return this.transcriptionMetadata != null ? this.transcriptionMetadata
+				: OpenAiAudioTranscriptionMetadata.NULL;
+	}
+
+	/**
+	 * Attaches (or clears, when {@code null}) the metadata of this result.
+	 * @param transcriptionMetadata the metadata to attach; may be {@code null}
+	 * @return this instance, to allow call chaining
+	 */
+	public AudioTranscription withTranscriptionMetadata(
+			@Nullable OpenAiAudioTranscriptionMetadata transcriptionMetadata) {
+		this.transcriptionMetadata = transcriptionMetadata;
+		return this;
+	}
+
+	/**
+	 * Two results are equal when both their text and their metadata are equal.
+	 */
+	@Override
+	public boolean equals(Object o) {
+		if (this == o) {
+			return true;
+		}
+		if (!(o instanceof AudioTranscription that)) {
+			return false;
+		}
+		return Objects.equals(this.text, that.text)
+				&& Objects.equals(this.transcriptionMetadata, that.transcriptionMetadata);
+	}
+
+	@Override
+	public int hashCode() {
+		return Objects.hash(this.text, this.transcriptionMetadata);
+	}
+
+	@Override
+	public String toString() {
+		// Label matches the class name so log output identifies the right type.
+		return "AudioTranscription{" + "text=" + this.text + ", transcriptionMetadata="
+				+ this.transcriptionMetadata + '}';
+	}
+
+}