Support create translation

qianmoQ · qianmoQ · commit 025c24f15826 · 2023-07-13T20:43:02.000+08:00
diff --git a/docs/docs/reference/audio.md b/docs/docs/reference/audio.md
@@ -0,0 +1,38 @@
+---
+title: Audio
+---
+
+!!! Note
+
+    Build the client before making any calls; the build code is as follows:
+
+    ```java
+    OpenAiClient client = OpenAiClient.builder()
+            .apiHost("https://api.openai.com")
+            .apiKey(System.getProperty("openai.token"))
+            .build();
+    ```
+
+    `System.getProperty("openai.token")` returns the API key used to authorize requests.
+
+### Create transcription
+
+---
+
+Transcribes audio into the input language.
+
+```java
+String file=this.getClass().getResource("/hello.mp3").getFile();
+AudioEntity configure = AudioEntity.builder()
+        .file(new File(file))
+        .build();
+client.audioTranscriptions(configure);
+```
+
+Returns
+
+```json
+{
+  "text": "Hello, my name is Wolfgang and I come from Germany. Where are you heading today?"
+}
+```
diff --git a/docs/docs/reference/audio.zh.md b/docs/docs/reference/audio.zh.md
@@ -0,0 +1,38 @@
+---
+title: Audio
+---
+
+!!! Note
+
+    调用前请先构建客户端，构建代码如下：
+
+    ```java
+    OpenAiClient client = OpenAiClient.builder()
+            .apiHost("https://api.openai.com")
+            .apiKey(System.getProperty("openai.token"))
+            .build();
+    ```
+
+    `System.getProperty("openai.token")` 是用于 API 授权的密钥。
+
+### Create transcription
+
+---
+
+将音频转录为输入语言。
+
+```java
+String file=this.getClass().getResource("/hello.mp3").getFile();
+AudioEntity configure = AudioEntity.builder()
+        .file(new File(file))
+        .build();
+client.audioTranscriptions(configure);
+```
+
+Returns
+
+```json
+{
+  "text": "Hello, my name is Wolfgang and I come from Germany. Where are you heading today?"
+}
+```
diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml
@@ -64,6 +64,7 @@ nav:
       - reference/completions_chat.md
       - reference/images.md
       - reference/embeddings.md
+      - reference/audio.md
       - Provider:
           - reference/provider/azure.md
   - released.md
diff --git a/src/main/java/org/devlive/sdk/openai/DefaultApi.java b/src/main/java/org/devlive/sdk/openai/DefaultApi.java
@@ -9,6 +9,7 @@
 import org.devlive.sdk.openai.entity.ImageEntity;
 import org.devlive.sdk.openai.entity.ModelEntity;
 import org.devlive.sdk.openai.entity.UserKeyEntity;
+import org.devlive.sdk.openai.response.AudioResponse;
 import org.devlive.sdk.openai.response.CompleteChatResponse;
 import org.devlive.sdk.openai.response.CompleteResponse;
 import org.devlive.sdk.openai.response.EmbeddingResponse;
@@ -100,4 +101,14 @@ Single<ImageResponse> fetchImagesVariations(@Url String url,
     @POST
     Single<EmbeddingResponse> fetchEmbeddings(@Url String url,
                                               @Body EmbeddingEntity configure);
+
+    /**
+     * Transcribes audio into the input language.
+     * <p>
+     * Issued as a multipart POST request to the supplied URL.
+     *
+     * @param url       request URL
+     * @param audio     multipart part carrying the audio file
+     * @param configure remaining form fields, keyed by field name
+     * @return a {@code Single} that emits the parsed {@link AudioResponse}
+     */
+    @POST
+    @Multipart
+    Single<AudioResponse> fetchAudioTranscriptions(@Url String url,
+                                                   @Part() MultipartBody.Part audio,
+                                                   @PartMap Map<String, RequestBody> configure);
 }
diff --git a/src/main/java/org/devlive/sdk/openai/DefaultClient.java b/src/main/java/org/devlive/sdk/openai/DefaultClient.java
@@ -4,6 +4,7 @@
 import okhttp3.MultipartBody;
 import okhttp3.OkHttpClient;
 import org.apache.commons.lang3.ObjectUtils;
+import org.devlive.sdk.openai.entity.AudioEntity;
 import org.devlive.sdk.openai.entity.CompletionChatEntity;
 import org.devlive.sdk.openai.entity.CompletionEntity;
 import org.devlive.sdk.openai.entity.EmbeddingEntity;
@@ -12,6 +13,7 @@
 import org.devlive.sdk.openai.entity.UserKeyEntity;
 import org.devlive.sdk.openai.model.ProviderModel;
 import org.devlive.sdk.openai.model.UrlModel;
+import org.devlive.sdk.openai.response.AudioResponse;
 import org.devlive.sdk.openai.response.CompleteChatResponse;
 import org.devlive.sdk.openai.response.CompleteResponse;
 import org.devlive.sdk.openai.response.EmbeddingResponse;
@@ -102,6 +104,15 @@ public EmbeddingResponse createEmbeddings(EmbeddingEntity configure)
                 .blockingGet();
     }
 
+    /**
+     * Sends the audio file held by {@code configure} to the transcription endpoint
+     * and blocks until the response arrives.
+     *
+     * @param configure request parameters; its file is uploaded as the {@code file} part
+     *                  and the remaining values are taken from {@code convertMap()}
+     * @return the transcription response
+     */
+    public AudioResponse audioTranscriptions(AudioEntity configure)
+    {
+        MultipartBody.Part audioPart = MultipartBodyUtils.getPart(configure.getFile(), "file");
+        String url = ProviderUtils.getUrl(provider, UrlModel.FETCH_AUDIO_TRANSCRIPTIONS);
+        return this.api
+                .fetchAudioTranscriptions(url, audioPart, configure.convertMap())
+                .blockingGet();
+    }
+
     public void close()
     {
         if (ObjectUtils.isNotEmpty(this.client)) {
diff --git a/src/main/java/org/devlive/sdk/openai/entity/AudioEntity.java b/src/main/java/org/devlive/sdk/openai/entity/AudioEntity.java
@@ -0,0 +1,171 @@
+package org.devlive.sdk.openai.entity;
+
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.collect.Maps;
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+import lombok.ToString;
+import okhttp3.RequestBody;
+import org.apache.commons.lang3.EnumUtils;
+import org.apache.commons.lang3.ObjectUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.devlive.sdk.openai.exception.ParamException;
+import org.devlive.sdk.openai.model.AudioFormatModel;
+import org.devlive.sdk.openai.model.AudioModel;
+import org.devlive.sdk.openai.utils.FileUtils;
+import org.devlive.sdk.openai.utils.MultipartBodyUtils;
+
+import java.io.File;
+import java.util.Arrays;
+import java.util.Map;
+
+/**
+ * Request parameters for an audio transcription call.
+ * <p>
+ * Instances are normally created through {@link AudioEntityBuilder}, which validates each
+ * value as it is set; the builder-based constructor then fills in defaults for the model,
+ * the response format and the temperature.
+ */
+@Data
+@Builder
+@ToString
+@NoArgsConstructor
+@AllArgsConstructor
+@JsonIgnoreProperties(ignoreUnknown = true)
+public class AudioEntity
+{
+    /**
+     * The only model identifier the builder accepts; also used as the default.
+     */
+    private static final String DEFAULT_MODEL = "whisper-1";
+
+    /**
+     * The audio file object (not file name) to transcribe, in one of these formats: mp3, mp4, mpeg, mpga, m4a, wav, or webm.
+     */
+    @JsonProperty(value = "file")
+    private File file;
+
+    /**
+     * ID of the model to use. Only whisper-1 is currently available.
+     */
+    @JsonProperty(value = "model")
+    private String model;
+
+    /**
+     * An optional text to guide the model's style or continue a previous audio segment. The prompt should match the audio language.
+     */
+    @JsonProperty(value = "prompt")
+    private String prompt;
+
+    /**
+     * The format of the transcript output; one of the {@link AudioFormatModel} constant names.
+     */
+    @JsonProperty(value = "response_format")
+    private String format;
+
+    /**
+     * The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.
+     * <p>
+     * NOTE(review): the builder currently accepts values up to 2 and defaults to 1 - confirm
+     * against the API reference which range and default are intended.
+     */
+    @JsonProperty(value = "temperature")
+    private Double temperature;
+
+    /**
+     * The language of the input audio. Supplying the input language in ISO-639-1 format will improve accuracy and latency.
+     */
+    @JsonProperty(value = "language")
+    private String language;
+
+    /**
+     * Converts the non-file parameters into multipart form fields.
+     * <p>
+     * Only values that are set (non-empty) are included. The audio file itself is not part
+     * of the returned map.
+     *
+     * @return map from form field name to its request body
+     */
+    public Map<String, RequestBody> convertMap()
+    {
+        Map<String, RequestBody> map = Maps.newConcurrentMap();
+        if (StringUtils.isNotEmpty(this.model)) {
+            map.put("model", RequestBody.create(MultipartBodyUtils.TYPE, this.getModel()));
+        }
+        if (StringUtils.isNotEmpty(this.prompt)) {
+            map.put("prompt", RequestBody.create(MultipartBodyUtils.TYPE, this.getPrompt()));
+        }
+        if (StringUtils.isNotEmpty(this.format)) {
+            map.put("response_format", RequestBody.create(MultipartBodyUtils.TYPE, this.getFormat()));
+        }
+        if (ObjectUtils.isNotEmpty(this.temperature)) {
+            map.put("temperature", RequestBody.create(MultipartBodyUtils.TYPE, String.valueOf(this.getTemperature())));
+        }
+        if (StringUtils.isNotEmpty(this.language)) {
+            map.put("language", RequestBody.create(MultipartBodyUtils.TYPE, this.getLanguage()));
+        }
+        return map;
+    }
+
+    /**
+     * Builds the entity from a builder, applying defaults for values that were not set.
+     *
+     * @param builder the populated builder
+     * @throws ParamException if no file was supplied
+     */
+    private AudioEntity(AudioEntityBuilder builder)
+    {
+        if (ObjectUtils.isEmpty(builder.file)) {
+            // The file is mandatory: passing null through the setter makes it throw ParamException.
+            builder.file(null);
+        }
+        this.file = builder.file;
+
+        if (ObjectUtils.isEmpty(builder.model)) {
+            builder.model(DEFAULT_MODEL);
+        }
+        this.model = builder.model;
+
+        this.prompt = builder.prompt;
+
+        if (StringUtils.isEmpty(builder.format)) {
+            builder.format(AudioFormatModel.json.name());
+        }
+        this.format = builder.format;
+
+        if (ObjectUtils.isEmpty(builder.temperature)) {
+            builder.temperature(1D);
+        }
+        this.temperature = builder.temperature;
+
+        this.language = builder.language;
+    }
+
+    /**
+     * Builder whose setters validate their argument and throw {@link ParamException}
+     * when the value is not acceptable.
+     */
+    public static class AudioEntityBuilder
+    {
+        /**
+         * Sets the audio file to upload.
+         *
+         * @param file the audio file; its extension (case-insensitive) must match an {@link AudioModel} constant
+         * @return this builder
+         * @throws ParamException if the file is empty or its extension is missing or unsupported
+         */
+        public AudioEntityBuilder file(File file)
+        {
+            if (ObjectUtils.isEmpty(file)) {
+                throw new ParamException("Invalid file must not be empty");
+            }
+
+            String extension = FileUtils.getExtension(file);
+            if (StringUtils.isEmpty(extension) || ObjectUtils.isEmpty(EnumUtils.getEnum(AudioModel.class, extension.toLowerCase()))) {
+                throw new ParamException(String.format("Invalid extension: %s , Must be one of %s", extension, Arrays.toString(AudioModel.values())));
+            }
+            this.file = file;
+            return this;
+        }
+
+        /**
+         * Sets the model identifier.
+         *
+         * @param model the model ID; only {@code whisper-1} is accepted
+         * @return this builder
+         * @throws ParamException if the model is {@code null} or anything other than {@code whisper-1}
+         */
+        public AudioEntityBuilder model(String model)
+        {
+            // Constant-first comparison so that a null model is rejected with ParamException
+            // instead of failing with a NullPointerException.
+            if (!DEFAULT_MODEL.equals(model)) {
+                throw new ParamException(String.format("Invalid model: %s , Must be only support whisper-1", model));
+            }
+            this.model = model;
+            return this;
+        }
+
+        /**
+         * Sets the transcript output format.
+         *
+         * @param format the name of an {@link AudioFormatModel} constant
+         * @return this builder
+         * @throws ParamException if the format does not name an {@link AudioFormatModel} constant
+         */
+        public AudioEntityBuilder format(String format)
+        {
+            if (ObjectUtils.isEmpty(EnumUtils.getEnum(AudioFormatModel.class, format))) {
+                throw new ParamException(String.format("Invalid format: %s , Must be one of %s", format, Arrays.toString(AudioFormatModel.values())));
+            }
+            this.format = format;
+            return this;
+        }
+
+        /**
+         * Sets the sampling temperature.
+         *
+         * @param temperature the temperature; values from 0 to 2 inclusive are accepted
+         * @return this builder
+         * @throws ParamException if the temperature is {@code null} or outside the accepted range
+         */
+        public AudioEntityBuilder temperature(Double temperature)
+        {
+            // The explicit null check avoids a NullPointerException from auto-unboxing.
+            if (temperature == null || temperature < 0 || temperature > 2) {
+                throw new ParamException(String.format("Invalid temperature: %s , between 0 and 2", temperature));
+            }
+            this.temperature = temperature;
+            return this;
+        }
+
+        /**
+         * Creates the entity, applying defaults for unset values.
+         *
+         * @return the configured entity
+         * @throws ParamException if no file was supplied
+         */
+        public AudioEntity build()
+        {
+            return new AudioEntity(this);
+        }
+    }
+}
diff --git a/src/main/java/org/devlive/sdk/openai/model/AudioFormatModel.java b/src/main/java/org/devlive/sdk/openai/model/AudioFormatModel.java
@@ -0,0 +1,10 @@
+package org.devlive.sdk.openai.model;
+
+/**
+ * Transcript output formats accepted by the {@code AudioEntity} builder.
+ * <p>
+ * The constant names are matched against, and stored as, the request's format string,
+ * so they are intentionally lowercase and must not be renamed.
+ */
+public enum AudioFormatModel
+{
+    json,
+    text,
+    srt,
+    verbose_json,
+    vtt
+}
diff --git a/src/main/java/org/devlive/sdk/openai/model/AudioModel.java b/src/main/java/org/devlive/sdk/openai/model/AudioModel.java
@@ -0,0 +1,12 @@
+package org.devlive.sdk.openai.model;
+
+/**
+ * Audio file extensions accepted by the {@code AudioEntity} builder.
+ * <p>
+ * The builder lower-cases the extension of the supplied file and looks it up by constant
+ * name, so the names are intentionally lowercase and must not be renamed.
+ */
+public enum AudioModel
+{
+    mp3,
+    mp4,
+    mpeg,
+    mpga,
+    m4a,
+    wav,
+    webm
+}
diff --git a/src/main/java/org/devlive/sdk/openai/model/UrlModel.java b/src/main/java/org/devlive/sdk/openai/model/UrlModel.java
@@ -11,5 +11,6 @@ public enum UrlModel
     FETCH_IMAGES_GENERATIONS,
     FETCH_IMAGES_EDITS,
     FETCH_IMAGES_VARIATIONS,
-    FETCH_EMBEDDINGS
+    FETCH_EMBEDDINGS,
+    FETCH_AUDIO_TRANSCRIPTIONS
 }
diff --git a/src/main/java/org/devlive/sdk/openai/response/AudioResponse.java b/src/main/java/org/devlive/sdk/openai/response/AudioResponse.java
@@ -0,0 +1,13 @@
+package org.devlive.sdk.openai.response;
+
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import lombok.Data;
+
+/**
+ * Response body of an audio request. Unknown JSON properties are ignored.
+ */
+@Data
+@JsonIgnoreProperties(ignoreUnknown = true)
+public class AudioResponse
+{
+    /**
+     * Value of the {@code text} property of the response.
+     */
+    @JsonProperty(value = "text")
+    private String text;
+}
diff --git a/src/main/java/org/devlive/sdk/openai/utils/FileUtils.java b/src/main/java/org/devlive/sdk/openai/utils/FileUtils.java
@@ -0,0 +1,30 @@
+package org.devlive.sdk.openai.utils;
+
+import java.io.File;
+
+public class FileUtils
+{
+    private FileUtils()
+    {
+    }
+
+    /**
+     * Returns the suffix of the given file's name, i.e. the text after its last dot.
+     *
+     * @param file the file whose name is inspected
+     * @return the extension without the dot, or {@code null} when the name contains no dot,
+     * starts with its only dot, or ends with a dot
+     */
+    public static String getExtension(File file)
+    {
+        String name = file.getName();
+        int lastDot = name.lastIndexOf('.');
+        // A dot at index 0 (or none at all) and a trailing dot both mean "no extension".
+        if (lastDot <= 0 || lastDot >= name.length() - 1) {
+            return null;
+        }
+        return name.substring(lastDot + 1);
+    }
+}
diff --git a/src/main/java/org/devlive/sdk/openai/utils/ProviderUtils.java b/src/main/java/org/devlive/sdk/openai/utils/ProviderUtils.java
@@ -22,6 +22,7 @@ public class ProviderUtils
         DEFAULT_PROVIDER.put(UrlModel.FETCH_IMAGES_EDITS, "v1/images/edits");
         DEFAULT_PROVIDER.put(UrlModel.FETCH_IMAGES_VARIATIONS, "v1/images/variations");
         DEFAULT_PROVIDER.put(UrlModel.FETCH_EMBEDDINGS, "v1/embeddings");
+        DEFAULT_PROVIDER.put(UrlModel.FETCH_AUDIO_TRANSCRIPTIONS, "v1/audio/transcriptions");
 
         AZURE_PROVIDER.put(UrlModel.FETCH_COMPLETIONS, "completions");
         AZURE_PROVIDER.put(UrlModel.FETCH_CHAT_COMPLETIONS, "chat/completions");
diff --git a/src/test/java/org/devlive/sdk/openai/OpenAiClientTest.java b/src/test/java/org/devlive/sdk/openai/OpenAiClientTest.java
diff --git a/src/test/java/org/devlive/sdk/openai/entity/AudioEntityTest.java b/src/test/java/org/devlive/sdk/openai/entity/AudioEntityTest.java
diff --git a/src/test/java/org/devlive/sdk/openai/utils/FileUtilsTest.java b/src/test/java/org/devlive/sdk/openai/utils/FileUtilsTest.java
diff --git a/src/test/resources/hello.mp3 b/src/test/resources/hello.mp3

-Original file line number
+Diff line change
@@ @@ -0,0 +1,12 @@ @@
 +package org.devlive.sdk.openai.model;
++
 +public enum AudioModel
 +{
 +    mp3,
 +    mp4,
 +    mpeg,
 +    mpga,
 +    m4a,
 +    wav,
 +    webm
 +}
Original file line number	Diff line number	Diff line change
`@@ -11,5 +11,6 @@ public enum UrlModel`
`11`	`11`	`FETCH_IMAGES_GENERATIONS,`
`12`	`12`	`FETCH_IMAGES_EDITS,`
`13`	`13`	`FETCH_IMAGES_VARIATIONS,`
`14`		`- FETCH_EMBEDDINGS`
	`14`	`+ FETCH_EMBEDDINGS,`
	`15`	`+ FETCH_AUDIO_TRANSCRIPTIONS`
`15`	`16`	`}`