Skip to content

Commit 025c24f

Browse files
committed
Support create translation
1 parent 407618d commit 025c24f

File tree

16 files changed

+423
-1
lines changed

16 files changed

+423
-1
lines changed

docs/docs/reference/audio.md

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
---
2+
title: Audio
3+
---
4+
5+
!!! Note
6+
7+
Please build the client before making any calls. The build code is as follows:
8+
9+
```java
10+
OpenAiClient client = OpenAiClient.builder()
11+
.apiHost("https://api.openai.com")
12+
.apiKey(System.getProperty("openai.token"))
13+
.build();
14+
```
15+
16+
`System.getProperty("openai.token")` supplies the API key used to authorize requests.
17+
18+
### Create transcription
19+
20+
---
21+
22+
Transcribes audio into the input language.
23+
24+
```java
25+
String file=this.getClass().getResource("/hello.mp3").getFile();
26+
AudioEntity configure = AudioEntity.builder()
27+
.file(new File(file))
28+
.build();
29+
client.audioTranscriptions(configure);
30+
```
31+
32+
Returns
33+
34+
```json
35+
{
36+
"text": "Hello, my name is Wolfgang and I come from Germany. Where are you heading today?"
37+
}
38+
```

docs/docs/reference/audio.zh.md

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
---
2+
title: Audio
3+
---
4+
5+
!!! Note
6+
7+
调用前请先构建客户端,构建代码如下:
8+
9+
```java
10+
OpenAiClient client = OpenAiClient.builder()
11+
.apiHost("https://api.openai.com")
12+
.apiKey(System.getProperty("openai.token"))
13+
.build();
14+
```
15+
16+
`System.getProperty("openai.token")` 是访问 API 授权的关键。
17+
18+
### Create transcription
19+
20+
---
21+
22+
将音频转录为输入语言。
23+
24+
```java
25+
String file=this.getClass().getResource("/hello.mp3").getFile();
26+
AudioEntity configure = AudioEntity.builder()
27+
.file(new File(file))
28+
.build();
29+
client.audioTranscriptions(configure);
30+
```
31+
32+
Returns
33+
34+
```json
35+
{
36+
"text": "Hello, my name is Wolfgang and I come from Germany. Where are you heading today?"
37+
}
38+
```

docs/mkdocs.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ nav:
6464
- reference/completions_chat.md
6565
- reference/images.md
6666
- reference/embeddings.md
67+
- reference/audio.md
6768
- Provider:
6869
- reference/provider/azure.md
6970
- released.md

src/main/java/org/devlive/sdk/openai/DefaultApi.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import org.devlive.sdk.openai.entity.ImageEntity;
1010
import org.devlive.sdk.openai.entity.ModelEntity;
1111
import org.devlive.sdk.openai.entity.UserKeyEntity;
12+
import org.devlive.sdk.openai.response.AudioResponse;
1213
import org.devlive.sdk.openai.response.CompleteChatResponse;
1314
import org.devlive.sdk.openai.response.CompleteResponse;
1415
import org.devlive.sdk.openai.response.EmbeddingResponse;
@@ -100,4 +101,14 @@ Single<ImageResponse> fetchImagesVariations(@Url String url,
100101
@POST
101102
Single<EmbeddingResponse> fetchEmbeddings(@Url String url,
102103
@Body EmbeddingEntity configure);
104+
105+
/**
106+
* Transcribes audio into the input language.
107+
* 将音频转录为输入语言。
108+
*/
109+
@POST
110+
@Multipart
111+
Single<AudioResponse> fetchAudioTranscriptions(@Url String url,
112+
@Part() MultipartBody.Part audio,
113+
@PartMap Map<String, RequestBody> configure);
103114
}

src/main/java/org/devlive/sdk/openai/DefaultClient.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import okhttp3.MultipartBody;
55
import okhttp3.OkHttpClient;
66
import org.apache.commons.lang3.ObjectUtils;
7+
import org.devlive.sdk.openai.entity.AudioEntity;
78
import org.devlive.sdk.openai.entity.CompletionChatEntity;
89
import org.devlive.sdk.openai.entity.CompletionEntity;
910
import org.devlive.sdk.openai.entity.EmbeddingEntity;
@@ -12,6 +13,7 @@
1213
import org.devlive.sdk.openai.entity.UserKeyEntity;
1314
import org.devlive.sdk.openai.model.ProviderModel;
1415
import org.devlive.sdk.openai.model.UrlModel;
16+
import org.devlive.sdk.openai.response.AudioResponse;
1517
import org.devlive.sdk.openai.response.CompleteChatResponse;
1618
import org.devlive.sdk.openai.response.CompleteResponse;
1719
import org.devlive.sdk.openai.response.EmbeddingResponse;
@@ -102,6 +104,15 @@ public EmbeddingResponse createEmbeddings(EmbeddingEntity configure)
102104
.blockingGet();
103105
}
104106

107+
public AudioResponse audioTranscriptions(AudioEntity configure)
108+
{
109+
MultipartBody.Part fileBody = MultipartBodyUtils.getPart(configure.getFile(), "file");
110+
return this.api.fetchAudioTranscriptions(ProviderUtils.getUrl(provider, UrlModel.FETCH_AUDIO_TRANSCRIPTIONS),
111+
fileBody,
112+
configure.convertMap())
113+
.blockingGet();
114+
}
115+
105116
public void close()
106117
{
107118
if (ObjectUtils.isNotEmpty(this.client)) {
Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
package org.devlive.sdk.openai.entity;
2+
3+
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
4+
import com.fasterxml.jackson.annotation.JsonProperty;
5+
import com.google.common.collect.Maps;
6+
import lombok.AllArgsConstructor;
7+
import lombok.Builder;
8+
import lombok.Data;
9+
import lombok.NoArgsConstructor;
10+
import lombok.ToString;
11+
import okhttp3.RequestBody;
12+
import org.apache.commons.lang3.EnumUtils;
13+
import org.apache.commons.lang3.ObjectUtils;
14+
import org.apache.commons.lang3.StringUtils;
15+
import org.devlive.sdk.openai.exception.ParamException;
16+
import org.devlive.sdk.openai.model.AudioFormatModel;
17+
import org.devlive.sdk.openai.model.AudioModel;
18+
import org.devlive.sdk.openai.utils.FileUtils;
19+
import org.devlive.sdk.openai.utils.MultipartBodyUtils;
20+
21+
import java.io.File;
22+
import java.util.Arrays;
23+
import java.util.Map;
24+
25+
@Data
26+
@Builder
27+
@ToString
28+
@NoArgsConstructor
29+
@AllArgsConstructor
30+
@JsonIgnoreProperties(ignoreUnknown = true)
31+
public class AudioEntity
32+
{
33+
/**
34+
* The audio file object (not file name) to transcribe, in one of these formats: mp3, mp4, mpeg, mpga, m4a, wav, or webm.
35+
* 要转录的音频文件对象(不是文件名),采用以下格式之一:mp3、mp4、mpeg、mpga、m4a、wav 或 webm。
36+
*/
37+
@JsonProperty(value = "file")
38+
private File file;
39+
40+
/**
41+
* ID of the model to use. Only whisper-1 is currently available.
42+
* 要使用的模型的 ID。目前只有 whisper-1 可用。
43+
*/
44+
@JsonProperty(value = "model")
45+
private String model;
46+
47+
/**
48+
* An optional text to guide the model's style or continue a previous audio segment. The prompt should match the audio language.
49+
* 用于指导模型风格或继续之前的音频片段的可选文本。提示应与音频语言相匹配。
50+
*/
51+
@JsonProperty(value = "prompt")
52+
private String prompt;
53+
54+
/**
55+
* The format of the transcript output
56+
* 转录输出的格式
57+
*/
58+
@JsonProperty(value = "response_format")
59+
private String format;
60+
61+
/**
62+
* The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.
63+
* 采样温度,介于 0 和 1 之间。较高的值(如 0.8)将使输出更加随机,而较低的值(如 0.2)将使其更加集中和确定性。如果设置为 0,模型将使用对数概率自动升高温度,直到达到特定阈值。
64+
*/
65+
@JsonProperty(value = "temperature")
66+
private Double temperature;
67+
68+
/**
69+
* The language of the input audio. Supplying the input language in ISO-639-1 format will improve accuracy and latency.
70+
* 输入音频的语言。以 ISO-639-1 格式提供输入语言将提高准确性和延迟。
71+
*/
72+
@JsonProperty(value = "language")
73+
private String language;
74+
75+
public Map<String, RequestBody> convertMap()
76+
{
77+
Map<String, RequestBody> map = Maps.newConcurrentMap();
78+
if (StringUtils.isNotEmpty(this.model)) {
79+
map.put("model", RequestBody.create(MultipartBodyUtils.TYPE, this.getModel()));
80+
}
81+
if (StringUtils.isNotEmpty(this.prompt)) {
82+
map.put("prompt", RequestBody.create(MultipartBodyUtils.TYPE, this.getPrompt()));
83+
}
84+
if (StringUtils.isNotEmpty(this.format)) {
85+
map.put("response_format", RequestBody.create(MultipartBodyUtils.TYPE, this.getFormat()));
86+
}
87+
if (ObjectUtils.isNotEmpty(this.temperature)) {
88+
map.put("temperature", RequestBody.create(MultipartBodyUtils.TYPE, String.valueOf(this.getTemperature())));
89+
}
90+
if (StringUtils.isNotEmpty(this.language)) {
91+
map.put("language", RequestBody.create(MultipartBodyUtils.TYPE, this.getLanguage()));
92+
}
93+
return map;
94+
}
95+
96+
private AudioEntity(AudioEntityBuilder builder)
97+
{
98+
if (ObjectUtils.isEmpty(builder.file)) {
99+
builder.file(null);
100+
}
101+
this.file = builder.file;
102+
103+
if (ObjectUtils.isEmpty(builder.model)) {
104+
builder.model("whisper-1");
105+
}
106+
this.model = builder.model;
107+
108+
this.prompt = builder.prompt;
109+
110+
if (StringUtils.isEmpty(builder.format)) {
111+
builder.format(AudioFormatModel.json.name());
112+
}
113+
this.format = builder.format;
114+
115+
if (ObjectUtils.isEmpty(builder.temperature)) {
116+
builder.temperature(1D);
117+
}
118+
this.temperature = builder.temperature;
119+
120+
this.language = builder.language;
121+
}
122+
123+
public static class AudioEntityBuilder
124+
{
125+
public AudioEntityBuilder file(File file)
126+
{
127+
if (ObjectUtils.isEmpty(file)) {
128+
throw new ParamException("Invalid file must not be empty");
129+
}
130+
131+
String extension = FileUtils.getExtension(file);
132+
if (StringUtils.isEmpty(extension) || ObjectUtils.isEmpty(EnumUtils.getEnum(AudioModel.class, extension.toLowerCase()))) {
133+
throw new ParamException(String.format("Invalid extension: %s , Must be one of %s", extension, Arrays.toString(AudioModel.values())));
134+
}
135+
this.file = file;
136+
return this;
137+
}
138+
139+
public AudioEntityBuilder model(String model)
140+
{
141+
if (!model.equals("whisper-1")) {
142+
throw new ParamException(String.format("Invalid model: %s , Must be only support whisper-1", model));
143+
}
144+
this.model = model;
145+
return this;
146+
}
147+
148+
public AudioEntityBuilder format(String format)
149+
{
150+
if (ObjectUtils.isEmpty(EnumUtils.getEnum(AudioFormatModel.class, format))) {
151+
throw new ParamException(String.format("Invalid format: %s , Must be one of %s", format, Arrays.toString(AudioFormatModel.values())));
152+
}
153+
this.format = format;
154+
return this;
155+
}
156+
157+
public AudioEntityBuilder temperature(Double temperature)
158+
{
159+
if (temperature < 0 || temperature > 2) {
160+
throw new ParamException(String.format("Invalid temperature: %s , between 0 and 2", temperature));
161+
}
162+
this.temperature = temperature;
163+
return this;
164+
}
165+
166+
public AudioEntity build()
167+
{
168+
return new AudioEntity(this);
169+
}
170+
}
171+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
package org.devlive.sdk.openai.model;
2+
3+
/**
 * Transcript output formats.
 * <p>
 * Constants are looked up by their exact name (see the {@code format} setter of the
 * {@code AudioEntity} builder) and sent as the {@code response_format} field, so the
 * lowercase spelling is significant.
 */
public enum AudioFormatModel
{
    json, text, srt, verbose_json, vtt;
}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
package org.devlive.sdk.openai.model;
2+
3+
/**
 * File extensions accepted for uploaded audio.
 * <p>
 * The {@code file} setter of the {@code AudioEntity} builder lowercases a file's
 * extension and looks it up here by constant name, so the lowercase spelling is
 * significant.
 */
public enum AudioModel
{
    mp3, mp4, mpeg, mpga, m4a, wav, webm;
}

src/main/java/org/devlive/sdk/openai/model/UrlModel.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,5 +11,6 @@ public enum UrlModel
1111
FETCH_IMAGES_GENERATIONS,
1212
FETCH_IMAGES_EDITS,
1313
FETCH_IMAGES_VARIATIONS,
14-
FETCH_EMBEDDINGS
14+
FETCH_EMBEDDINGS,
15+
FETCH_AUDIO_TRANSCRIPTIONS
1516
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
package org.devlive.sdk.openai.response;
2+
3+
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
4+
import com.fasterxml.jackson.annotation.JsonProperty;
5+
import lombok.Data;
6+
7+
@Data
8+
@JsonIgnoreProperties(ignoreUnknown = true)
9+
public class AudioResponse
10+
{
11+
@JsonProperty(value = "text")
12+
private String text;
13+
}

0 commit comments

Comments
 (0)