Skip to content

Commit e681172

Browse files
authored
feat: Upgrade SAA to 1.1.0.0-RC1 (#409)
* Playground adapt spring-ai 1.1.0 * Fix audio model examples
1 parent 28515bf commit e681172

File tree

26 files changed

+116
-563
lines changed

26 files changed

+116
-563
lines changed

pom.xml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,8 @@
4747

4848
<!-- Spring AI Alibaba -->
4949
<!-- Install Spring AI Alibaba in your local. -->
50-
<spring-ai-alibaba.version>1.1.0.0-M5</spring-ai-alibaba.version>
51-
<spring-ai-alibaba.extensions.version>1.1.0.0-M5</spring-ai-alibaba.extensions.version>
50+
<spring-ai-alibaba.version>1.1.0.0-RC1</spring-ai-alibaba.version>
51+
<spring-ai-alibaba-extensions.version>1.1.0.0-RC1</spring-ai-alibaba-extensions.version>
5252

5353
<!-- maven plugin -->
5454
<maven-deploy-plugin.version>3.1.1</maven-deploy-plugin.version>
@@ -109,7 +109,7 @@
109109
<dependency>
110110
<groupId>com.alibaba.cloud.ai</groupId>
111111
<artifactId>spring-ai-alibaba-extensions-bom</artifactId>
112-
<version>${spring-ai-alibaba.extensions.version}</version>
112+
<version>${spring-ai-alibaba-extensions.version}</version>
113113
<type>pom</type>
114114
<scope>import</scope>
115115
</dependency>
Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
11
# Spring AI Alibaba Audio Example
22

33
演示使用阿里通义大模型进行音频处理。包含语音转录和语音合成。
4-
5-
> Tips: 此模块最新示例基于 spring ai alibaba 1.0.0.3,中央仓库未发布,请本地编译安装。

spring-ai-alibaba-audio-example/dashscope-audio/src/main/java/com/alibaba/cloud/ai/example/audio/AudioSpeechController.java

Lines changed: 38 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -16,104 +16,102 @@
1616

1717
package com.alibaba.cloud.ai.example.audio;
1818

19-
import com.alibaba.cloud.ai.dashscope.api.DashScopeAudioSpeechApi;
20-
import com.alibaba.cloud.ai.dashscope.audio.DashScopeAudioSpeechOptions;
21-
import com.alibaba.cloud.ai.dashscope.audio.synthesis.SpeechSynthesisModel;
22-
import com.alibaba.cloud.ai.dashscope.audio.synthesis.SpeechSynthesisPrompt;
23-
import com.alibaba.cloud.ai.dashscope.audio.synthesis.SpeechSynthesisResponse;
19+
import java.io.File;
20+
import java.io.FileOutputStream;
21+
import java.io.IOException;
22+
import java.util.concurrent.CountDownLatch;
23+
2424
import jakarta.annotation.PreDestroy;
2525

26+
import com.alibaba.cloud.ai.dashscope.audio.DashScopeAudioSpeechModel;
27+
import com.alibaba.cloud.ai.dashscope.audio.DashScopeAudioSpeechOptions;
2628
import com.alibaba.cloud.ai.dashscope.spec.DashScopeModel;
2729
import org.apache.commons.io.FileUtils;
30+
import org.springframework.ai.audio.tts.TextToSpeechModel;
31+
import org.springframework.ai.audio.tts.TextToSpeechPrompt;
32+
import org.springframework.ai.audio.tts.TextToSpeechResponse;
33+
import org.springframework.beans.factory.annotation.Qualifier;
2834
import org.springframework.boot.ApplicationArguments;
2935
import org.springframework.boot.ApplicationRunner;
3036
import org.springframework.web.bind.annotation.GetMapping;
3137
import org.springframework.web.bind.annotation.RequestMapping;
3238
import org.springframework.web.bind.annotation.RestController;
3339
import reactor.core.publisher.Flux;
3440

35-
import java.io.File;
36-
import java.io.FileOutputStream;
37-
import java.io.IOException;
38-
import java.nio.ByteBuffer;
39-
import java.util.concurrent.CountDownLatch;
40-
4141
/**
4242
* 文本转语音(语音合成)
43+
* <a href="https://help.aliyun.com/zh/model-studio/text-to-speech">语音合成</a>
44+
*
4345
* @author yuluo
4446
* @author <a href="mailto:yuluo08290126@gmail.com">yuluo</a>
47+
* @see DashScopeAudioSpeechModel
4548
*/
4649

4750
@RestController
4851
@RequestMapping("/ai/speech")
4952
public class AudioSpeechController implements ApplicationRunner {
5053

51-
private final SpeechSynthesisModel speechSynthesisModel;
54+
private final TextToSpeechModel speechSynthesisModel;
5255

5356
private static final String TEXT = "白日依山尽,黄河入海流。这是测试";
5457

5558
private static final String FILE_PATH = "spring-ai-alibaba-audio-example/dashscope-audio/src/main/resources/gen/tts";
5659

57-
public AudioSpeechController(SpeechSynthesisModel speechSynthesisModel) {
60+
public AudioSpeechController(
61+
@Qualifier("dashScopeSpeechSynthesisModel") TextToSpeechModel speechSynthesisModel) {
5862

5963
this.speechSynthesisModel = speechSynthesisModel;
6064
}
6165

62-
@GetMapping
66+
@GetMapping("/call")
6367
public void tts() throws IOException {
6468

65-
SpeechSynthesisResponse response = speechSynthesisModel.call(
66-
new SpeechSynthesisPrompt(
67-
TEXT,
68-
DashScopeAudioSpeechOptions.builder()
69-
.model(DashScopeModel.AudioModel.SAMBERT_ZHICHU_V1.getValue())
70-
.build()
71-
)
69+
TextToSpeechResponse response = speechSynthesisModel.call(
70+
new TextToSpeechPrompt(
71+
TEXT,
72+
DashScopeAudioSpeechOptions.builder()
73+
.model(DashScopeModel.AudioModel.COSYVOICE_V1.getValue())
74+
.build()
75+
)
7276
);
7377

7478
File file = new File(FILE_PATH + "/output.mp3");
7579
try (FileOutputStream fos = new FileOutputStream(file)) {
76-
ByteBuffer byteBuffer = response.getResult().getOutput().getAudio();
77-
fos.write(byteBuffer.array());
78-
}
79-
catch (IOException e) {
80+
fos.write(response.getResult().getOutput());
81+
} catch (IOException e) {
8082
throw new IOException(e.getMessage());
8183
}
8284
}
8385

8486
@GetMapping("/stream")
8587
public void streamTTS() {
8688

87-
Flux<SpeechSynthesisResponse> response = speechSynthesisModel.stream(
88-
new SpeechSynthesisPrompt(
89-
TEXT,
90-
DashScopeAudioSpeechOptions.builder()
91-
.model(DashScopeModel.AudioModel.SAMBERT_ZHITING_V1.getValue())
92-
.build()
93-
)
89+
Flux<TextToSpeechResponse> response = speechSynthesisModel.stream(
90+
new TextToSpeechPrompt(
91+
TEXT,
92+
DashScopeAudioSpeechOptions.builder()
93+
.model(DashScopeModel.AudioModel.SAMBERT_ZHITING_V1.getValue())
94+
.build()
95+
)
9496
);
9597

9698
CountDownLatch latch = new CountDownLatch(1);
9799
File file = new File(FILE_PATH + "/output-stream.mp3");
98100
try (FileOutputStream fos = new FileOutputStream(file)) {
99101

100102
response.doFinally(
101-
signal -> latch.countDown()
103+
signal -> latch.countDown()
102104
).subscribe(synthesisResponse -> {
103-
ByteBuffer byteBuffer = synthesisResponse.getResult().getOutput().getAudio();
104-
byte[] bytes = new byte[byteBuffer.remaining()];
105-
byteBuffer.get(bytes);
105+
byte[] bytes = synthesisResponse.getResult().getOutput();
106106
try {
107107
fos.write(bytes);
108-
}
109-
catch (IOException e) {
108+
} catch (IOException e) {
110109
throw new RuntimeException(e);
111110
}
112111
});
113112

114113
latch.await();
115-
}
116-
catch (IOException | InterruptedException e) {
114+
} catch (IOException | InterruptedException e) {
117115
throw new RuntimeException(e);
118116
}
119117
}

spring-ai-alibaba-audio-example/dashscope-audio/src/main/java/com/alibaba/cloud/ai/example/audio/AudioTranscriptionController.java

Lines changed: 31 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,12 @@
1616

1717
package com.alibaba.cloud.ai.example.audio;
1818

19+
import java.util.concurrent.Executors;
20+
import java.util.concurrent.ScheduledExecutorService;
21+
import java.util.stream.Collectors;
22+
23+
import com.alibaba.cloud.ai.dashscope.api.DashScopeAudioTranscriptionApi;
24+
import com.alibaba.cloud.ai.dashscope.audio.DashScopeAudioTranscriptionModel;
1925
import com.alibaba.cloud.ai.dashscope.audio.DashScopeAudioTranscriptionOptions;
2026
import com.alibaba.cloud.ai.dashscope.audio.transcription.AudioTranscriptionModel;
2127
import com.alibaba.cloud.ai.dashscope.spec.DashScopeModel;
@@ -32,14 +38,13 @@
3238
import org.springframework.web.bind.annotation.RestController;
3339
import reactor.core.publisher.Flux;
3440

35-
import java.util.concurrent.Executors;
36-
import java.util.concurrent.ScheduledExecutorService;
37-
import java.util.stream.Collectors;
38-
3941
/**
4042
* 语音转文本(语音识别)
43+
* <a href="https://help.aliyun.com/zh/model-studio/real-time-speech-recognition">语音识别</a>
44+
*
4145
* @author yuluo
4246
* @author <a href="mailto:yuluo08290126@gmail.com">yuluo</a>
47+
* @see DashScopeAudioTranscriptionModel
4348
*/
4449

4550
@RestController
@@ -50,8 +55,8 @@ public class AudioTranscriptionController {
5055

5156
private static final Logger log = LoggerFactory.getLogger(AudioTranscriptionController.class);
5257

53-
// 模型列表:https://help.aliyun.com/zh/model-studio/sambert-websocket-api
54-
private static final String DEFAULT_MODEL = DashScopeModel.AudioModel.PARAFORMER_V2.getValue();
58+
public static final String AUDIO_FILE_URL =
59+
"https://dashscope.oss-cn-beijing.aliyuncs.com/samples/audio/paraformer/hello_world_female2.wav";
5560

5661
private final ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(1);
5762

@@ -68,16 +73,15 @@ public String callSTT() {
6873

6974
// 录音文件支持HTTP / HTTPS协议
7075
// 若录音文件存储在阿里云OSS,使用RESTful API方式支持使用以 oss://为前缀的临时 URL
71-
Resource resource = new DefaultResourceLoader()
72-
.getResource("https://dashscope.oss-cn-beijing.aliyuncs.com/samples/audio/paraformer/hello_world_female2.wav");
76+
Resource resource = new DefaultResourceLoader().getResource(AUDIO_FILE_URL);
7377

7478
AudioTranscriptionResponse response = transcriptionModel.call(
75-
new AudioTranscriptionPrompt(
76-
resource,
77-
DashScopeAudioTranscriptionOptions.builder()
78-
.withModel(DEFAULT_MODEL)
79-
.build()
80-
)
79+
new AudioTranscriptionPrompt(
80+
resource,
81+
DashScopeAudioTranscriptionOptions.builder()
82+
.model(DashScopeModel.AudioModel.PARAFORMER_V2.getValue())
83+
.build()
84+
)
8185
);
8286

8387
return response.getResult().getOutput();
@@ -89,19 +93,20 @@ public String callSTT() {
8993
@GetMapping("/stream")
9094
public String streamSTT() {
9195

92-
ClassPathResource audioResource = new ClassPathResource("hello_world_male_16k_16bit_mono.wav");
96+
Resource resource = new DefaultResourceLoader().getResource(AUDIO_FILE_URL);
97+
9398
Flux<AudioTranscriptionResponse> response = transcriptionModel
94-
.stream(
95-
new AudioTranscriptionPrompt(
96-
audioResource,
97-
DashScopeAudioTranscriptionOptions.builder()
98-
.withModel("paraformer-realtime-v2")
99-
.withSampleRate(16000)
100-
.withFormat(DashScopeAudioTranscriptionOptions.AudioFormat.WAV)
101-
.withDisfluencyRemovalEnabled(false)
102-
.build()
103-
)
104-
);
99+
.stream(
100+
new AudioTranscriptionPrompt(
101+
resource,
102+
DashScopeAudioTranscriptionOptions.builder()
103+
.model(DashScopeModel.AudioModel.GUMMY_REALTIME_V1.getValue())
104+
.sampleRate(16000)
105+
.format(DashScopeAudioTranscriptionApi.AudioFormat.WAV)
106+
.disfluencyRemovalEnabled(false)
107+
.build()
108+
)
109+
);
105110

106111
return response.map(AudioTranscriptionResponse::getResult)
107112
.map(AudioTranscription::getOutput)

spring-ai-alibaba-chat-example/ark-chat/README.md

Lines changed: 0 additions & 35 deletions
This file was deleted.

spring-ai-alibaba-chat-example/ark-chat/application.yml

Lines changed: 0 additions & 32 deletions
This file was deleted.

spring-ai-alibaba-chat-example/dashscope-chat/pom.xml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,6 @@
3232
<description>Spring AI Alibaba Dashscope Chat Example</description>
3333
<name>Spring AI Alibaba Dashscope Chat Examples</name>
3434

35-
<properties>
36-
<spring-ai-alibaba.version>1.0.0.3</spring-ai-alibaba.version>
37-
</properties>
38-
3935
<dependencies>
4036
<dependency>
4137
<groupId>org.springframework.boot</groupId>

spring-ai-alibaba-chat-example/moonshot-chat/README.md

Lines changed: 0 additions & 5 deletions
This file was deleted.

0 commit comments

Comments
 (0)