Skip to content

Commit 74245cc

Browse files
committed
add streaming tts scripts
1 parent caaa5cd commit 74245cc

File tree

3 files changed

+122
-0
lines changed

3 files changed

+122
-0
lines changed
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#!/bin/bash
2+
3+
# http client test
4+
# If `127.0.0.1` is not accessible, you need to use the actual service IP address.
5+
paddlespeech_client tts_online --server_ip 127.0.0.1 --port 8092 --protocol http --input "您好,欢迎使用百度飞桨语音合成服务。" --output output.http.wav
6+
7+
# websocket client test
8+
# If `127.0.0.1` is not accessible, you need to use the actual service IP address.
9+
paddlespeech_client tts_online --server_ip 127.0.0.1 --port 8192 --protocol websocket --input "您好,欢迎使用百度飞桨语音合成服务。" --output output.ws.wav
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
# This is the parameter configuration file for streaming tts server.
2+
3+
#################################################################################
4+
# SERVER SETTING #
5+
#################################################################################
6+
host: 0.0.0.0
7+
port: 8192
8+
9+
# The task format in the engine_list is: <speech task>_<engine type>
10+
# engine_list choices = ['tts_online', 'tts_online-onnx']; inference with tts_online-onnx is faster than with tts_online.
11+
# protocol choices = ['websocket', 'http']
12+
protocol: 'websocket'
13+
engine_list: ['tts_online-onnx']
14+
15+
16+
#################################################################################
17+
# ENGINE CONFIG #
18+
#################################################################################
19+
20+
################################### TTS #########################################
21+
################### speech task: tts; engine_type: online #######################
22+
tts_online:
23+
# am (acoustic model) choices=['fastspeech2_csmsc', 'fastspeech2_cnndecoder_csmsc']
24+
    # fastspeech2_cnndecoder_csmsc supports streaming am inference.
25+
am: 'fastspeech2_csmsc'
26+
am_config:
27+
am_ckpt:
28+
am_stat:
29+
phones_dict:
30+
tones_dict:
31+
speaker_dict:
32+
spk_id: 0
33+
34+
    # voc (vocoder) choices=['mb_melgan_csmsc', 'hifigan_csmsc']
35+
# Both mb_melgan_csmsc and hifigan_csmsc support streaming voc inference
36+
voc: 'mb_melgan_csmsc'
37+
voc_config:
38+
voc_ckpt:
39+
voc_stat:
40+
41+
# others
42+
lang: 'zh'
43+
device: 'cpu' # set 'gpu:id' or 'cpu'
44+
    # am_block and am_pad are only used by the fastspeech2_cnndecoder_csmsc model for streaming am inference,
45+
    # when am_pad is set to 12, streaming synthetic audio is the same as non-streaming synthetic audio
46+
am_block: 72
47+
am_pad: 12
48+
    # voc_block and voc_pad are used by the voc model for streaming voc inference,
49+
    # when the voc model is mb_melgan_csmsc, setting voc_pad to 14 makes streaming synthetic audio the same as non-streaming synthetic audio; voc_pad can be lowered to a minimum of 7 and the streaming synthetic audio still sounds normal
50+
    # when the voc model is hifigan_csmsc, setting voc_pad to 19 makes streaming synthetic audio the same as non-streaming synthetic audio; with voc_pad set to 14 the streaming synthetic audio still sounds normal
51+
voc_block: 36
52+
voc_pad: 14
53+
54+
55+
56+
#################################################################################
57+
# ENGINE CONFIG #
58+
#################################################################################
59+
60+
################################### TTS #########################################
61+
################### speech task: tts; engine_type: online-onnx #######################
62+
tts_online-onnx:
63+
# am (acoustic model) choices=['fastspeech2_csmsc_onnx', 'fastspeech2_cnndecoder_csmsc_onnx']
64+
    # fastspeech2_cnndecoder_csmsc_onnx supports streaming am inference.
65+
am: 'fastspeech2_cnndecoder_csmsc_onnx'
66+
# am_ckpt is a list, if am is fastspeech2_cnndecoder_csmsc_onnx, am_ckpt = [encoder model, decoder model, postnet model];
67+
# if am is fastspeech2_csmsc_onnx, am_ckpt = [ckpt model];
68+
am_ckpt: # list
69+
am_stat:
70+
phones_dict:
71+
tones_dict:
72+
speaker_dict:
73+
spk_id: 0
74+
am_sample_rate: 24000
75+
am_sess_conf:
76+
device: "cpu" # set 'gpu:id' or 'cpu'
77+
use_trt: False
78+
cpu_threads: 4
79+
80+
    # voc (vocoder) choices=['mb_melgan_csmsc_onnx', 'hifigan_csmsc_onnx']
81+
# Both mb_melgan_csmsc_onnx and hifigan_csmsc_onnx support streaming voc inference
82+
voc: 'hifigan_csmsc_onnx'
83+
voc_ckpt:
84+
voc_sample_rate: 24000
85+
voc_sess_conf:
86+
device: "cpu" # set 'gpu:id' or 'cpu'
87+
use_trt: False
88+
cpu_threads: 4
89+
90+
# others
91+
lang: 'zh'
92+
    # am_block and am_pad are only used by the fastspeech2_cnndecoder_csmsc_onnx model for streaming am inference,
93+
    # when am_pad is set to 12, streaming synthetic audio is the same as non-streaming synthetic audio
94+
am_block: 72
95+
am_pad: 12
96+
    # voc_block and voc_pad are used by the voc model for streaming voc inference,
97+
    # when the voc model is mb_melgan_csmsc_onnx, setting voc_pad to 14 makes streaming synthetic audio the same as non-streaming synthetic audio; voc_pad can be lowered to a minimum of 7 and the streaming synthetic audio still sounds normal
98+
    # when the voc model is hifigan_csmsc_onnx, setting voc_pad to 19 makes streaming synthetic audio the same as non-streaming synthetic audio; with voc_pad set to 14 the streaming synthetic audio still sounds normal
99+
voc_block: 36
100+
voc_pad: 14
101+
    # voc_upsample should be the same as n_shift in the voc config.
102+
voc_upsample: 300
103+
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
#!/bin/bash
2+
3+
# http server
4+
paddlespeech_server start --config_file ./conf/tts_online_application.yaml &> tts.http.log &
5+
6+
7+
# websocket server
8+
paddlespeech_server start --config_file ./conf/tts_online_ws_application.yaml &> tts.ws.log &
9+
10+

0 commit comments

Comments
 (0)