Merge pull request #2174 from lym0302/mix_test

yt605155624 · web-flow · commit bc2613b77299 · 2022-07-20T19:01:24.000+08:00
[tts] add mix tts test
diff --git a/examples/zh_en_tts/tts3/README.md b/examples/zh_en_tts/tts3/README.md
@@ -0,0 +1,26 @@
+# Test
+We trained a Chinese-English mixed FastSpeech2 model. The training code is still being cleaned up, so for now this README only shows how to run synthesis with the pretrained model.
+The sample rate of the synthesized audio is 22050 Hz.
+
+## Download pretrained models
+Put pretrained models in a directory named `models`.
+
+- [fastspeech2_csmscljspeech_add-zhen.zip](https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_csmscljspeech_add-zhen.zip)
+- [hifigan_ljspeech_ckpt_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_ljspeech_ckpt_0.2.0.zip)
+
+```bash
+mkdir models
+cd models
+wget https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_csmscljspeech_add-zhen.zip
+unzip fastspeech2_csmscljspeech_add-zhen.zip
+wget https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_ljspeech_ckpt_0.2.0.zip
+unzip hifigan_ljspeech_ckpt_0.2.0.zip
+cd ../
+```
+
+## Run the test
+To choose the speaker, set `--spk_id` to `0` or `1` in `local/synthesize_e2e.sh`.
+
+```bash
+bash test.sh
+```
diff --git a/examples/zh_en_tts/tts3/local/synthesize_e2e.sh b/examples/zh_en_tts/tts3/local/synthesize_e2e.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+model_dir=$1
+output=$2
+am_name=fastspeech2_csmscljspeech_add-zhen
+am_model_dir=${model_dir}/${am_name}/
+
+stage=1
+stop_stage=1
+
+
+# hifigan
+if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
+    FLAGS_allocator_strategy=naive_best_fit \
+    FLAGS_fraction_of_gpu_memory_to_use=0.01 \
+    python3 ${BIN_DIR}/../synthesize_e2e.py \
+        --am=fastspeech2_mix \
+        --am_config=${am_model_dir}/default.yaml \
+        --am_ckpt=${am_model_dir}/snapshot_iter_94000.pdz \
+        --am_stat=${am_model_dir}/speech_stats.npy \
+        --voc=hifigan_ljspeech \
+        --voc_config=${model_dir}/hifigan_ljspeech_ckpt_0.2.0/default.yaml \
+        --voc_ckpt=${model_dir}/hifigan_ljspeech_ckpt_0.2.0/snapshot_iter_2500000.pdz \
+        --voc_stat=${model_dir}/hifigan_ljspeech_ckpt_0.2.0/feats_stats.npy \
+        --lang=mix \
+        --text=${BIN_DIR}/../sentences_mix.txt \
+        --output_dir=${output}/test_e2e \
+        --phones_dict=${am_model_dir}/phone_id_map.txt \
+        --speaker_dict=${am_model_dir}/speaker_id_map.txt \
+        --spk_id 0 
+fi
diff --git a/examples/zh_en_tts/tts3/path.sh b/examples/zh_en_tts/tts3/path.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+# Environment setup; source it from examples/zh_en_tts/tts3, since MAIN_ROOT is resolved three levels above ${PWD}.
+export MAIN_ROOT="$(realpath "${PWD}/../../../")"
+export PATH="${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}"
+export LC_ALL=C
+
+export PYTHONDONTWRITEBYTECODE=1
+# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
+export PYTHONIOENCODING=UTF-8
+export PYTHONPATH="${MAIN_ROOT}:${PYTHONPATH}"
+
+MODEL=fastspeech2
+export BIN_DIR="${MAIN_ROOT}/paddlespeech/t2s/exps/${MODEL}"
diff --git a/examples/zh_en_tts/tts3/test.sh b/examples/zh_en_tts/tts3/test.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+# Runs local/synthesize_e2e.sh with the models in ${model_dir}; output goes under ${output_dir}.
+set -e
+source path.sh
+
+gpus=0,1
+stage=3
+stop_stage=100
+
+model_dir=models
+output_dir=output
+
+# with the following command, you can choose the stage range you want to run
+# such as `bash test.sh --stage 3 --stop-stage 3`
+# this cannot be mixed with positional arguments `$1`, `$2` ...
+source "${MAIN_ROOT}/utils/parse_options.sh" || exit 1
+
+# Stage 3 is the only stage defined in this script.
+if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
+    # synthesize_e2e, vocoder is hifigan by default
+    CUDA_VISIBLE_DEVICES="${gpus}" ./local/synthesize_e2e.sh "${model_dir}" "${output_dir}" || exit 1
+fi
+
diff --git a/paddlespeech/t2s/exps/sentences_mix.txt b/paddlespeech/t2s/exps/sentences_mix.txt
@@ -0,0 +1,8 @@
+001 你好，欢迎使用 Paddle Speech 中英文混合 T T S 功能，开始你的合成之旅吧!
+002 我们的声学模型使用了 Fast Speech Two, 声码器使用了 Parallel Wave GAN and Hifi GAN.
+003 Paddle N L P 发布 ERNIE Tiny 全系列中文预训练小模型，快速提升预训练模型部署效率，通用信息抽取技术 U I E Tiny 系列模型全新升级，支持速度更快效果更好的 U I E 小模型。
+004 Paddle Speech 发布 P P A S R 流式语音识别系统、P P T T S 流式语音合成系统、P P V P R 全链路声纹识别系统。
+005 Paddle Bo Bo: 使用 Paddle Speech 的语音合成模块生成虚拟人的声音。
+006 热烈欢迎您在 Discussions 中提交问题，并在 Issues 中指出发现的 bug。此外，我们非常希望您参与到 Paddle Speech 的开发中！
+007 我喜欢 eat apple, 你喜欢 drink milk。
+008 我们要去云南 team building, 非常非常 happy.