Skip to content

Commit 2145b58

Browse files
committed
keep offline embedding/token extraction for compatibility
1 parent f08872a commit 2145b58

File tree

2 files changed

+34
-2
lines changed

2 files changed

+34
-2
lines changed

examples/libritts/cosyvoice2/run.sh

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,23 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
2424
done
2525
fi
2626

27-
# NOTE embedding/token extraction is not necessary now as we support online feature extraction
27+
# NOTE: offline embedding/token extraction is no longer required because online feature extraction is supported, but relying on online extraction will affect training speed
28+
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
29+
echo "Extract campplus speaker embedding, you will get spk2embedding.pt and utt2embedding.pt in data/$x dir"
30+
for x in train-clean-100 train-clean-360 train-other-500 dev-clean dev-other test-clean test-other; do
31+
tools/extract_embedding.py --dir data/$x \
32+
--onnx_path $pretrained_model_dir/campplus.onnx
33+
done
34+
fi
35+
36+
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
37+
echo "Extract discrete speech token, you will get utt2speech_token.pt in data/$x dir"
38+
for x in train-clean-100 train-clean-360 train-other-500 dev-clean dev-other test-clean test-other; do
39+
tools/extract_speech_token.py --dir data/$x \
40+
--onnx_path $pretrained_model_dir/speech_tokenizer_v3.onnx
41+
done
42+
fi
43+
2844
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
2945
echo "Prepare required parquet format data, you should have prepared wav.scp/text/utt2spk/spk2utt/utt2embedding.pt/spk2embedding.pt/utt2speech_token.pt"
3046
for x in train-clean-100 train-clean-360 train-other-500 dev-clean dev-other test-clean test-other; do

examples/libritts/cosyvoice3/run.sh

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,23 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
2525
done
2626
fi
2727

28-
# NOTE embedding/token extraction is not necessary now as we support online feature extraction
28+
# NOTE: offline embedding/token extraction is no longer required because online feature extraction is supported, but relying on online extraction will affect training speed
29+
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
30+
echo "Extract campplus speaker embedding, you will get spk2embedding.pt and utt2embedding.pt in data/$x dir"
31+
for x in train-clean-100 train-clean-360 train-other-500 dev-clean dev-other test-clean test-other; do
32+
tools/extract_embedding.py --dir data/$x \
33+
--onnx_path $pretrained_model_dir/campplus.onnx
34+
done
35+
fi
36+
37+
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
38+
echo "Extract discrete speech token, you will get utt2speech_token.pt in data/$x dir"
39+
for x in train-clean-100 train-clean-360 train-other-500 dev-clean dev-other test-clean test-other; do
40+
tools/extract_speech_token.py --dir data/$x \
41+
--onnx_path $pretrained_model_dir/speech_tokenizer_v3.onnx
42+
done
43+
fi
44+
2945
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
3046
echo "Prepare required parquet format data, you should have prepared wav.scp/text/utt2spk/spk2utt/utt2embedding.pt/spk2embedding.pt/utt2speech_token.pt"
3147
for x in train-clean-100 train-clean-360 train-other-500 dev-clean dev-other test-clean test-other; do

0 commit comments

Comments
 (0)