Skip to content

Commit dfcbc8d

Browse files
authored
Add Kokoro v1.1-zh (#1942)
1 parent f5dfcf8 commit dfcbc8d

20 files changed

+897
-61
lines changed

.github/workflows/export-kokoro.yaml

Lines changed: 172 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ name: export-kokoro-to-onnx
33
on:
44
push:
55
branches:
6-
- export-kokoro
6+
- export-kokoro-2
77

88
workflow_dispatch:
99

@@ -20,7 +20,7 @@ jobs:
2020
fail-fast: false
2121
matrix:
2222
os: [ubuntu-latest]
23-
version: ["0.19", "1.0"]
23+
version: ["0.19", "1.0", "1.1-zh"]
2424
python-version: ["3.10"]
2525

2626
steps:
@@ -34,7 +34,7 @@ jobs:
3434
- name: Install Python dependencies
3535
shell: bash
3636
run: |
37-
pip install "numpy<=1.26.4" onnx==1.16.0 onnxruntime==1.17.1 librosa soundfile piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html misaki[en] misaki[zh] torch==2.6.0+cpu -f https://download.pytorch.org/whl/torch
37+
pip install kokoro "numpy<=1.26.4" onnx==1.16.0 onnxruntime==1.17.1 librosa soundfile piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html misaki[en] misaki[zh] torch==2.6.0+cpu -f https://download.pytorch.org/whl/torch
3838
3939
- name: Run
4040
shell: bash
@@ -49,9 +49,15 @@ jobs:
4949
elif [[ $v == "1.0" ]]; then
5050
cd v1.0
5151
./run.sh
52+
elif [[ $v == "1.1-zh" ]]; then
53+
cd v1.1-zh
54+
./run.sh
55+
else
56+
echo "Unknown version $v"
57+
exit 1
5258
fi
5359
54-
- name: Collect results ${{ matrix.version }}
60+
- name: Collect results 0.19
5561
if: matrix.version == '0.19'
5662
shell: bash
5763
run: |
@@ -71,7 +77,7 @@ jobs:
7177
7278
ls -lh $d.tar.bz2
7379
74-
- name: Collect results ${{ matrix.version }}
80+
- name: Collect results 1.0
7581
if: matrix.version == '1.0'
7682
shell: bash
7783
run: |
@@ -87,7 +93,7 @@ jobs:
8793
8894
d=kokoro-multi-lang-v1_0
8995
mkdir $d
90-
cp -a LICENSE $d/LICENSE
96+
cp -v LICENSE $d/LICENSE
9197
cp -a espeak-ng-data $d/
9298
cp -v $src/kokoro.onnx $d/model.onnx
9399
cp -v $src/voices.bin $d/
@@ -105,7 +111,63 @@ jobs:
105111
106112
ls -lh $d.tar.bz2
107113
108-
- name: Publish to huggingface ${{ matrix.version }}
114+
# Package the Kokoro v1.1-zh export into two release archives:
#   kokoro-multi-lang-v1_1.tar.bz2       (kokoro.onnx      -> model.onnx)
#   kokoro-int8-multi-lang-v1_1.tar.bz2  (kokoro.int8.onnx -> model.int8.onnx)
# Both archives share the same LICENSE, espeak-ng-data, voices, tokens,
# lexicons, README, cppjieba dict directory and *.fst files.
- name: Collect results 1.1-zh
  if: matrix.version == '1.1-zh'
  shell: bash
  run: |
    # --fail (-f) makes curl exit non-zero on an HTTP error instead of
    # saving the server's error page under the requested file name.
    curl -fSL -O https://github.com/csukuangfj/cppjieba/releases/download/sherpa-onnx-2024-04-19/dict.tar.bz2
    tar xvf dict.tar.bz2
    rm dict.tar.bz2

    hf=https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data
    curl -fSL -o date-zh.fst $hf/date.fst
    curl -fSL -o number-zh.fst $hf/number.fst
    curl -fSL -o phone-zh.fst $hf/phone.fst

    src=scripts/kokoro/v1.1-zh

    # package <archive-dir> <model file in $src> <model file name in archive>
    # Builds <archive-dir>.tar.bz2 and removes the staging directory.
    package() {
      local d=$1
      local model_in=$2
      local model_out=$3

      mkdir "$d"
      cp -v LICENSE "$d/LICENSE"
      cp -a espeak-ng-data "$d/"
      cp -v "$src/$model_in" "$d/$model_out"
      cp -v "$src/voices.bin" "$d/"
      cp -v "$src/tokens.txt" "$d/"
      cp -v "$src"/lexicon*.txt "$d/"
      cp -v "$src/README.md" "$d/README.md"
      cp -av dict "$d/"
      cp -v ./*.fst "$d/"
      ls -lh "$d/"
      echo "---"
      ls -lh "$d/dict"

      tar cjfv "$d.tar.bz2" "$d"
      rm -rf "$d"
      ls -lh "$d.tar.bz2"
    }

    package kokoro-multi-lang-v1_1 kokoro.onnx model.onnx
    package kokoro-int8-multi-lang-v1_1 kokoro.int8.onnx model.int8.onnx

    echo "---"
    ls -lh *.tar.bz2
168+
169+
170+
- name: Publish to huggingface 0.19
109171
if: matrix.version == '0.19'
110172
env:
111173
HF_TOKEN: ${{ secrets.HF_TOKEN }}
@@ -154,7 +216,7 @@ jobs:
154216
git commit -m "add models"
155217
git push https://csukuangfj:[email protected]/csukuangfj/kokoro-en-v0_19 main || true
156218
157-
- name: Publish to huggingface ${{ matrix.version }}
219+
- name: Publish to huggingface 1.0
158220
if: matrix.version == '1.0'
159221
env:
160222
HF_TOKEN: ${{ secrets.HF_TOKEN }}
@@ -205,6 +267,108 @@ jobs:
205267
git commit -m "add models"
206268
git push https://csukuangfj:[email protected]/csukuangfj/kokoro-multi-lang-v1_0 main || true
207269
270+
# Push the float32 Kokoro v1.1-zh export (kokoro.onnx, stored as model.onnx)
# to the csukuangfj/kokoro-multi-lang-v1_1 repository, authenticating with
# the HF_TOKEN secret. The whole command is wrapped in nick-fields/retry
# (up to 20 attempts, 200 s each).
# The final `git push ... || true` means a failed push does not fail the
# command; that best-effort behaviour is kept as-is.
# NOTE(review): the user.email value is redacted ("[email protected]") in the
# reviewed copy -- restore the real committer address before use.
- name: Publish to huggingface 1.1-zh
  if: matrix.version == '1.1-zh'
  env:
    HF_TOKEN: ${{ secrets.HF_TOKEN }}
  uses: nick-fields/retry@v3
  with:
    max_attempts: 20
    timeout_seconds: 200
    shell: bash
    command: |
      git config --global user.email "[email protected]"
      git config --global user.name "Fangjun Kuang"

      rm -rf huggingface
      export GIT_LFS_SKIP_SMUDGE=1
      export GIT_CLONE_PROTECTION_ACTIVE=false

      git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-multi-lang-v1_1 huggingface
      cd huggingface
      rm -rf ./*
      git fetch
      git pull

      git lfs track "cmn_dict"
      git lfs track "ru_dict"
      git lfs track "*.wav"
      git lfs track "lexicon*.txt"
      git lfs track "*.onnx"

      cp -a ../espeak-ng-data ./

      cp -v ../scripts/kokoro/v1.1-zh/kokoro.onnx ./model.onnx

      cp -v ../scripts/kokoro/v1.1-zh/tokens.txt .
      cp -v ../scripts/kokoro/v1.1-zh/voices.bin .
      cp -v ../scripts/kokoro/v1.1-zh/lexicon*.txt .
      cp -v ../scripts/kokoro/v1.1-zh/README.md ./README.md
      cp -v ../LICENSE ./
      cp -av ../dict ./
      cp -v ../*.fst ./

      git add .

      ls -lh

      git status

      git commit -m "add models"
      git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-multi-lang-v1_1 main || true
320+
321+
# Push the int8 Kokoro v1.1-zh export (kokoro.int8.onnx, stored as
# model.int8.onnx) to the csukuangfj/kokoro-int8-multi-lang-v1_1 repository,
# authenticating with the HF_TOKEN secret. The whole command is wrapped in
# nick-fields/retry (up to 20 attempts, 200 s each).
# The final `git push ... || true` means a failed push does not fail the
# command; that best-effort behaviour is kept as-is.
# NOTE(review): the user.email value is redacted ("[email protected]") in the
# reviewed copy -- restore the real committer address before use.
- name: Publish to huggingface 1.1-zh-int8
  if: matrix.version == '1.1-zh'
  env:
    HF_TOKEN: ${{ secrets.HF_TOKEN }}
  uses: nick-fields/retry@v3
  with:
    max_attempts: 20
    timeout_seconds: 200
    shell: bash
    command: |
      git config --global user.email "[email protected]"
      git config --global user.name "Fangjun Kuang"

      rm -rf huggingface
      export GIT_LFS_SKIP_SMUDGE=1
      export GIT_CLONE_PROTECTION_ACTIVE=false

      git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-int8-multi-lang-v1_1 huggingface
      cd huggingface
      rm -rf ./*
      git fetch
      git pull

      git lfs track "cmn_dict"
      git lfs track "ru_dict"
      git lfs track "*.wav"
      git lfs track "lexicon*.txt"
      git lfs track "*.onnx"

      cp -a ../espeak-ng-data ./

      cp -v ../scripts/kokoro/v1.1-zh/kokoro.int8.onnx ./model.int8.onnx

      cp -v ../scripts/kokoro/v1.1-zh/tokens.txt .
      cp -v ../scripts/kokoro/v1.1-zh/voices.bin .
      cp -v ../scripts/kokoro/v1.1-zh/lexicon*.txt .
      cp -v ../scripts/kokoro/v1.1-zh/README.md ./README.md
      cp -v ../LICENSE ./
      cp -av ../dict ./
      cp -v ../*.fst ./

      git add .

      ls -lh

      git status

      git commit -m "add models"
      git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-int8-multi-lang-v1_1 main || true
371+
208372
- name: Release
209373
if: github.repository_owner == 'csukuangfj'
210374
uses: svenstaro/upload-release-action@v2

scripts/apk/generate-tts-apk-script.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -438,7 +438,17 @@ def get_kokoro_models() -> List[TtsModel]:
438438
model_dir="kokoro-multi-lang-v1_0",
439439
model_name="model.onnx",
440440
lang="en",
441-
)
441+
),
442+
TtsModel(
443+
model_dir="kokoro-multi-lang-v1_1",
444+
model_name="model.onnx",
445+
lang="en",
446+
),
447+
TtsModel(
448+
model_dir="kokoro-int8-multi-lang-v1_1",
449+
model_name="model.int8.onnx",
450+
lang="en",
451+
),
442452
]
443453
for m in multi_lingual_models:
444454
m.data_dir = f"{m.model_dir}/espeak-ng-data"

scripts/kokoro/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
11
voices.json
22
voices.bin
33
README-new.md
4+
lexicon-*.txt
5+
config.json

scripts/kokoro/v1.0/add_meta_data.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,6 @@
22
# Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang)
33

44

5-
import argparse
6-
import json
7-
from pathlib import Path
8-
9-
import numpy as np
105
import onnx
116
import torch
127

scripts/kokoro/v1.0/generate_lexicon.py renamed to scripts/kokoro/v1.0/generate_lexicon_en.py

Lines changed: 0 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -4,19 +4,6 @@
44
import json
55
from typing import List, Tuple
66

7-
from misaki import zh
8-
from pypinyin import load_phrases_dict, phrases_dict, pinyin_dict
9-
10-
user_dict = {
11-
"还田": [["huan2"], ["tian2"]],
12-
"行长": [["hang2"], ["zhang3"]],
13-
"银行行长": [["yin2"], ["hang2"], ["hang2"], ["zhang3"]],
14-
}
15-
16-
load_phrases_dict(user_dict)
17-
18-
phrases_dict.phrases_dict.update(**user_dict)
19-
207

218
def generate_english_lexicon(kind: str):
229
assert kind in ("us", "gb"), kind
@@ -59,28 +46,6 @@ def generate_english_lexicon(kind: str):
5946
return list(user_defined_lower.items()) + list(lexicon.items())
6047

6148

62-
def generate_chinese_lexicon():
63-
word_dict = pinyin_dict.pinyin_dict
64-
phrases = phrases_dict.phrases_dict
65-
66-
g2p = zh.ZHG2P()
67-
lexicon = []
68-
69-
for key in word_dict:
70-
if not (0x4E00 <= key <= 0x9FFF):
71-
continue
72-
w = chr(key)
73-
tokens: str = g2p.word2ipa(w)
74-
tokens = tokens.replace(chr(815), "")
75-
lexicon.append((w, tokens))
76-
77-
for key in phrases:
78-
tokens: str = g2p.word2ipa(key)
79-
tokens = tokens.replace(chr(815), "")
80-
lexicon.append((key, tokens))
81-
return lexicon
82-
83-
8449
def save(filename: str, lexicon: List[Tuple[str, str]]):
8550
with open(filename, "w", encoding="utf-8") as f:
8651
for word, phones in lexicon:
@@ -91,11 +56,9 @@ def save(filename: str, lexicon: List[Tuple[str, str]]):
9156
def main():
9257
us = generate_english_lexicon("us")
9358
gb = generate_english_lexicon("gb")
94-
zh = generate_chinese_lexicon()
9559

9660
save("lexicon-us-en.txt", us)
9761
save("lexicon-gb-en.txt", gb)
98-
save("lexicon-zh.txt", zh)
9962

10063

10164
if __name__ == "__main__":
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
#!/usr/bin/env python3
2+
# Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang)
3+
4+
from typing import List, Tuple
5+
6+
from misaki import zh
7+
from pypinyin import load_phrases_dict, phrases_dict, pinyin_dict
8+
9+
# Extra phrase entries: each phrase maps to one single-item list per
# character (values look like tone-numbered pinyin).
user_dict = {
    "还田": [["huan2"], ["tian2"]],
    "行长": [["hang2"], ["zhang3"]],
    "银行行长": [["yin2"], ["hang2"], ["hang2"], ["zhang3"]],
}

# Hand the entries to pypinyin through its public loader ...
load_phrases_dict(user_dict)

# ... and also insert them into the phrase table that
# generate_chinese_lexicon() iterates, so they are emitted into the lexicon.
phrases_dict.phrases_dict.update(user_dict)
18+
19+
20+
def generate_chinese_lexicon():
    """Build the Chinese lexicon as a list of ``(text, phones)`` pairs.

    Single characters come first: every key of pypinyin's ``pinyin_dict``
    whose code point lies in 0x4E00..0x9FFF (the CJK Unified Ideographs
    block), in dictionary order. They are followed by every key of
    pypinyin's ``phrases_dict``.

    Each entry is converted with ``zh.ZHG2P().word2ipa`` and every
    ``chr(815)`` (U+032F) character is stripped from the result.

    Returns:
      A list of ``(text, phones)`` tuples, where ``phones`` is a ``str``.
    """
    g2p = zh.ZHG2P()

    def to_phones(text: str) -> str:
        # chr(815) is U+032F; it is removed from every converted string.
        return g2p.word2ipa(text).replace(chr(815), "")

    single_chars = [
        chr(code) for code in pinyin_dict.pinyin_dict if 0x4E00 <= code <= 0x9FFF
    ]

    entries = [(char, to_phones(char)) for char in single_chars]
    entries.extend(
        (phrase, to_phones(phrase)) for phrase in phrases_dict.phrases_dict
    )
    return entries
40+
41+
42+
def save(filename: str, lexicon: List[Tuple[str, str]]):
    """Write ``lexicon`` to ``filename`` as UTF-8 text, one entry per line.

    Each line is the word, a space, then the items of ``phones`` joined by
    single spaces (for a ``str`` that means one character per token),
    terminated by a newline.

    Args:
      filename: Path of the file to create or overwrite.
      lexicon: Iterable of ``(word, phones)`` pairs.
    """
    rows = (f"{word} {' '.join(phones)}\n" for word, phones in lexicon)
    with open(filename, "w", encoding="utf-8") as out:
        out.writelines(rows)
47+
48+
49+
def main():
    """Generate the Chinese lexicon and write it to ``lexicon-zh.txt``."""
    # Use a name that does not shadow the imported ``zh`` module.
    chinese_lexicon = generate_chinese_lexicon()
    save("lexicon-zh.txt", chinese_lexicon)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)