Skip to content

Commit 2086f8c

Browse files
authored
Add Go API for Kokoro TTS models (#1722)
1 parent ad61ad6 commit 2086f8c

File tree

6 files changed

+62
-0
lines changed

6 files changed

+62
-0
lines changed

.github/workflows/test-go-package.yaml

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -209,6 +209,11 @@ jobs:
209209
go build
210210
ls -lh
211211
212+
echo "Test kokoro en"
213+
./run-kokoro-en.sh
214+
rm -rf kokoro-en-*
215+
ls -lh
216+
212217
echo "Test matcha zh"
213218
./run-matcha-zh.sh
214219
rm -rf matcha-icefall-*

.github/workflows/test-go.yaml

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -224,6 +224,11 @@ jobs:
224224
go build
225225
ls -lh
226226
227+
echo "Test kokoro en"
228+
./run-kokoro-en.sh
229+
rm -rf kokoro-en-*
230+
ls -lh
231+
227232
echo "Test matcha zh"
228233
./run-matcha-zh.sh
229234
rm -rf matcha-icefall-*

go-api-examples/non-streaming-tts/main.go

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,12 @@ func main() {
3333
flag.Float32Var(&config.Model.Matcha.NoiseScale, "matcha-noise-scale", 0.667, "noise_scale for Matcha")
3434
flag.Float32Var(&config.Model.Matcha.LengthScale, "matcha-length-scale", 1.0, "length_scale for Matcha. small -> faster in speech speed; large -> slower")
3535

36+
flag.StringVar(&config.Model.Kokoro.Model, "kokoro-model", "", "Path to the Kokoro ONNX model")
37+
flag.StringVar(&config.Model.Kokoro.Voices, "kokoro-voices", "", "Path to voices.bin for Kokoro")
38+
flag.StringVar(&config.Model.Kokoro.Tokens, "kokoro-tokens", "", "Path to tokens.txt for Kokoro")
39+
flag.StringVar(&config.Model.Kokoro.DataDir, "kokoro-data-dir", "", "Path to espeak-ng-data for Kokoro")
40+
flag.Float32Var(&config.Model.Kokoro.LengthScale, "kokoro-length-scale", 1.0, "length_scale for Kokoro. small -> faster in speech speed; large -> slower")
41+
3642
flag.IntVar(&config.Model.NumThreads, "num-threads", 1, "Number of threads for computing")
3743
flag.IntVar(&config.Model.Debug, "debug", 0, "Whether to show debug message")
3844
flag.StringVar(&config.Model.Provider, "provider", "cpu", "Provider to use")
Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,21 @@
1+
#!/usr/bin/env bash
2+
3+
# Abort on the first failing command (-e) and echo each command as it runs (-x).
set -ex
4+
5+
# Download and unpack the Kokoro English model unless model.onnx is already present.
if [ ! -f ./kokoro-en-v0_19/model.onnx ]; then
6+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
7+
tar xf kokoro-en-v0_19.tar.bz2
8+
# The archive is no longer needed once it has been extracted.
rm kokoro-en-v0_19.tar.bz2
9+
fi
10+
11+
# Tidy the module dependencies and build the binary that is invoked below.
go mod tidy
12+
go build
13+
14+
# Synthesize the sample sentence with the Kokoro model files downloaded above,
# with debug messages enabled, writing the result to ./test-kokoro-en.wav.
./non-streaming-tts \
15+
--kokoro-model=./kokoro-en-v0_19/model.onnx \
16+
--kokoro-voices=./kokoro-en-v0_19/voices.bin \
17+
--kokoro-tokens=./kokoro-en-v0_19/tokens.txt \
18+
--kokoro-data-dir=./kokoro-en-v0_19/espeak-ng-data \
19+
--debug=1 \
20+
--output-filename=./test-kokoro-en.wav \
21+
"Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."
Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
../../../../go-api-examples/non-streaming-tts/run-kokoro-en.sh

scripts/go/sherpa_onnx.go

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -682,9 +682,18 @@ type OfflineTtsMatchaModelConfig struct {
682682
DictDir string // Path to dict directory for jieba (used only in Chinese tts)
683683
}
684684

685+
// OfflineTtsKokoroModelConfig holds the file paths and the speech-speed
// setting for a Kokoro TTS model. NewOfflineTts copies each field into the
// corresponding field of the C model config.
type OfflineTtsKokoroModelConfig struct {
686+
Model string // Path to the Kokoro ONNX model file
687+
Voices string // Path to the voices.bin file for Kokoro
688+
Tokens string // Path to the tokens.txt file for Kokoro
689+
DataDir string // Path to the espeak-ng-data directory
690+
LengthScale float32 // Use 1.0 in general. Smaller -> faster speech. Larger -> slower speech.
691+
}
692+
685693
type OfflineTtsModelConfig struct {
686694
Vits OfflineTtsVitsModelConfig
687695
Matcha OfflineTtsMatchaModelConfig
696+
Kokoro OfflineTtsKokoroModelConfig
688697

689698
// Number of threads to use for neural network computation
690699
NumThreads int
@@ -776,6 +785,21 @@ func NewOfflineTts(config *OfflineTtsConfig) *OfflineTts {
776785
c.model.matcha.dict_dir = C.CString(config.Model.Matcha.DictDir)
777786
defer C.free(unsafe.Pointer(c.model.matcha.dict_dir))
778787

788+
// kokoro
789+
c.model.kokoro.model = C.CString(config.Model.Kokoro.Model)
790+
defer C.free(unsafe.Pointer(c.model.kokoro.model))
791+
792+
c.model.kokoro.voices = C.CString(config.Model.Kokoro.Voices)
793+
defer C.free(unsafe.Pointer(c.model.kokoro.voices))
794+
795+
c.model.kokoro.tokens = C.CString(config.Model.Kokoro.Tokens)
796+
defer C.free(unsafe.Pointer(c.model.kokoro.tokens))
797+
798+
c.model.kokoro.data_dir = C.CString(config.Model.Kokoro.DataDir)
799+
defer C.free(unsafe.Pointer(c.model.kokoro.data_dir))
800+
801+
c.model.kokoro.length_scale = C.float(config.Model.Kokoro.LengthScale)
802+
779803
c.model.num_threads = C.int(config.Model.NumThreads)
780804
c.model.debug = C.int(config.Model.Debug)
781805

0 commit comments

Comments
 (0)