Skip to content

Commit ad61ad6

Browse files
authored
Add Swift API for Kokoro TTS models (#1721)
1 parent cc812e6 commit ad61ad6

File tree

8 files changed

+134
-7
lines changed

8 files changed

+134
-7
lines changed

.github/scripts/test-swift.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@ ls -lh
1111
ls -lh
1212
rm -rf vits-piper-*
1313

14+
./run-tts-kokoro-en.sh
15+
ls -lh
16+
rm -rf kokoro-en-*
17+
1418
./run-tts-matcha-zh.sh
1519
ls -lh
1620
rm -rf matcha-icefall-*

swift-api-examples/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,4 @@ keyword-spotting-from-file
1212
add-punctuations
1313
tts-matcha-zh
1414
tts-matcha-en
15+
tts-kokoro-en

swift-api-examples/SherpaOnnx.swift

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -736,7 +736,8 @@ func sherpaOnnxOfflineTtsVitsModelConfig(
736736
noise_scale: noiseScale,
737737
noise_scale_w: noiseScaleW,
738738
length_scale: lengthScale,
739-
dict_dir: toCPointer(dictDir))
739+
dict_dir: toCPointer(dictDir)
740+
)
740741
}
741742

742743
func sherpaOnnxOfflineTtsMatchaModelConfig(
@@ -757,12 +758,30 @@ func sherpaOnnxOfflineTtsMatchaModelConfig(
757758
data_dir: toCPointer(dataDir),
758759
noise_scale: noiseScale,
759760
length_scale: lengthScale,
760-
dict_dir: toCPointer(dictDir))
761+
dict_dir: toCPointer(dictDir)
762+
)
763+
}
764+
765+
/// Builds the C-level configuration struct for a Kokoro TTS model.
///
/// Every string argument is converted with `toCPointer` before being stored
/// in the returned struct; `lengthScale` is passed through unchanged.
///
/// - Parameters:
///   - model: Path to the Kokoro ONNX model file.
///   - voices: Path to the voices file.
///   - tokens: Path to the tokens file.
///   - dataDir: Path to the espeak-ng data directory.
///   - lengthScale: Value stored in `length_scale`. Defaults to `1.0`.
/// - Returns: A populated `SherpaOnnxOfflineTtsKokoroModelConfig`.
func sherpaOnnxOfflineTtsKokoroModelConfig(
  model: String = "",
  voices: String = "",
  tokens: String = "",
  dataDir: String = "",
  lengthScale: Float = 1.0
) -> SherpaOnnxOfflineTtsKokoroModelConfig {
  // Convert the Swift strings first, in the same order as the struct fields.
  let modelPtr = toCPointer(model)
  let voicesPtr = toCPointer(voices)
  let tokensPtr = toCPointer(tokens)
  let dataDirPtr = toCPointer(dataDir)

  return SherpaOnnxOfflineTtsKokoroModelConfig(
    model: modelPtr,
    voices: voicesPtr,
    tokens: tokensPtr,
    data_dir: dataDirPtr,
    length_scale: lengthScale
  )
}
762780

763781
func sherpaOnnxOfflineTtsModelConfig(
764782
vits: SherpaOnnxOfflineTtsVitsModelConfig = sherpaOnnxOfflineTtsVitsModelConfig(),
765783
matcha: SherpaOnnxOfflineTtsMatchaModelConfig = sherpaOnnxOfflineTtsMatchaModelConfig(),
784+
kokoro: SherpaOnnxOfflineTtsKokoroModelConfig = sherpaOnnxOfflineTtsKokoroModelConfig(),
766785
numThreads: Int = 1,
767786
debug: Int = 0,
768787
provider: String = "cpu"
@@ -772,15 +791,16 @@ func sherpaOnnxOfflineTtsModelConfig(
772791
num_threads: Int32(numThreads),
773792
debug: Int32(debug),
774793
provider: toCPointer(provider),
775-
matcha: matcha
794+
matcha: matcha,
795+
kokoro: kokoro
776796
)
777797
}
778798

779799
func sherpaOnnxOfflineTtsConfig(
780800
model: SherpaOnnxOfflineTtsModelConfig,
781801
ruleFsts: String = "",
782802
ruleFars: String = "",
783-
maxNumSentences: Int = 2
803+
maxNumSentences: Int = 1
784804
) -> SherpaOnnxOfflineTtsConfig {
785805
return SherpaOnnxOfflineTtsConfig(
786806
model: model,
swift-api-examples/run-tts-kokoro-en.sh

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
#!/usr/bin/env bash
#
# Builds (if necessary) and runs the Swift Kokoro English TTS example.
#
# Steps:
#   1. Require that ../build-swift-macos exists.
#   2. Download and unpack the kokoro-en-v0_19 model if model.onnx is missing.
#   3. Compile ./tts-kokoro-en from the Swift sources unless it already exists.
#   4. Run the example with the freshly built libraries on the dylib path.

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html
# to download more models
if [ ! -f ./kokoro-en-v0_19/model.onnx ]; then
  # --fail makes curl exit non-zero on an HTTP error, so the script stops at
  # the download step instead of saving an error page as the archive.
  curl --fail -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
  tar xf kokoro-en-v0_19.tar.bz2
  rm kokoro-en-v0_19.tar.bz2
fi

if [ ! -e ./tts-kokoro-en ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./tts-kokoro-en.swift ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o tts-kokoro-en

  strip tts-kokoro-en
else
  echo "./tts-kokoro-en exists - skip building"
fi

# Quoted so a checkout path containing spaces stays a single path entry.
export DYLD_LIBRARY_PATH="$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH"
./tts-kokoro-en

swift-api-examples/run-tts-matcha-en.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ if [ ! -f ./hifigan_v2.onnx ]; then
2121
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
2222
fi
2323

24-
if [ ! -e ./tts ]; then
24+
if [ ! -e ./tts-matcha-en ]; then
2525
# Note: We use -lc++ to link against libc++ instead of libstdc++
2626
swiftc \
2727
-lc++ \

swift-api-examples/run-tts-matcha-zh.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ if [ ! -f ./hifigan_v2.onnx ]; then
2020
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
2121
fi
2222

23-
if [ ! -e ./tts ]; then
23+
if [ ! -e ./tts-matcha-zh ]; then
2424
# Note: We use -lc++ to link against libc++ instead of libstdc++
2525
swiftc \
2626
-lc++ \

swift-api-examples/run-tts-vits.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ if [ ! -d ./vits-piper-en_US-amy-low ]; then
1515
rm vits-piper-en_US-amy-low.tar.bz2
1616
fi
1717

18-
if [ ! -e ./tts ]; then
18+
if [ ! -e ./tts-vits ]; then
1919
# Note: We use -lc++ to link against libc++ instead of libstdc++
2020
swiftc \
2121
-lc++ \
swift-api-examples/tts-kokoro-en.swift

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
/// Receiver object that the TTS callback reaches through an opaque pointer.
class MyClass {
  /// Stand-in for real playback: prints how many samples were delivered.
  ///
  /// - Parameter samples: The audio samples handed over by the callback.
  func playSamples(samples: [Float]) {
    let count = samples.count
    print("Play \(count) samples")
  }
}
6+
7+
func run() {
8+
let model = "./kokoro-en-v0_19/model.onnx"
9+
let voices = "./kokoro-en-v0_19/voices.bin"
10+
let tokens = "./kokoro-en-v0_19/tokens.txt"
11+
let dataDir = "./kokoro-en-v0_19/espeak-ng-data"
12+
let kokoro = sherpaOnnxOfflineTtsKokoroModelConfig(
13+
model: model,
14+
voices: voices,
15+
tokens: tokens,
16+
dataDir: dataDir
17+
)
18+
let modelConfig = sherpaOnnxOfflineTtsModelConfig(kokoro: kokoro, debug: 0)
19+
var ttsConfig = sherpaOnnxOfflineTtsConfig(model: modelConfig)
20+
21+
let myClass = MyClass()
22+
23+
// We use Unretained here so myClass must be kept alive as the callback is invoked
24+
//
25+
// See also
26+
// https://medium.com/codex/swift-c-callback-interoperability-6d57da6c8ee6
27+
let arg = Unmanaged<MyClass>.passUnretained(myClass).toOpaque()
28+
29+
let callback: TtsCallbackWithArg = { samples, n, arg in
30+
let o = Unmanaged<MyClass>.fromOpaque(arg!).takeUnretainedValue()
31+
var savedSamples: [Float] = []
32+
for index in 0..<n {
33+
savedSamples.append(samples![Int(index)])
34+
}
35+
36+
o.playSamples(samples: savedSamples)
37+
38+
// return 1 so that it continues generating
39+
return 1
40+
}
41+
42+
let tts = SherpaOnnxOfflineTtsWrapper(config: &ttsConfig)
43+
44+
let text =
45+
"Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."
46+
let sid = 0
47+
let speed: Float = 1.0
48+
49+
let audio = tts.generateWithCallbackWithArg(
50+
text: text, callback: callback, arg: arg, sid: sid, speed: speed)
51+
let filename = "test-kokoro-en.wav"
52+
let ok = audio.save(filename: filename)
53+
if ok == 1 {
54+
print("\nSaved to:\(filename)")
55+
} else {
56+
print("Failed to save to \(filename)")
57+
}
58+
}
59+
60+
/// Program entry point: runs the Kokoro TTS example once.
@main
struct App {
  static func main() { run() }
}

0 commit comments

Comments
 (0)