Skip to content

Commit eee5575

Browse files
authored
Add Kotlin and Java API for Dolphin CTC models (#2086)
1 parent 0de7e1b commit eee5575

20 files changed

+517
-18
lines changed

.github/workflows/apk-asr-2pass.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ jobs:
2323
fail-fast: false
2424
matrix:
2525
os: [ubuntu-latest]
26-
total: ["4"]
27-
index: ["0", "1", "2", "3"]
26+
total: ["16"]
27+
index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15"]
2828

2929
steps:
3030
- uses: actions/checkout@v4

.github/workflows/apk-vad-asr.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ jobs:
2323
fail-fast: false
2424
matrix:
2525
os: [ubuntu-latest]
26-
total: ["10"]
27-
index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
26+
total: ["18"]
27+
index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17"]
2828

2929
steps:
3030
- uses: actions/checkout@v4

.github/workflows/run-java-test.yaml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,16 @@ jobs:
105105
make -j4
106106
ls -lh lib
107107
108+
- name: Run java test (VAD + Non-streaming Dolphin CTC)
109+
shell: bash
110+
run: |
111+
cd ./java-api-examples
112+
./run-vad-non-streaming-dolphin-ctc.sh
113+
rm *.onnx
114+
ls -lh *.wav
115+
rm *.wav
116+
rm -rf sherpa-onnx-dolphin-*
117+
108118
- name: Run speech enhancement (GTCRN)
109119
shell: bash
110120
run: |
@@ -135,6 +145,9 @@ jobs:
135145
run: |
136146
cd ./java-api-examples
137147
148+
./run-non-streaming-decode-file-dolphin-ctc.sh
149+
rm -rf sherpa-onnx-dolphin-*
150+
138151
./run-non-streaming-decode-file-moonshine.sh
139152
rm -rf sherpa-onnx-moonshine-*
140153

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,3 +140,4 @@ README-DEV.txt
140140
*.jit
141141
##clion
142142
.idea
143+
sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02

java-api-examples/NonStreamingDecodeFileDolphinCtc.java

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
// Copyright 2025 Xiaomi Corporation
2+
3+
// This file shows how to use an offline Dolphin CTC model, i.e.,
4+
// non-streaming Dolphin CTC model, to decode files.
5+
import com.k2fsa.sherpa.onnx.*;
6+
7+
public class NonStreamingDecodeFileDolphinCtc {
8+
public static void main(String[] args) {
9+
// please refer to
10+
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
11+
// to download model files
12+
String model = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx";
13+
String tokens = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt";
14+
15+
String waveFilename =
16+
"./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/test_wavs/0.wav";
17+
18+
WaveReader reader = new WaveReader(waveFilename);
19+
20+
OfflineDolphinModelConfig dolphin = OfflineDolphinModelConfig.builder().setModel(model).build();
21+
22+
OfflineModelConfig modelConfig =
23+
OfflineModelConfig.builder()
24+
.setDolphin(dolphin)
25+
.setTokens(tokens)
26+
.setNumThreads(1)
27+
.setDebug(true)
28+
.build();
29+
30+
OfflineRecognizerConfig config =
31+
OfflineRecognizerConfig.builder()
32+
.setOfflineModelConfig(modelConfig)
33+
.setDecodingMethod("greedy_search")
34+
.build();
35+
36+
OfflineRecognizer recognizer = new OfflineRecognizer(config);
37+
OfflineStream stream = recognizer.createStream();
38+
stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());
39+
40+
recognizer.decode(stream);
41+
42+
String text = recognizer.getResult(stream).getText();
43+
44+
System.out.printf("filename:%s\nresult:%s\n", waveFilename, text);
45+
46+
stream.release();
47+
recognizer.release();
48+
}
49+
}

java-api-examples/README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ This directory contains examples for the JAVA API of sherpa-onnx.
2323
## Non-Streaming Speech recognition
2424

2525
```bash
26+
./run-non-streaming-decode-file-dolphin-ctc.sh
2627
./run-non-streaming-decode-file-paraformer.sh
2728
./run-non-streaming-decode-file-sense-voice.sh
2829
./run-non-streaming-decode-file-transducer.sh
@@ -102,6 +103,12 @@ The punctuation model supports both English and Chinese.
102103
./run-vad-remove-slience.sh
103104
```
104105

106+
## VAD + Non-streaming Dolphin CTC for speech recognition
107+
108+
```bash
109+
./run-vad-non-streaming-dolphin-ctc.sh
110+
```
111+
105112
## VAD + Non-streaming SenseVoice for speech recognition
106113

107114
```bash

java-api-examples/VadNonStreamingDolphinCtc.java

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
// Copyright 2025 Xiaomi Corporation
2+
3+
// This file shows how to use a silero_vad model with a non-streaming Dolphin
4+
// CTC model for speech recognition.
5+
6+
import com.k2fsa.sherpa.onnx.*;
7+
import java.util.Arrays;
8+
9+
public class VadNonStreamingSenseVoice {
10+
public static Vad createVad() {
11+
// please download ./silero_vad.onnx from
12+
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
13+
String model = "./silero_vad.onnx";
14+
SileroVadModelConfig sileroVad =
15+
SileroVadModelConfig.builder()
16+
.setModel(model)
17+
.setThreshold(0.5f)
18+
.setMinSilenceDuration(0.25f)
19+
.setMinSpeechDuration(0.5f)
20+
.setWindowSize(512)
21+
.setMaxSpeechDuration(5.0f)
22+
.build();
23+
24+
VadModelConfig config =
25+
VadModelConfig.builder()
26+
.setSileroVadModelConfig(sileroVad)
27+
.setSampleRate(16000)
28+
.setNumThreads(1)
29+
.setDebug(true)
30+
.setProvider("cpu")
31+
.build();
32+
33+
return new Vad(config);
34+
}
35+
36+
public static OfflineRecognizer createOfflineRecognizer() {
37+
// please refer to
38+
// https://k2-fsa.github.io/sherpa/onnx/dolphin/index.html
39+
// to download model files
40+
String model = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx";
41+
String tokens = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt";
42+
43+
OfflineDolphinModelConfig dolphin = OfflineDolphinModelConfig.builder().setModel(model).build();
44+
45+
OfflineModelConfig modelConfig =
46+
OfflineModelConfig.builder()
47+
.setDolphin(dolphin)
48+
.setTokens(tokens)
49+
.setNumThreads(1)
50+
.setDebug(true)
51+
.build();
52+
53+
OfflineRecognizerConfig config =
54+
OfflineRecognizerConfig.builder()
55+
.setOfflineModelConfig(modelConfig)
56+
.setDecodingMethod("greedy_search")
57+
.build();
58+
59+
return new OfflineRecognizer(config);
60+
}
61+
62+
public static void main(String[] args) {
63+
64+
Vad vad = createVad();
65+
OfflineRecognizer recognizer = createOfflineRecognizer();
66+
67+
// You can download the test file from
68+
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
69+
String testWaveFilename = "./lei-jun-test.wav";
70+
WaveReader reader = new WaveReader(testWaveFilename);
71+
72+
int numSamples = reader.getSamples().length;
73+
int numIter = numSamples / 512;
74+
75+
for (int i = 0; i != numIter; ++i) {
76+
int start = i * 512;
77+
int end = start + 512;
78+
float[] samples = Arrays.copyOfRange(reader.getSamples(), start, end);
79+
vad.acceptWaveform(samples);
80+
if (vad.isSpeechDetected()) {
81+
while (!vad.empty()) {
82+
SpeechSegment segment = vad.front();
83+
float startTime = segment.getStart() / 16000.0f;
84+
float duration = segment.getSamples().length / 16000.0f;
85+
86+
OfflineStream stream = recognizer.createStream();
87+
stream.acceptWaveform(segment.getSamples(), 16000);
88+
recognizer.decode(stream);
89+
String text = recognizer.getResult(stream).getText();
90+
stream.release();
91+
92+
if (!text.isEmpty()) {
93+
System.out.printf("%.3f--%.3f: %s\n", startTime, startTime + duration, text);
94+
}
95+
96+
vad.pop();
97+
}
98+
}
99+
}
100+
101+
vad.flush();
102+
while (!vad.empty()) {
103+
SpeechSegment segment = vad.front();
104+
float startTime = segment.getStart() / 16000.0f;
105+
float duration = segment.getSamples().length / 16000.0f;
106+
107+
OfflineStream stream = recognizer.createStream();
108+
stream.acceptWaveform(segment.getSamples(), 16000);
109+
recognizer.decode(stream);
110+
String text = recognizer.getResult(stream).getText();
111+
stream.release();
112+
113+
if (!text.isEmpty()) {
114+
System.out.printf("%.3f--%.3f: %s\n", startTime, startTime + duration, text);
115+
}
116+
117+
vad.pop();
118+
}
119+
120+
vad.release();
121+
recognizer.release();
122+
}
123+
}

java-api-examples/run-non-streaming-decode-file-dolphin-ctc.sh

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
#!/usr/bin/env bash
#
# Runs NonStreamingDecodeFileDolphinCtc.java.
#
# Before running, this script builds the JNI library and the Java API jar if
# they are missing, and downloads the Dolphin CTC model if it is missing.
# It uses paths relative to the current directory (../build, ../sherpa-onnx).

set -ex

# Build the JNI shared library only when neither the .dylib nor the .so exists.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

# Build the Java API jar when it has not been built yet.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# Download and unpack the model; the archive is removed after extraction.
if [ ! -f ./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  ls -lh sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02
fi

# $PWD is quoted so that a checkout path containing spaces is passed to java
# as a single argument.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingDecodeFileDolphinCtc.java

java-api-examples/run-vad-non-streaming-dolphin-ctc.sh

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#!/usr/bin/env bash
#
# Runs VadNonStreamingDolphinCtc.java.
#
# Before running, this script builds the JNI library and the Java API jar if
# they are missing, and downloads the silero VAD model, the test wave file and
# the Dolphin CTC model if they are missing.
# It uses paths relative to the current directory (../build, ../sherpa-onnx).

set -ex

# Build the JNI shared library only when neither the .dylib nor the .so exists.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

# Build the Java API jar when it has not been built yet.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# VAD model used to split the audio into speech segments.
if [ ! -f ./silero_vad.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

# Test recording decoded by the example.
if [ ! -f ./lei-jun-test.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi

# Download and unpack the ASR model; the archive is removed after extraction.
if [ ! -f ./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  ls -lh sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02
fi

# $PWD is quoted so that a checkout path containing spaces is passed to java
# as a single argument.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./VadNonStreamingDolphinCtc.java

kotlin-api-examples/run.sh

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,13 @@ function testSpokenLanguageIdentification() {
190190
}
191191

192192
function testOfflineAsr() {
193+
if [ ! -f ./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx ]; then
194+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
195+
tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
196+
rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
197+
ls -lh sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02
198+
fi
199+
193200
if [ ! -f ./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx ]; then
194201
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
195202
tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2

0 commit comments

Comments
 (0)